blob: 82c3e4ccca91226a99f5698dccade1af75eb85fa [file] [log] [blame]
//===-- AMDILModuleInfo.cpp - Module-level kernel/constant bookkeeping ----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
#include "AMDILModuleInfo.h"
#include "AMDILDevices.h"
#include "AMDILKernel.h"
#include "AMDILSubtarget.h"
#include "AMDILAlgorithms.tpp"
#include "AMDILModuleInfo.h"
#include "AMDILDevices.h"
#include "AMDILKernel.h"
#include "AMDILSubtarget.h"
#include "AMDILUtilityFunctions.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Support/FormattedStream.h"
#include <cstdio>
#define CB_BASE_OFFSET 2
using namespace llvm;
/// Build an empty module-info object: remember \p MMI and reset every
/// running offset and counter.
AMDILModuleInfo::AMDILModuleInfo(const MachineModuleInfo &MMI)
{
  // No symbol table has been attached yet.
  symTab = NULL;
  mMMI = &MMI;
  // All offsets and the reserved-buffer count start at zero.
  mPrintfOffset = 0;
  mCurrentCPOffset = 0;
  mReservedBuffs = 0;
  mOffset = 0;
}
/// Free every AMDILKernel that was allocated with `new` and stored in
/// mKernels.
AMDILModuleInfo::~AMDILModuleInfo() {
  for (StringMap<AMDILKernel*>::iterator kb = mKernels.begin(),
       ke = mKernels.end(); kb != ke; ++kb) {
    // Read the pointer through the iterator instead of copying the
    // StringMapEntry by value: an entry's key is stored past the end of the
    // object, so a by-value copy slices it (and is rejected by newer LLVM).
    delete kb->second;
  }
}
/// Look up the constant-pointer record named \p arg in \p krnl.
///
/// Names are compared with strcmp, i.e. as C strings up to the first NUL.
/// \returns the first matching record, or NULL when there is none.
static const AMDILConstPtr *getConstPtr(const AMDILKernel *krnl, const std::string &arg) {
  const char *wanted = arg.c_str();
  for (size_t idx = 0, count = krnl->constPtr.size(); idx != count; ++idx) {
    const AMDILConstPtr &candidate = krnl->constPtr[idx];
    if (strcmp(candidate.name.data(), wanted) == 0) {
      return &candidate;
    }
  }
  return NULL;
}
// NOTE: this helper is compiled out by the surrounding #if 0.
//
// Returns true when any element of ST has a non-zero GET_SCALAR_SIZE result
// below 32. Elements for which GET_SCALAR_SIZE yields 0 are searched
// recursively if they are themselves structs.
#if 0
static bool structContainsSub32bitType(const StructType *ST) {
StructType::element_iterator eib, eie;
for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) {
Type *ptr = *eib;
uint32_t size = (uint32_t)GET_SCALAR_SIZE(ptr);
if (!size) {
// A zero scalar size: only nested structs are inspected further.
if (const StructType *ST = dyn_cast<StructType>(ptr)) {
if (structContainsSub32bitType(ST)) {
return true;
}
}
} else if (size < 32) {
return true;
}
}
return false;
}
#endif
void AMDILModuleInfo::processModule(const Module *M,
const AMDILTargetMachine *mTM)
{
Module::const_global_iterator GI;
Module::const_global_iterator GE;
mSTM = mTM->getSubtargetImpl();
for (GI = M->global_begin(), GE = M->global_end(); GI != GE; ++GI) {
const GlobalValue *GV = GI;
llvm::StringRef GVName = GV->getName();
const char *name = GVName.data();
if (!strncmp(name, "sgv", 3)) {
mKernelArgs[GVName] = parseSGV(GV);
} else if (!strncmp(name, "fgv", 3)) {
// we can ignore this since we don't care about the filename
// string
} else if (!strncmp(name, "lvgv", 4)) {
mLocalArgs[GVName] = parseLVGV(GV);
} else if (!strncmp(name, "llvm.image.annotations", 22)) {
parseImageAnnotate(GV);
} else if (!strncmp(name, "llvm.global.annotations", 23)) {
parseGlobalAnnotate(GV);
} else if (!strncmp(name, "llvm.constpointer.annotations", 29)) {
parseConstantPtrAnnotate(GV);
} else if (!strncmp(name, "llvm.readonlypointer.annotations", 32)) {
// These are skipped as we handle them later in AMDILPointerManager.cpp
} else if (GV->getType()->getAddressSpace() == 3) { // *** Match cl_kernel.h local AS #
parseAutoArray(GV, false);
} else if (strstr(name, "clregion")) {
parseAutoArray(GV, true);
} else if (!GV->use_empty()
&& mIgnoreStr.find(GVName) == mIgnoreStr.end()) {
parseConstantPtr(GV);
}
}
allocateGlobalCB();
safeForEach(M->begin(), M->end(),
std::bind1st(
std::mem_fun(&AMDILModuleInfo::checkConstPtrsUseHW),
this));
}
void AMDILModuleInfo::allocateGlobalCB(void) {
uint32_t maxCBSize = mSTM->device()->getMaxCBSize();
uint32_t offset = 0;
uint32_t curCB = 0;
uint32_t swoffset = 0;
for (StringMap<AMDILConstPtr>::iterator cpb = mConstMems.begin(),
cpe = mConstMems.end(); cpb != cpe; ++cpb) {
bool constHW = mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem);
cpb->second.usesHardware = false;
if (constHW) {
// If we have a limit on the max CB Size, then we need to make sure that
// the constant sizes fall within the limits.
if (cpb->second.size <= maxCBSize) {
if (offset + cpb->second.size > maxCBSize) {
offset = 0;
curCB++;
}
if (curCB < mSTM->device()->getMaxNumCBs()) {
cpb->second.cbNum = curCB + CB_BASE_OFFSET;
cpb->second.offset = offset;
offset += (cpb->second.size + 15) & (~15);
cpb->second.usesHardware = true;
continue;
}
}
}
cpb->second.cbNum = 0;
cpb->second.offset = swoffset;
swoffset += (cpb->second.size + 15) & (~15);
}
if (!mConstMems.empty()) {
mReservedBuffs = curCB + 1;
}
}
/// Decide, for one kernel function, which constant pointers are backed by
/// hardware constant buffers and compute the per-buffer sizes.
///
/// Functions whose name lacks "__OpenCL" or "_AMDILKernel", or for which no
/// kernel record exists in mKernels, are skipped.
///
/// \param FCI iterator to the function being inspected.
/// \returns always false.
bool AMDILModuleInfo::checkConstPtrsUseHW(llvm::Module::const_iterator *FCI)
{
  Function::const_arg_iterator AI, AE;
  const Function *func = *FCI;
  std::string name = func->getName();
  if (!strstr(name.c_str(), "__OpenCL")
      || !strstr(name.c_str(), "_AMDILKernel")) {
    return false;
  }
  // Look the kernel up without operator[]: operator[] would insert a NULL
  // entry for an unknown kernel (which is then dereferenced below and also
  // makes isKernel() report true for it).
  StringMap<AMDILKernel*>::iterator kernelIter = mKernels.find(name);
  if (kernelIter == mKernels.end() || !kernelIter->second) {
    return false;
  }
  AMDILKernel *krnl = kernelIter->second;
  if (mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
    for (AI = func->arg_begin(), AE = func->arg_end();
         AI != AE; ++AI) {
      const Argument *Arg = &(*AI);
      const PointerType *P = dyn_cast<PointerType>(Arg->getType());
      if (!P) {
        continue;
      }
      if (P->getAddressSpace() != AMDILAS::CONSTANT_ADDRESS) {
        continue;
      }
      // Arguments that already have a record (from annotations) are kept.
      const AMDILConstPtr *ptr = getConstPtr(krnl, Arg->getName());
      if (ptr) {
        continue;
      }
      // Unannotated constant pointer: assume it may need a full buffer.
      AMDILConstPtr constAttr;
      constAttr.name = Arg->getName();
      constAttr.size = this->mSTM->device()->getMaxCBSize();
      constAttr.base = Arg;
      constAttr.isArgument = true;
      constAttr.isArray = false;
      constAttr.offset = 0;
      constAttr.usesHardware =
        mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem);
      if (constAttr.usesHardware) {
        constAttr.cbNum = krnl->constPtr.size() + 2;
      } else {
        constAttr.cbNum = 0;
      }
      krnl->constPtr.push_back(constAttr);
    }
  }
  // Now lets make sure that only the N largest buffers
  // get allocated in hardware if we have too many buffers
  uint32_t numPtrs = krnl->constPtr.size();
  // Buffers left after the module-scope reservation. Clamp at zero: the
  // reservation can exceed the device maximum, and the unsigned subtraction
  // would otherwise wrap to a huge value and disable the limit entirely.
  const uint32_t maxCBs = mSTM->device()->getMaxNumCBs();
  const uint32_t availCBs =
    (maxCBs > mReservedBuffs) ? (maxCBs - mReservedBuffs) : 0;
  if (numPtrs > availCBs) {
    // TODO: Change this routine so it sorts
    // AMDILConstPtr instead of pulling the sizes out
    // and then grab the N largest and disable the rest
    llvm::SmallVector<uint32_t, 16> sizes;
    for (uint32_t x = 0; x < numPtrs; ++x) {
      sizes.push_back(krnl->constPtr[x].size);
    }
    std::sort(sizes.begin(), sizes.end());
    // numToDisable is in [1, numPtrs], so the index below is in range.
    uint32_t numToDisable = numPtrs - availCBs;
    uint32_t safeSize = sizes[numToDisable-1];
    for (uint32_t x = 0; x < numPtrs && numToDisable; ++x) {
      if (krnl->constPtr[x].size <= safeSize) {
        krnl->constPtr[x].usesHardware = false;
        --numToDisable;
      }
    }
  }
  // Renumber all of the valid CB's so that
  // they are linear increase
  uint32_t CBid = 2 + mReservedBuffs;
  for (uint32_t x = 0; x < numPtrs; ++x) {
    if (krnl->constPtr[x].usesHardware) {
      krnl->constPtr[x].cbNum = CBid++;
    }
  }
  // Module-scope constants placed in hardware are visible to every kernel.
  for (StringMap<AMDILConstPtr>::iterator cpb = mConstMems.begin(),
       cpe = mConstMems.end(); cpb != cpe; ++cpb) {
    if (cpb->second.usesHardware) {
      krnl->constPtr.push_back(cpb->second);
    }
  }
  // Grow each buffer's recorded size to cover its largest (offset + size),
  // padded to 16 bytes; records outside the valid buffer range fall back to
  // software.
  for (uint32_t x = 0; x < krnl->constPtr.size(); ++x) {
    AMDILConstPtr &c = krnl->constPtr[x];
    uint32_t cbNum = c.cbNum - CB_BASE_OFFSET;
    if (cbNum < HW_MAX_NUM_CB && c.cbNum >= CB_BASE_OFFSET) {
      if ((c.size + c.offset) > krnl->constSizes[cbNum]) {
        krnl->constSizes[cbNum] =
          ((c.size + c.offset) + 15) & ~15;
      }
    } else {
      krnl->constPtr[x].usesHardware = false;
    }
  }
  return false;
}
/// \returns the offset recorded for array \p a, or -1 when \p a is not in
/// mArrayMems.
int32_t AMDILModuleInfo::getArrayOffset(const llvm::StringRef &a) const {
  StringMap<AMDILArrayMem>::const_iterator entry = mArrayMems.find(a);
  if (entry == mArrayMems.end()) {
    return -1;
  }
  return entry->second.offset;
}
/// \returns the offset recorded for constant \p a, or -1 when \p a is not in
/// mConstMems.
int32_t AMDILModuleInfo::getConstOffset(const llvm::StringRef &a) const {
  StringMap<AMDILConstPtr>::const_iterator entry = mConstMems.find(a);
  if (entry == mConstMems.end()) {
    return -1;
  }
  return entry->second.offset;
}
/// \returns the usesHardware flag of constant \p name, or false when \p name
/// is not in mConstMems.
bool AMDILModuleInfo::getConstHWBit(const llvm::StringRef &name) const {
  StringMap<AMDILConstPtr>::const_iterator entry = mConstMems.find(name);
  if (entry == mConstMems.end()) {
    return false;
  }
  return entry->second.usesHardware;
}
/// Parse the kernel attribute string stored in global \p G.
///
/// Only the required group size ("RWG") and required region size ("RWR") are
/// read; the rest of the encoding is skipped. Both sizes start out as the
/// subtarget defaults and are overwritten by the three comma-separated
/// unsigned values that follow the tag, when the tag is present.
///
/// \returns the attribute record; defaults only when \p G is not a global
/// variable with a string initializer.
AMDILKernelAttr AMDILModuleInfo::parseSGV(const GlobalValue *G) {
  AMDILKernelAttr nArg;
  memset(&nArg, 0, sizeof(nArg));
  for (int dim = 0; dim != 3; ++dim) {
    nArg.reqGroupSize[dim] = mSTM->getDefaultSize(dim);
    nArg.reqRegionSize[dim] = mSTM->getDefaultSize(dim);
  }
  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
  if (!GV || !GV->hasInitializer()) {
    return nArg;
  }
  const ConstantDataArray *CA =
    dyn_cast_or_null<ConstantDataArray>(GV->getInitializer());
  if (!CA || !CA->isString()) {
    return nArg;
  }
  std::string init = CA->getAsString();
  // Required work-group size: text right after the 3-character tag.
  size_t groupTag = init.find("RWG");
  if (groupTag != std::string::npos) {
    sscanf(init.c_str() + groupTag + 3, "%u,%u,%u",
           &(nArg.reqGroupSize[0]),
           &(nArg.reqGroupSize[1]),
           &(nArg.reqGroupSize[2]));
    nArg.mHasRWG = true;
  }
  // Required work-region size, same encoding.
  size_t regionTag = init.find("RWR");
  if (regionTag != std::string::npos) {
    sscanf(init.c_str() + regionTag + 3, "%u,%u,%u",
           &(nArg.reqRegionSize[0]),
           &(nArg.reqRegionSize[1]),
           &(nArg.reqRegionSize[2]));
    nArg.mHasRWR = true;
  }
  return nArg;
}
/// Collect the local arrays referenced by the constant-array initializer of
/// global \p G.
///
/// Each operand that is a constant expression contributes the name of its
/// first operand; when that name is already in mArrayMems, a pointer to the
/// stored record is appended to the result. The result's name ends up as the
/// last name seen (empty if none).
AMDILLocalArg AMDILModuleInfo::parseLVGV(const GlobalValue *G) {
  AMDILLocalArg nArg;
  nArg.name = "";
  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
  if (!GV || !GV->hasInitializer()) {
    return nArg;
  }
  const ConstantArray *CA =
    dyn_cast_or_null<ConstantArray>(GV->getInitializer());
  if (!CA) {
    return nArg;
  }
  const size_t numLocals = CA->getNumOperands();
  for (size_t idx = 0; idx < numLocals; ++idx) {
    const Value *local = CA->getOperand(idx);
    const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(local);
    if (CE && CE->getNumOperands()) {
      nArg.name = (*(CE->op_begin()))->getName();
      StringMap<AMDILArrayMem>::iterator known = mArrayMems.find(nArg.name);
      if (known != mArrayMems.end()) {
        nArg.local.push_back(&(known->second));
      }
    }
  }
  return nArg;
}
void AMDILModuleInfo::parseConstantPtrAnnotate(const GlobalValue *G) {
const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(G);
const ConstantArray *CA =
dyn_cast_or_null<ConstantArray>(GV->getInitializer());
if (!CA) {
return;
}
uint32_t numOps = CA->getNumOperands();
for (uint32_t x = 0; x < numOps; ++x) {
const Value *V = CA->getOperand(x);
const ConstantStruct *CS = dyn_cast_or_null<ConstantStruct>(V);
if (!CS) {
continue;
}
assert(CS->getNumOperands() == 2 && "There can only be 2"
" fields, a name and size");
const ConstantExpr *nameField = dyn_cast<ConstantExpr>(CS->getOperand(0));
const ConstantInt *sizeField = dyn_cast<ConstantInt>(CS->getOperand(1));
assert(nameField && "There must be a constant name field");
assert(sizeField && "There must be a constant size field");
const GlobalVariable *nameGV =
dyn_cast<GlobalVariable>(nameField->getOperand(0));
const ConstantDataArray *nameArray =
dyn_cast<ConstantDataArray>(nameGV->getInitializer());
// Lets add this string to the set of strings we should ignore processing
mIgnoreStr.insert(nameGV->getName());
if (mConstMems.find(nameGV->getName())
!= mConstMems.end()) {
// If we already processesd this string as a constant, lets remove it from
// the list of known constants. This way we don't process unneeded data
// and don't generate code/metadata for strings that are never used.
mConstMems.erase(mConstMems.find(nameGV->getName()));
} else {
mIgnoreStr.insert(CS->getOperand(0)->getName());
}
AMDILConstPtr constAttr;
constAttr.name = nameArray->getAsString();
constAttr.size = (sizeField->getZExtValue() + 15) & ~15;
constAttr.base = CS;
constAttr.isArgument = true;
constAttr.isArray = false;
constAttr.cbNum = 0;
constAttr.offset = 0;
constAttr.usesHardware = (constAttr.size <= mSTM->device()->getMaxCBSize());
// Now that we have all our constant information,
// lets update the AMDILKernel
llvm::StringRef AMDILKernelName = G->getName().data() + 30;
AMDILKernel *k;
if (mKernels.find(AMDILKernelName) != mKernels.end()) {
k = mKernels[AMDILKernelName];
} else {
k = new AMDILKernel;
k->curSize = 0;
k->curRSize = 0;
k->curHWSize = 0;
k->curHWRSize = 0;
k->constSize = 0;
k->lvgv = NULL;
k->sgv = NULL;
memset(k->constSizes, 0, sizeof(uint32_t) * HW_MAX_NUM_CB);
}
constAttr.cbNum = k->constPtr.size() + 2;
k->constPtr.push_back(constAttr);
mKernels[AMDILKernelName] = k;
}
}
void AMDILModuleInfo::parseImageAnnotate(const GlobalValue *G) {
const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
const ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
if (!CA) {
return;
}
if (isa<GlobalValue>(CA)) {
return;
}
uint32_t e = CA->getNumOperands();
if (!e) {
return;
}
AMDILKernel *k;
llvm::StringRef name = G->getName().data() + 23;
if (mKernels.find(name) != mKernels.end()) {
k = mKernels[name];
} else {
k = new AMDILKernel;
k->curSize = 0;
k->curRSize = 0;
k->curHWSize = 0;
k->curHWRSize = 0;
k->constSize = 0;
k->lvgv = NULL;
k->sgv = NULL;
memset(k->constSizes, 0, sizeof(uint32_t) * HW_MAX_NUM_CB);
}
for (uint32_t i = 0; i != e; ++i) {
const Value *V = CA->getOperand(i);
const Constant *C = dyn_cast<Constant>(V);
const ConstantStruct *CS = dyn_cast<ConstantStruct>(C);
if (CS && CS->getNumOperands() == 2) {
if (mConstMems.find(CS->getOperand(0)->getOperand(0)->getName()) !=
mConstMems.end()) {
// If we already processesd this string as a constant, lets remove it
// from the list of known constants. This way we don't process unneeded
// data and don't generate code/metadata for strings that are never
// used.
mConstMems.erase(
mConstMems.find(CS->getOperand(0)->getOperand(0)->getName()));
} else {
mIgnoreStr.insert(CS->getOperand(0)->getOperand(0)->getName());
}
const ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(1));
uint32_t val = (uint32_t)CI->getZExtValue();
if (val == 1) {
k->readOnly.insert(i);
} else if (val == 2) {
k->writeOnly.insert(i);
} else {
assert(!"Unknown image type value!");
}
}
}
mKernels[name] = k;
}
void AMDILModuleInfo::parseAutoArray(const GlobalValue *GV, bool isRegion) {
const GlobalVariable *G = dyn_cast<GlobalVariable>(GV);
Type *Ty = (G) ? G->getType() : NULL;
AMDILArrayMem tmp;
tmp.isHW = true;
tmp.offset = 0;
tmp.vecSize = getTypeSize(Ty, true);
tmp.isRegion = isRegion;
mArrayMems[GV->getName()] = tmp;
}
/// Register global \p GV in mConstMems as a module-scope constant array.
///
/// The record starts out as software-backed (cbNum 0, offset 0,
/// usesHardware false) and is sized by getTypeSize() of the global's type
/// (NULL type when \p GV is not a global variable).
void AMDILModuleInfo::parseConstantPtr(const GlobalValue *GV) {
  const GlobalVariable *G = dyn_cast<GlobalVariable>(GV);
  Type *Ty = (G) ? G->getType() : NULL;
  AMDILConstPtr constAttr;
  // Take the name from GV, not G: G is NULL when the cast fails, and the
  // map key below already uses GV's name.
  constAttr.name = GV->getName();
  constAttr.size = getTypeSize(Ty, true);
  constAttr.base = GV;
  constAttr.isArgument = false;
  constAttr.isArray = true;
  constAttr.offset = 0;
  constAttr.cbNum = 0;
  constAttr.usesHardware = false;
  mConstMems[GV->getName()] = constAttr;
}
/// Parse an "llvm.global.annotations" global: each operand of its
/// constant-array initializer is handed to parseKernelInformation().
/// Nothing happens when \p G is not a global variable with such an
/// initializer.
void AMDILModuleInfo::parseGlobalAnnotate(const GlobalValue *G) {
  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
  // dyn_cast yields NULL for anything that is not a global variable.
  if (!GV || !GV->hasInitializer()) {
    return;
  }
  const Constant *CT = GV->getInitializer();
  if (!CT || isa<GlobalValue>(CT)) {
    return;
  }
  const ConstantArray *CA = dyn_cast<ConstantArray>(CT);
  if (!CA) {
    return;
  }
  unsigned int nKernels = CA->getNumOperands();
  for (unsigned int i = 0, e = nKernels; i != e; ++i) {
    parseKernelInformation(CA->getOperand(i));
  }
}
void AMDILModuleInfo::parseKernelInformation(const Value *V) {
if (isa<GlobalValue>(V)) {
return;
}
const ConstantStruct *CS = dyn_cast_or_null<ConstantStruct>(V);
if (!CS) {
return;
}
uint32_t N = CS->getNumOperands();
if (N != 5) {
return;
}
AMDILKernel *tmp;
// The first operand is always a pointer to the AMDILKernel.
const Constant *CV = dyn_cast<Constant>(CS->getOperand(0));
llvm::StringRef AMDILKernelName = "";
if (CV->getNumOperands()) {
AMDILKernelName = (*(CV->op_begin()))->getName();
}
// If we have images, then we have already created the AMDILKernel and we just need
// to get the AMDILKernel information.
if (mKernels.find(AMDILKernelName) != mKernels.end()) {
tmp = mKernels[AMDILKernelName];
} else {
tmp = new AMDILKernel;
tmp->curSize = 0;
tmp->curRSize = 0;
tmp->curHWSize = 0;
tmp->curHWRSize = 0;
tmp->constSize = 0;
tmp->lvgv = NULL;
tmp->sgv = NULL;
memset(tmp->constSizes, 0, sizeof(uint32_t) * HW_MAX_NUM_CB);
}
// The second operand is SGV, there can only be one so we don't need to worry
// about parsing out multiple data points.
CV = dyn_cast<Constant>(CS->getOperand(1));
llvm::StringRef sgvName;
if (CV->getNumOperands()) {
sgvName = (*(CV->op_begin()))->getName();
}
if (mKernelArgs.find(sgvName) != mKernelArgs.end()) {
tmp->sgv = &mKernelArgs[sgvName];
}
// The third operand is FGV, which is skipped
// The fourth operand is LVGV
// There can be multiple local arrays, so we
// need to handle each one seperatly
CV = dyn_cast<Constant>(CS->getOperand(3));
llvm::StringRef lvgvName = "";
if (CV->getNumOperands()) {
lvgvName = (*(CV->op_begin()))->getName();
}
if (mLocalArgs.find(lvgvName) != mLocalArgs.end()) {
AMDILLocalArg *ptr = &mLocalArgs[lvgvName];
tmp->lvgv = ptr;
llvm::SmallVector<AMDILArrayMem *, DEFAULT_VEC_SLOTS>::iterator ib, ie;
for (ib = ptr->local.begin(), ie = ptr->local.end(); ib != ie; ++ib) {
if ((*ib)->isRegion) {
if ((*ib)->isHW) {
(*ib)->offset = tmp->curHWRSize;
tmp->curHWRSize += ((*ib)->vecSize + 15) & ~15;
} else {
(*ib)->offset = tmp->curRSize;
tmp->curRSize += ((*ib)->vecSize + 15) & ~15;
}
} else {
if ((*ib)->isHW) {
(*ib)->offset = tmp->curHWSize;
tmp->curHWSize += ((*ib)->vecSize + 15) & ~15;
} else {
(*ib)->offset = tmp->curSize;
tmp->curSize += ((*ib)->vecSize + 15) & ~15;
}
}
}
}
// The fifth operand is NULL
mKernels[AMDILKernelName] = tmp;
}
/// \returns the kernel record stored under \p name, or NULL when there is
/// none.
AMDILKernel *
AMDILModuleInfo::getKernel(const llvm::StringRef &name) {
  StringMap<AMDILKernel*>::iterator found = mKernels.find(name);
  if (found != mKernels.end()) {
    return found->second;
  }
  return NULL;
}
/// \returns true when mKernels has an entry for \p name.
bool AMDILModuleInfo::isKernel(const llvm::StringRef &name) const {
  StringMap<AMDILKernel*>::const_iterator found = mKernels.find(name);
  return found != mKernels.end();
}
/// \returns true when kernel \p name exists and its writeOnly set contains
/// image index \p iID.
bool AMDILModuleInfo::isWriteOnlyImage(const llvm::StringRef &name,
                                       uint32_t iID) const {
  const StringMap<AMDILKernel*>::const_iterator found = mKernels.find(name);
  if (found != mKernels.end()) {
    return found->second->writeOnly.count(iID);
  }
  return false;
}
// NOTE: this function is compiled out by the surrounding #if 0.
//
// Returns 8 when the environment variable GPU_DISABLE_RAW_UAV starts with
// '1'. Otherwise returns the size of the kernel's writeOnly set, or 0 when
// the kernel is unknown.
#if 0
uint32_t
AMDILModuleInfo::getNumWriteImages(const llvm::StringRef &name) const {
char *env = NULL;
env = getenv("GPU_DISABLE_RAW_UAV");
if (env && env[0] == '1') {
return 8;
}
const StringMap<AMDILKernel*>::const_iterator kiter = mKernels.find(name);
if (kiter == mKernels.end()) {
return 0;
} else {
return kiter->second->writeOnly.size();
}
}
#endif
/// \returns true when kernel \p name exists and its readOnly set contains
/// image index \p iID.
bool AMDILModuleInfo::isReadOnlyImage(const llvm::StringRef &name,
                                      uint32_t iID) const {
  const StringMap<AMDILKernel*>::const_iterator found = mKernels.find(name);
  if (found != mKernels.end()) {
    return found->second->readOnly.count(iID);
  }
  return false;
}
// NOTE: every function up to the matching #endif is compiled out by #if 0.
// They are per-kernel queries keyed by kernel name; each falls back to a
// default when the kernel (or its sgv record) is missing.
#if 0
// Returns the sgv record's mHasRWG flag; false when kernel or sgv is missing.
bool AMDILModuleInfo::hasRWG(const llvm::StringRef &name) const {
StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
if (iter != mKernels.end()) {
AMDILKernelAttr *ptr = iter->second->sgv;
if (ptr) {
return ptr->mHasRWG;
}
}
return false;
}
// Returns the sgv record's mHasRWR flag; false when kernel or sgv is missing.
bool AMDILModuleInfo::hasRWR(const llvm::StringRef &name) const {
StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
if (iter != mKernels.end()) {
AMDILKernelAttr *ptr = iter->second->sgv;
if (ptr) {
return ptr->mHasRWR;
}
}
return false;
}
// Returns the product of the three reqGroupSize entries, or the product of
// the three subtarget default sizes when kernel or sgv is missing.
uint32_t
AMDILModuleInfo::getMaxGroupSize(const llvm::StringRef &name) const {
StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
if (iter != mKernels.end()) {
AMDILKernelAttr *sgv = iter->second->sgv;
if (sgv) {
return sgv->reqGroupSize[0] * sgv->reqGroupSize[1] * sgv->reqGroupSize[2];
}
}
return mSTM->getDefaultSize(0) *
mSTM->getDefaultSize(1) *
mSTM->getDefaultSize(2);
}
// Returns the product of the three reqRegionSize entries, or the product of
// the three subtarget default sizes when kernel or sgv is missing.
uint32_t
AMDILModuleInfo::getMaxRegionSize(const llvm::StringRef &name) const {
StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
if (iter != mKernels.end()) {
AMDILKernelAttr *sgv = iter->second->sgv;
if (sgv) {
return sgv->reqRegionSize[0] *
sgv->reqRegionSize[1] *
sgv->reqRegionSize[2];
}
}
return mSTM->getDefaultSize(0) *
mSTM->getDefaultSize(1) *
mSTM->getDefaultSize(2);
}
// Returns the kernel's curRSize, or 0 when the kernel is unknown.
uint32_t AMDILModuleInfo::getRegionSize(const llvm::StringRef &name) const {
StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
if (iter != mKernels.end()) {
return iter->second->curRSize;
} else {
return 0;
}
}
// Returns the kernel's curSize, or 0 when the kernel is unknown.
uint32_t AMDILModuleInfo::getLocalSize(const llvm::StringRef &name) const {
StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
if (iter != mKernels.end()) {
return iter->second->curSize;
} else {
return 0;
}
}
// Returns the kernel's constSize, or 0 when the kernel is unknown.
uint32_t AMDILModuleInfo::getConstSize(const llvm::StringRef &name) const {
StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
if (iter != mKernels.end()) {
return iter->second->constSize;
} else {
return 0;
}
}
// Returns the kernel's curHWRSize, or 0 when the kernel is unknown.
uint32_t
AMDILModuleInfo::getHWRegionSize(const llvm::StringRef &name) const {
StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
if (iter != mKernels.end()) {
return iter->second->curHWRSize;
} else {
return 0;
}
}
// Returns the kernel's curHWSize, or 0 when the kernel is unknown.
uint32_t AMDILModuleInfo::getHWLocalSize(const llvm::StringRef &name) const {
StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
if (iter != mKernels.end()) {
return iter->second->curHWSize;
} else {
return 0;
}
}
#endif
int32_t AMDILModuleInfo::getArgID(const Argument *arg) {
DenseMap<const Argument *, int32_t>::iterator argiter = mArgIDMap.find(arg);
if (argiter != mArgIDMap.end()) {
return argiter->second;
} else {
return -1;
}
}
/// Query the region size of kernel \p name.
///
/// \param dim 0, 1 or 2 selects one dimension; 3 selects the product of all
///            three; any other value yields 1.
/// \returns the value from the kernel's sgv record when the kernel exists
/// and has one, otherwise the corresponding subtarget default.
uint32_t
AMDILModuleInfo::getRegion(const llvm::StringRef &name, uint32_t dim) const {
  StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
  if (dim > 3) {
    return 1;
  }
  const bool hasAttr = (iter != mKernels.end()) && iter->second->sgv;
  if (hasAttr) {
    AMDILKernelAttr *sgv = iter->second->sgv;
    if (dim == 3) {
      return sgv->reqRegionSize[0] *
             sgv->reqRegionSize[1] *
             sgv->reqRegionSize[2];
    }
    return sgv->reqRegionSize[dim];
  }
  // No per-kernel attributes: use the subtarget defaults.
  if (dim == 3) {
    return mSTM->getDefaultSize(0) *
           mSTM->getDefaultSize(1) *
           mSTM->getDefaultSize(2);
  }
  return mSTM->getDefaultSize(dim);
}
/// \returns an iterator to the first entry of mConstMems.
StringMap<AMDILConstPtr>::iterator AMDILModuleInfo::consts_begin() {
  StringMap<AMDILConstPtr>::iterator first = mConstMems.begin();
  return first;
}
/// \returns the past-the-end iterator of mConstMems.
StringMap<AMDILConstPtr>::iterator AMDILModuleInfo::consts_end() {
  StringMap<AMDILConstPtr>::iterator last = mConstMems.end();
  return last;
}
/// \returns true when \p S is present in mByteStore.
bool AMDILModuleInfo::byteStoreExists(StringRef S) const {
  if (mByteStore.find(S) == mByteStore.end()) {
    return false;
  }
  return true;
}
/// \returns the size of the constant pointer named \p arg in \p krnl, or 0
/// when the kernel has no such record.
uint32_t AMDILModuleInfo::getConstPtrSize(const AMDILKernel *krnl,
                                          const llvm::StringRef &arg)
{
  const AMDILConstPtr *record = getConstPtr(krnl, arg);
  if (!record) {
    return 0;
  }
  return record->size;
}
/// \returns the offset of the constant pointer named \p arg in \p krnl, or 0
/// when the kernel has no such record.
uint32_t AMDILModuleInfo::getConstPtrOff(const AMDILKernel *krnl,
                                         const llvm::StringRef &arg)
{
  const AMDILConstPtr *record = getConstPtr(krnl, arg);
  if (!record) {
    return 0;
  }
  return record->offset;
}
/// \returns the constant-buffer number of the constant pointer named \p arg
/// in \p krnl, or 0 when the kernel has no such record.
uint32_t AMDILModuleInfo::getConstPtrCB(const AMDILKernel *krnl,
                                        const llvm::StringRef &arg)
{
  const AMDILConstPtr *record = getConstPtr(krnl, arg);
  if (!record) {
    return 0;
  }
  return record->cbNum;
}
void AMDILModuleInfo::calculateCPOffsets(const MachineFunction *MF,
AMDILKernel *krnl)
{
const MachineConstantPool *MCP = MF->getConstantPool();
if (!MCP) {
return;
}
const std::vector<MachineConstantPoolEntry> consts = MCP->getConstants();
size_t numConsts = consts.size();
for (size_t x = 0; x < numConsts; ++x) {
krnl->CPOffsets.push_back(
std::make_pair<uint32_t, const Constant*>(
mCurrentCPOffset, consts[x].Val.ConstVal));
size_t curSize = getTypeSize(consts[x].Val.ConstVal->getType(), true);
// Align the size to the vector boundary
curSize = (curSize + 15) & (~15);
mCurrentCPOffset += curSize;
}
}
/// \returns the isArray flag of the constant pointer named \p arg in
/// \p krnl, or false when the kernel has no such record.
bool AMDILModuleInfo::isConstPtrArray(const AMDILKernel *krnl,
                                      const llvm::StringRef &arg) {
  const AMDILConstPtr *record = getConstPtr(krnl, arg);
  if (!record) {
    return false;
  }
  return record->isArray;
}
/// \returns the isArgument flag of the constant pointer named \p arg in
/// \p krnl, or false when the kernel has no such record.
bool AMDILModuleInfo::isConstPtrArgument(const AMDILKernel *krnl,
                                         const llvm::StringRef &arg)
{
  const AMDILConstPtr *record = getConstPtr(krnl, arg);
  if (!record) {
    return false;
  }
  return record->isArgument;
}
/// \returns the base value of the constant pointer named \p arg in \p krnl,
/// or NULL when the kernel has no such record.
const Value *AMDILModuleInfo::getConstPtrValue(const AMDILKernel *krnl,
                                               const llvm::StringRef &arg) {
  const AMDILConstPtr *record = getConstPtr(krnl, arg);
  if (!record) {
    return NULL;
  }
  return record->base;
}
static void
dumpZeroElements(StructType * const T, llvm::raw_ostream &O, bool asBytes);
static void
dumpZeroElements(IntegerType * const T, llvm::raw_ostream &O, bool asBytes);
static void
dumpZeroElements(ArrayType * const T, llvm::raw_ostream &O, bool asBytes);
static void
dumpZeroElements(VectorType * const T, llvm::raw_ostream &O, bool asBytes);
static void
dumpZeroElements(Type * const T, llvm::raw_ostream &O, bool asBytes);
/// Write the zero value of type \p T to \p O as ":0" fields.
///
/// With \p asBytes set, one ":0" is written per byte (8 for double and for
/// unlisted types, 4 for float/pointer/function and opaque structs);
/// otherwise one ":0" per scalar. Integer, struct, array and vector types
/// are forwarded to their dedicated overloads. A NULL \p T writes nothing.
void dumpZeroElements(Type * const T, llvm::raw_ostream &O, bool asBytes) {
  if (!T) {
    return;
  }
  switch(T->getTypeID()) {
  case Type::X86_FP80TyID:
  case Type::FP128TyID:
  case Type::PPC_FP128TyID:
  case Type::LabelTyID:
    assert(0 && "These types are not supported by this backend");
    // With asserts disabled this deliberately falls into the 8-byte case.
  default:
  case Type::DoubleTyID:
    if (asBytes) {
      O << ":0:0:0:0:0:0:0:0";
    } else {
      O << ":0";
    }
    break;
  case Type::FloatTyID:
  case Type::PointerTyID:
  case Type::FunctionTyID:
    if (asBytes) {
      O << ":0:0:0:0";
    } else {
      O << ":0";
    }
    // Without this break, control fell into the integer case and handed the
    // IntegerType overload a NULL pointer (dyn_cast of a non-integer type).
    break;
  case Type::IntegerTyID:
    dumpZeroElements(dyn_cast<IntegerType>(T), O, asBytes);
    break;
  case Type::StructTyID:
    {
      const StructType *ST = cast<StructType>(T);
      if (!ST->isOpaque()) {
        dumpZeroElements(dyn_cast<StructType>(T), O, asBytes);
      } else { // A pre-LLVM 3.0 opaque type
        if (asBytes) {
          O << ":0:0:0:0";
        } else {
          O << ":0";
        }
      }
    }
    break;
  case Type::ArrayTyID:
    dumpZeroElements(dyn_cast<ArrayType>(T), O, asBytes);
    break;
  case Type::VectorTyID:
    dumpZeroElements(dyn_cast<VectorType>(T), O, asBytes);
    break;
  };
}
/// Write the zero value of every element of struct \p ST, in declaration
/// order, by forwarding each element type to the generic overload. A NULL
/// \p ST writes nothing.
void
dumpZeroElements(StructType * const ST, llvm::raw_ostream &O, bool asBytes) {
  if (ST) {
    for (StructType::element_iterator field = ST->element_begin(),
         fieldEnd = ST->element_end(); field != fieldEnd; ++field) {
      Type *fieldTy = *field;
      dumpZeroElements(fieldTy, O, asBytes);
    }
  }
}
/// Write the zero value of integer type \p IT.
///
/// With \p asBytes set, one ":0" is written per whole byte of the bit width;
/// otherwise a single ":0" is written, matching what the generic overload
/// emits for every other scalar type. A NULL \p IT writes nothing.
void
dumpZeroElements(IntegerType * const IT, llvm::raw_ostream &O, bool asBytes) {
  // Same NULL tolerance as the StructType overload.
  if (!IT) {
    return;
  }
  if (asBytes) {
    unsigned byteWidth = (IT->getBitWidth() >> 3);
    for (unsigned x = 0; x < byteWidth; ++x) {
      O << ":0";
    }
  } else {
    // Previously nothing was written here, so integers vanished from
    // element-wise output while floats and pointers each produced ":0".
    O << ":0";
  }
}
/// Write the zero value of array type \p AT: the element type's zero value,
/// repeated once per array element.
void
dumpZeroElements(ArrayType * const AT, llvm::raw_ostream &O, bool asBytes) {
  Type *elemTy = AT->getElementType();
  for (size_t remaining = AT->getNumElements(); remaining != 0; --remaining) {
    dumpZeroElements(elemTy, O, asBytes);
  }
}
/// Write the zero value of vector type \p VT: the element type's zero value,
/// repeated once per vector lane.
void
dumpZeroElements(VectorType * const VT, llvm::raw_ostream &O, bool asBytes) {
  Type *laneTy = VT->getElementType();
  for (size_t remaining = VT->getNumElements(); remaining != 0; --remaining) {
    dumpZeroElements(laneTy, O, asBytes);
  }
}
void AMDILModuleInfo::printConstantValue(const Constant *CAval,
llvm::raw_ostream &O, bool asBytes) {
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CAval)) {
bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble;
if (isDouble) {
double val = CFP->getValueAPF().convertToDouble();
union dtol_union {
double d;
uint64_t l;
char c[8];
} conv;
conv.d = val;
if (!asBytes) {
O << ":";
O.write_hex(conv.l);
} else {
for (int i = 0; i < 8; ++i) {
O << ":";
O.write_hex((unsigned)conv.c[i] & 0xFF);
}
}
} else {
float val = CFP->getValueAPF().convertToFloat();
union ftoi_union {
float f;
uint32_t u;
char c[4];
} conv;
conv.f = val;
if (!asBytes) {
O << ":";
O.write_hex(conv.u);
} else {
for (int i = 0; i < 4; ++i) {
O << ":";
O.write_hex((unsigned)conv.c[i] & 0xFF);
}
}
}
} else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CAval)) {
uint64_t zVal = CI->getValue().getZExtValue();
if (!asBytes) {
O << ":";
O.write_hex(zVal);
} else {
switch (CI->getBitWidth()) {
default:
{
union ltob_union {
uint64_t l;
char c[8];
} conv;
conv.l = zVal;
for (int i = 0; i < 8; ++i) {
O << ":";
O.write_hex((unsigned)conv.c[i] & 0xFF);
}
}
break;
case 8:
O << ":";
O.write_hex(zVal & 0xFF);
break;
case 16:
{
union stob_union {
uint16_t s;
char c[2];
} conv;
conv.s = (uint16_t)zVal;
O << ":";
O.write_hex((unsigned)conv.c[0] & 0xFF);
O << ":";
O.write_hex((unsigned)conv.c[1] & 0xFF);
}
break;
case 32:
{
union itob_union {
uint32_t i;
char c[4];
} conv;
conv.i = (uint32_t)zVal;
for (int i = 0; i < 4; ++i) {
O << ":";
O.write_hex((unsigned)conv.c[i] & 0xFF);
}
}
break;
}
}
} else if (const ConstantVector *CV = dyn_cast<ConstantVector>(CAval)) {
int y = CV->getNumOperands()-1;
int x = 0;
for (; x < y; ++x) {
printConstantValue(CV->getOperand(x), O, asBytes);
}
printConstantValue(CV->getOperand(x), O, asBytes);
} else if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CAval)) {
int y = CS->getNumOperands();
int x = 0;
for (; x < y; ++x) {
printConstantValue(CS->getOperand(x), O, asBytes);
}
} else if (const ConstantAggregateZero *CAZ
= dyn_cast<ConstantAggregateZero>(CAval)) {
int y = CAZ->getNumOperands();
if (y > 0) {
int x = 0;
for (; x < y; ++x) {
printConstantValue((llvm::Constant *)CAZ->getOperand(x),
O, asBytes);
}
} else {
if (asBytes) {
dumpZeroElements(CAval->getType(), O, asBytes);
} else {
int y = getNumElements(CAval->getType())-1;
for (int x = 0; x < y; ++x) {
O << ":0";
}
O << ":0";
}
}
} else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CAval)) {
int y = CA->getNumOperands();
int x = 0;
for (; x < y; ++x) {
printConstantValue(CA->getOperand(x), O, asBytes);
}
} else if (dyn_cast<ConstantPointerNull>(CAval)) {
O << ":0";
//assert(0 && "Hit condition which was not expected");
} else if (dyn_cast<ConstantExpr>(CAval)) {
O << ":0";
//assert(0 && "Hit condition which was not expected");
} else if (dyn_cast<UndefValue>(CAval)) {
O << ":0";
//assert(0 && "Hit condition which was not expected");
} else {
assert(0 && "Hit condition which was not expected");
}
}
// NOTE: everything up to the matching #endif is compiled out by #if 0.
#if 0
// Returns true when T is a struct, or a pointer/array/vector whose (possibly
// nested) contained or element type is a struct. NULL and all other types
// yield false.
static bool isStruct(Type * const T)
{
if (!T) {
return false;
}
switch (T->getTypeID()) {
default:
return false;
case Type::PointerTyID:
return isStruct(T->getContainedType(0));
case Type::StructTyID:
return true;
case Type::ArrayTyID:
case Type::VectorTyID:
return isStruct(dyn_cast<SequentialType>(T)->getElementType());
};
}
// Writes the data section for constant buffer `id` to O: a ";#DATASTART"
// line, one ";#<type>:<offset>:<size><values>" line per constant, and a
// ";#DATAEND" line. For id 0 the constant-pool entries of every kernel are
// written first, and module constants are shifted by mCurrentCPOffset.
void AMDILModuleInfo::dumpDataToCB(llvm::raw_ostream &O, AMDILKernelManager *km,
uint32_t id) {
// Total 16-byte-padded size of all constants assigned to this buffer.
uint32_t size = 0;
for (StringMap<AMDILConstPtr>::iterator cmb = consts_begin(),
cme = consts_end(); cmb != cme; ++cmb) {
if (id == cmb->second.cbNum) {
size += (cmb->second.size + 15) & (~15);
}
}
if (id == 0) {
O << ";#DATASTART:" << (size + mCurrentCPOffset) << "\n";
if (mCurrentCPOffset) {
for (StringMap<AMDILKernel*>::iterator kcpb = mKernels.begin(),
kcpe = mKernels.end(); kcpb != kcpe; ++kcpb) {
const AMDILKernel *k = kcpb->second;
size_t numConsts = k->CPOffsets.size();
for (size_t x = 0; x < numConsts; ++x) {
size_t offset = k->CPOffsets[x].first;
const Constant *C = k->CPOffsets[x].second;
Type *Ty = C->getType();
// Struct-like types are sized in bytes, everything else in elements.
size_t size = (isStruct(Ty) ? getTypeSize(Ty, true)
: getNumElements(Ty));
O << ";#" << km->getTypeName(Ty, symTab) << ":";
O << offset << ":" << size ;
printConstantValue(C, O, isStruct(Ty));
O << "\n";
}
}
}
} else {
O << ";#DATASTART:" << id << ":" << size << "\n";
}
for (StringMap<AMDILConstPtr>::iterator cmb = consts_begin(), cme = consts_end();
cmb != cme; ++cmb) {
if (cmb->second.cbNum != id) {
continue;
}
const GlobalVariable *G = dyn_cast<GlobalVariable>(cmb->second.base);
Type *Ty = (G) ? G->getType() : NULL;
size_t offset = cmb->second.offset;
// NOTE(review): G is dereferenced here although the line above allows it
// to be NULL.
const Constant *C = G->getInitializer();
size_t size = (isStruct(Ty)
? getTypeSize(Ty, true)
: getNumElements(Ty));
O << ";#" << km->getTypeName(Ty, symTab) << ":";
if (!id) {
O << (offset + mCurrentCPOffset) << ":" << size;
} else {
O << offset << ":" << size;
}
if (C) {
printConstantValue(C, O, isStruct(Ty));
} else {
assert(0 && "Cannot have a constant pointer"
" without an initializer!");
}
O <<"\n";
}
if (id == 0) {
O << ";#DATAEND\n";
} else {
O << ";#DATAEND:" << id << "\n";
}
}
// Writes one data section per distinct constant-buffer number in use.
// Buffer 0 is included whenever any constant-pool data exists. Nothing is
// written when there are neither module constants nor constant-pool data.
void
AMDILModuleInfo::dumpDataSection(llvm::raw_ostream &O, AMDILKernelManager *km) {
if (mConstMems.empty() && !mCurrentCPOffset) {
return;
} else {
llvm::DenseSet<uint32_t> const_set;
for (StringMap<AMDILConstPtr>::iterator cmb = consts_begin(), cme = consts_end();
cmb != cme; ++cmb) {
const_set.insert(cmb->second.cbNum);
}
if (mCurrentCPOffset) {
const_set.insert(0);
}
for (llvm::DenseSet<uint32_t>::iterator setb = const_set.begin(),
sete = const_set.end(); setb != sete; ++setb) {
dumpDataToCB(O, km, *setb);
}
}
}
#endif
/// Return the ID of \p func, allocating a new one on first sight.
///
/// Named values are delegated to the by-name overload. Unnamed values are
/// keyed by pointer in mFuncPtrNames; a new ID is RESERVED_FUNCS plus the
/// number of IDs already handed out by both overloads.
uint32_t AMDILModuleInfo::getOrCreateFunctionID(const GlobalValue* func) {
  if (func->getName().size()) {
    return getOrCreateFunctionID(func->getName());
  }
  if (mFuncPtrNames.find(func) != mFuncPtrNames.end()) {
    uint32_t knownId = mFuncPtrNames[func];
    return knownId;
  }
  uint32_t freshId = mFuncPtrNames.size() + RESERVED_FUNCS + mFuncNames.size();
  mFuncPtrNames[func] = freshId;
  return freshId;
}
/// Return the ID registered for function name \p func, allocating a new one
/// on first sight. A new ID is RESERVED_FUNCS plus the number of IDs already
/// handed out through mFuncNames and mFuncPtrNames.
uint32_t AMDILModuleInfo::getOrCreateFunctionID(const std::string &func) {
  if (mFuncNames.find(func) != mFuncNames.end()) {
    uint32_t knownId = mFuncNames[func];
    return knownId;
  }
  uint32_t freshId = mFuncNames.size() + RESERVED_FUNCS + mFuncPtrNames.size();
  mFuncNames[func] = freshId;
  return freshId;
}