wrongtest commented on code in PR #11181:
URL: https://github.com/apache/tvm/pull/11181#discussion_r866331023
##########
src/target/llvm/codegen_llvm.cc:
##########
@@ -472,23 +472,47 @@ llvm::Type* CodeGenLLVM::GetLLVMType(const PrimExpr&
expr) const {
//
// This trick comes from Halide's CodeGen_LLVM
//
-void CodeGenLLVM::AddAliasInfo(llvm::Instruction* inst, const VarNode* buffer,
PrimExpr index) {
- if (alias_var_set_.count(buffer) != 0) {
+void CodeGenLLVM::AddAliasInfo(llvm::Instruction* inst, const VarNode*
buffer_var, PrimExpr index,
+ DataType access_dtype) {
+ if (alias_var_set_.count(buffer_var) != 0) {
// Mark all possibly aliased pointer as same type.
llvm::MDNode* meta = md_tbaa_alias_set_;
inst->setMetadata("tbaa", md_builder_->createTBAAStructTagNode(meta, meta,
0));
return;
}
+ // Extract the underlying element bit width of the allocated buffer.
+ // fallback to byte type if no type annotation present.
+ int64_t buffer_elem_bits = 8;
+ int64_t access_elem_bits = access_dtype.bits() * access_dtype.lanes();
+ if (buffer_var->type_annotation.defined()) {
+ Type elem_ty =
Downcast<PointerType>(buffer_var->type_annotation)->element_type;
+ if (auto* ptype = elem_ty.as<PrimTypeNode>()) {
+ if (!ptype->dtype.is_void()) {
+ buffer_elem_bits = ptype->dtype.bits() * ptype->dtype.lanes();
+ }
+ }
+ }
+
int64_t base = 0, width = 0;
arith::PVar<IntImm> pbase, pstride;
arith::PVar<int> planes;
// create meta-data for alias analysis
// Use a group of binary tree ranges of memory banks.
if (index.defined()) {
+ int64_t xwith = 0;
if (arith::ramp(pbase, pstride, planes).Match(index)) {
base = pbase.Eval()->value;
- int64_t xwith = planes.Eval() * pstride.Eval()->value;
+ xwith = planes.Eval() * pstride.Eval()->value;
+ } else if (auto* ptr = index.as<tir::IntImmNode>()) {
+ base = ptr->value;
+ xwith = 1;
+ }
+ if (buffer_elem_bits != access_elem_bits) {
+ base = base * access_elem_bits / buffer_elem_bits;
Review Comment:
> to just bits or bytes
I agree and would like to follow that in the current PR. That makes the code
much cleaner and avoids some false positives.
Note that there is a magic width number `1024`, above which the access falls
back to a full-region access (the root tag for the current buffer var). Thus
the type tree depth will decrease for huge vectors compared to the original
version, but I think this is a minor issue, and we can revisit it if a
performance regression is detected.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]