Skip to content

Commit b758132

Browse files
michalpaszkowski authored and igcbot committed
Detect type mismatch in SOA promotion when alloca with multi-byte elements is accessed through i8 GEPs
Add mismatch detection for the case where an alloca with multi-byte elements (e.g., [4 x float]) is accessed through GEPs with a different element size (e.g., i8 byte-level access). This pattern occurs in memcpy-like loops where bytes are copied individually into an array with a larger element type. The current transformation incorrectly treats each byte as a separate vector element rather than accumulating bytes into complete lanes.
1 parent e899d0b commit b758132

File tree

2 files changed

+49
-0
lines changed

2 files changed

+49
-0
lines changed

IGC/Compiler/CISACodeGen/LowerGEPForPrivMem.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -603,6 +603,50 @@ bool IGC::SOALayoutChecker::MismatchDetected(Instruction &I) {
603603
if (!allocaEltTy)
604604
return false;
605605

606+
// Check for byte-level access patterns. This detects memcpy-like patterns where an alloca with multi-byte elements
607+
// (e.g., [4 x float]) is accessed through i8 GEPs, which the promotion transformation cannot handle correctly.
608+
if (allocaEltTy && !allocaEltTy->isIntegerTy(8)) {
609+
uint64_t allocaEltSize = pDL->getTypeStoreSizeInBits(allocaEltTy);
610+
if (allocaEltSize > 8) {
611+
SmallVector<Value *, 16> worklist;
612+
SmallPtrSet<Value *, 16> visited;
613+
614+
// Start with the alloca itself
615+
worklist.push_back(&allocaRef);
616+
617+
while (!worklist.empty()) {
618+
Value *current = worklist.pop_back_val();
619+
620+
// Skip if already visited
621+
if (!visited.insert(current).second)
622+
continue;
623+
624+
for (User *U : current->users()) {
625+
if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
626+
Type *gepSrcTy = GEP->getSourceElementType();
627+
// If this GEP uses i8 (byte) indexing into a non-byte alloca element type,
628+
// this is a memcpy-like pattern that we cannot handle correctly.
629+
// The transformation would incorrectly treat each byte as a separate
630+
// element rather than accumulating bytes into complete lanes.
631+
if (gepSrcTy->isIntegerTy(8)) {
632+
pInfo->canUseSOALayout = false;
633+
return true;
634+
}
635+
// Add GEP to worklist to check its users.
636+
worklist.push_back(GEP);
637+
} else if (auto *BC = dyn_cast<BitCastInst>(U)) {
638+
// Follow bitcasts to find derived pointers.
639+
worklist.push_back(BC);
640+
} else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(U)) {
641+
// Follow address space casts.
642+
worklist.push_back(ASC);
643+
}
644+
// Load/Store/Intrinsics are terminal, don't need to follow them.
645+
}
646+
}
647+
}
648+
}
649+
606650
// Skip when we see a non-promoted type GEP with a non-constant (dynamic) byte offset. The legacy (old) algorithm
607651
// assumes byte offsets map exactly to whole promoted elements (e.g. multiples of the lane size) and cannot safely
608652
// reconstruct sub‑element (inter-lane or unaligned) accesses. Using it would risk incorrect indexing. The new

IGC/Compiler/tests/PrivateMemoryResolution/i8_gep_byte_offset.ll

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@
88

99
; RUN: igc_opt --opaque-pointers --igc-private-mem-resolution --platformlnl -S %s | FileCheck %s
1010

11+
; TODO: The GEP scalarization algorithm is disabled since it does not correctly handle remainders
12+
; in byte-wise addressing into larger lanes. The algorithm can only write whole lanes at a
13+
; time. The test here demonstrates a case correctly handled by the algorithm.
14+
; XFAIL: *
15+
1116
; This test ensures GEP scalarization on i8*/opaque ptr offsets treats the index as bytes and converts to element index via recovered base type size.
1217

1318
; CHECK-NOT: mul i32 64

0 commit comments

Comments
 (0)