Skip to content

Commit 35cbc48

Browse files
esukhov authored and igcbot committed
Enable Joint-Waveall vectorization by default
Joint-Waveall vectorization is enabled by default inside the IGCVectorizer pass.
1 parent a2f3759 commit 35cbc48

File tree

10 files changed

+3169
-3
lines changed

10 files changed

+3169
-3
lines changed

IGC/Compiler/CISACodeGen/IGCVectorizer.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -906,6 +906,12 @@ bool IGCVectorizer::handleWaveAll(VecArr &Slice) {
906906
if (!IGC_GET_FLAG_VALUE(VectorizerAllowWAVEALLJoint))
907907
return true;
908908

909+
// if the platform is unsupported we vectorize as stubs
910+
if (!AllowedPlatform) {
911+
PRINT_LOG_NL("Unsupported platform");
912+
return true;
913+
}
914+
909915
auto *First = llvm::dyn_cast<WaveAllIntrinsic>(Slice.front());
910916
if (!First)
911917
return false;
@@ -1529,6 +1535,7 @@ bool IGCVectorizer::runOnFunction(llvm::Function &F) {
15291535
CGCtx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
15301536
initializeLogFile(F);
15311537

1538+
AllowedPlatform = CGCtx->platform.isCoreXE2() || CGCtx->platform.isPVC();
15321539
SIMDSize = checkSIMD(F);
15331540
// we have DPAS and simd8 for DG2 platforms
15341541
bool SupportedSIMD = SIMDSize == 16 || SIMDSize == 32;

IGC/Compiler/CISACodeGen/IGCVectorizer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ class IGCVectorizer : public llvm::FunctionPass {
7272
llvm::raw_string_ostream OutputLogStream = raw_string_ostream(LogStr);
7373
Module *M = nullptr;
7474
unsigned SIMDSize = 0;
75+
bool AllowedPlatform = true;
7576
unsigned checkSIMD(llvm::Function &F);
7677
void initializeLogFile(Function &F);
7778
void writeLog();

IGC/Compiler/CISACodeGen/Platform.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,7 @@ class CPlatform {
194194
return m_platformInfo.eProductFamily >= product;
195195
}
196196

197+
// True when the product family recorded in m_platformInfo is IGFX_PVC.
bool isPVC() const {
  return m_platformInfo.eProductFamily == IGFX_PVC;
}
197198
// True when the render core family recorded in m_platformInfo is IGFX_XE2_HPG_CORE.
bool isCoreXE2() const {
  return m_platformInfo.eRenderCoreFamily == IGFX_XE2_HPG_CORE;
}
198199

199200
bool isCoreXE3() const {
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; REQUIRES: regkeys
10+
11+
; RUN: igc_opt -S -dce -platformbmg -igc-emit-visa --regkey=DumpVISAASMToConsole=1 -simd-mode 16 < %s | FileCheck %s
12+
13+
; CHECK: .decl vector331 v_type=G type=f num_elts=128
14+
; CHECK: .decl vectorized_cast v_type=G type=hf num_elts=128
15+
16+
; CHECK: exp (M1, 16) vectorized_intrinsic(0,0)<1> vector331(0,0)<1;1,0>
17+
; CHECK: exp (M1, 16) vectorized_intrinsic(1,0)<1> vector331(1,0)<1;1,0>
18+
; CHECK: exp (M1, 16) vectorized_intrinsic(2,0)<1> vector331(2,0)<1;1,0>
19+
; CHECK: exp (M1, 16) vectorized_intrinsic(3,0)<1> vector331(3,0)<1;1,0>
20+
; CHECK: exp (M1, 16) vectorized_intrinsic(4,0)<1> vector331(4,0)<1;1,0>
21+
; CHECK: exp (M1, 16) vectorized_intrinsic(5,0)<1> vector331(5,0)<1;1,0>
22+
; CHECK: exp (M1, 16) vectorized_intrinsic(6,0)<1> vector331(6,0)<1;1,0>
23+
; CHECK: exp (M1, 16) vectorized_intrinsic(7,0)<1> vector331(7,0)<1;1,0>
24+
25+
; CHECK: max (M1_NM, 8) vectorized_joint_waveall(0,0)<1> reduceSrc_vectorized_intrinsic(0,0)<2;1,1> reduceSrc_vectorized_intrinsic(0,1)<2;1,1>
26+
; CHECK: max (M1, 16) vectorized_intrinsic2(0,0)<1> vectorized_joint_waveall(0,0)<0;1,0> vectorized_intrinsic(0,0)<1;1,0> /// $63
27+
; CHECK: max (M1, 16) vectorized_intrinsic2(1,0)<1> vectorized_joint_waveall(0,1)<0;1,0> vectorized_intrinsic(1,0)<1;1,0> /// $64
28+
; CHECK: max (M1, 16) vectorized_intrinsic2(2,0)<1> vectorized_joint_waveall(0,2)<0;1,0> vectorized_intrinsic(2,0)<1;1,0> /// $65
29+
; CHECK: max (M1, 16) vectorized_intrinsic2(3,0)<1> vectorized_joint_waveall(0,3)<0;1,0> vectorized_intrinsic(3,0)<1;1,0> /// $66
30+
; CHECK: max (M1, 16) vectorized_intrinsic2(4,0)<1> vectorized_joint_waveall(0,4)<0;1,0> vectorized_intrinsic(4,0)<1;1,0> /// $67
31+
; CHECK: max (M1, 16) vectorized_intrinsic2(5,0)<1> vectorized_joint_waveall(0,5)<0;1,0> vectorized_intrinsic(5,0)<1;1,0> /// $68
32+
; CHECK: max (M1, 16) vectorized_intrinsic2(6,0)<1> vectorized_joint_waveall(0,6)<0;1,0> vectorized_intrinsic(6,0)<1;1,0> /// $69
33+
; CHECK: max (M1, 16) vectorized_intrinsic2(7,0)<1> vectorized_joint_waveall(0,7)<0;1,0> vectorized_intrinsic(7,0)<1;1,0> /// $70
34+
35+
36+
; Test kernel: the entry block branches into a single self-looping block
; (._crit_edge). Inside the loop, element 0 of a DPAS result is inserted into a
; zero <8 x float> vector, which then flows through exp2 -> WaveAll -> maxnum
; -> fptrunc to <8 x half> -> bitcast to <8 x i16>; that value is passed as the
; second operand of a second DPAS call. The value names used here (vector331,
; vectorized_intrinsic, vectorized_joint_waveall, vectorized_intrinsic2,
; vectorized_cast) are the names the CHECK lines of this test match against.
; NOTE(review): the "preds" annotation below names %6, but the only other
; branch into ._crit_edge visible in this function comes from the unnamed
; entry block - the annotation looks stale; confirm.
define spir_kernel void @foo() {
37+
br label %._crit_edge
38+
39+
._crit_edge: ; preds = %._crit_edge, %6
40+
%tmp7 = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> zeroinitializer, <8 x i16> zeroinitializer, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false)
41+
%tmp8 = extractelement <8 x float> %tmp7, i64 0
42+
%vector331 = insertelement <8 x float> zeroinitializer, float %tmp8, i32 0
43+
%vectorized_intrinsic = call <8 x float> @llvm.exp2.v8f32(<8 x float> %vector331)
44+
%vectorized_joint_waveall = call <8 x float> @llvm.genx.GenISA.WaveAll.v8f32(<8 x float> %vectorized_intrinsic, i8 12, i32 0)
45+
%vectorized_intrinsic2 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %vectorized_joint_waveall, <8 x float> %vectorized_intrinsic)
46+
%vectorized_cast = fptrunc <8 x float> %vectorized_intrinsic2 to <8 x half>
47+
%tmp9 = bitcast <8 x half> %vectorized_cast to <8 x i16>
48+
%tmp10 = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> zeroinitializer, <8 x i16> %tmp9, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false)
49+
br label %._crit_edge
50+
}
51+
52+
declare <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float>, <8 x i16>, <8 x i32>, i32, i32, i32, i32, i1)
53+
54+
declare <8 x float> @llvm.genx.GenISA.WaveAll.v8f32(<8 x float>, i8, i32) #1
55+
56+
; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
57+
declare <8 x float> @llvm.exp2.v8f32(<8 x float>) #1
58+
59+
; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
60+
declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) #1
61+
62+
attributes #0 = { nofree nosync nounwind readnone speculatable willreturn }
63+
!igc.functions = !{!0}
64+
65+
!0 = distinct !{void ()* @foo, !1}
66+
!1 = distinct !{!2, !3}
67+
!2 = distinct !{!"function_type", i32 0}
68+
!3 = distinct !{!"sub_group_size", i32 16}
69+
70+

IGC/Compiler/tests/IGCVectorizer/vectorizer-waveall-basic.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; REQUIRES: llvm-16-plus, regkeys
2-
; RUN: igc_opt -S -opaque-pointers --igc-vectorizer -dce --regkey=VectorizerAllowWAVEALL=1 --regkey=VectorizerAllowWAVEALLJoint=1 < %s 2>&1 | FileCheck %s
2+
; RUN: igc_opt -S -opaque-pointers -platformbmg --igc-vectorizer -dce --regkey=VectorizerAllowWAVEALL=1 --regkey=VectorizerAllowWAVEALLJoint=1 < %s 2>&1 | FileCheck %s
33

44
;CHECK: %vectorized_joint_waveall = call <8 x float> @llvm.genx.GenISA.WaveAll.v8f32(<8 x float> %{{.*}}, i8 12, i32 0)
55
;CHECK: %{{vectorized_joint_waveall.*}} = call <8 x float> @llvm.genx.GenISA.WaveAll.v8f32(<8 x float> %{{.*}}, i8 12, i32 0)

IGC/Compiler/tests/IGCVectorizer/vectorizer-waveall-interleaved.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; REQUIRES: llvm-16-plus, regkeys
2-
; RUN: igc_opt -S -opaque-pointers --igc-vectorizer -dce --regkey=VectorizerDepWindowMultiplier=4 --regkey=VectorizerAllowWAVEALLJoint=1 --regkey=VectorizerAllowWAVEALL=1 < %s 2>&1 | FileCheck %s
2+
; RUN: igc_opt -S -opaque-pointers -platformbmg --igc-vectorizer -dce --regkey=VectorizerDepWindowMultiplier=4 --regkey=VectorizerAllowWAVEALLJoint=1 --regkey=VectorizerAllowWAVEALL=1 < %s 2>&1 | FileCheck %s
33

44
;CHECK: %vectorized_joint_waveall = call <8 x float> @llvm.genx.GenISA.WaveAll.v8f32(<8 x float> %{{.*}}, i8 12, i32 0)
55
;CHECK: %{{vectorized_joint_waveall.*}} = call <8 x float> @llvm.genx.GenISA.WaveAll.v8f32(<8 x float> %{{.*}}, i8 12, i32 0)

0 commit comments

Comments
 (0)