|
| 1 | +;=========================== begin_copyright_notice ============================ |
| 2 | +; |
| 3 | +; Copyright (C) 2025 Intel Corporation |
| 4 | +; |
| 5 | +; SPDX-License-Identifier: MIT |
| 6 | +; |
| 7 | +;============================ end_copyright_notice ============================= |
| 8 | + |
| 9 | +; REQUIRES: regkeys |
| 10 | + |
| 11 | +; RUN: igc_opt -S -dce -platformbmg -igc-emit-visa --regkey=DumpVISAASMToConsole=1 -simd-mode 16 < %s | FileCheck %s |
| 12 | + |
| 13 | +; CHECK: .decl vector331 v_type=G type=f num_elts=128 |
| 14 | +; CHECK: .decl vectorized_cast v_type=G type=hf num_elts=128 |
| 15 | + |
| 16 | +; CHECK: exp (M1, 16) vectorized_intrinsic(0,0)<1> vector331(0,0)<1;1,0> |
| 17 | +; CHECK: exp (M1, 16) vectorized_intrinsic(1,0)<1> vector331(1,0)<1;1,0> |
| 18 | +; CHECK: exp (M1, 16) vectorized_intrinsic(2,0)<1> vector331(2,0)<1;1,0> |
| 19 | +; CHECK: exp (M1, 16) vectorized_intrinsic(3,0)<1> vector331(3,0)<1;1,0> |
| 20 | +; CHECK: exp (M1, 16) vectorized_intrinsic(4,0)<1> vector331(4,0)<1;1,0> |
| 21 | +; CHECK: exp (M1, 16) vectorized_intrinsic(5,0)<1> vector331(5,0)<1;1,0> |
| 22 | +; CHECK: exp (M1, 16) vectorized_intrinsic(6,0)<1> vector331(6,0)<1;1,0> |
| 23 | +; CHECK: exp (M1, 16) vectorized_intrinsic(7,0)<1> vector331(7,0)<1;1,0> |
| 24 | +; The max checks below match opcode and operands only; the trailing "/// $N" instruction ids are omitted because they depend on how many instructions precede them. |
| 25 | +; CHECK: max (M1_NM, 8) vectorized_joint_waveall(0,0)<1> reduceSrc_vectorized_intrinsic(0,0)<2;1,1> reduceSrc_vectorized_intrinsic(0,1)<2;1,1> |
| 26 | +; CHECK: max (M1, 16) vectorized_intrinsic2(0,0)<1> vectorized_joint_waveall(0,0)<0;1,0> vectorized_intrinsic(0,0)<1;1,0> |
| 27 | +; CHECK: max (M1, 16) vectorized_intrinsic2(1,0)<1> vectorized_joint_waveall(0,1)<0;1,0> vectorized_intrinsic(1,0)<1;1,0> |
| 28 | +; CHECK: max (M1, 16) vectorized_intrinsic2(2,0)<1> vectorized_joint_waveall(0,2)<0;1,0> vectorized_intrinsic(2,0)<1;1,0> |
| 29 | +; CHECK: max (M1, 16) vectorized_intrinsic2(3,0)<1> vectorized_joint_waveall(0,3)<0;1,0> vectorized_intrinsic(3,0)<1;1,0> |
| 30 | +; CHECK: max (M1, 16) vectorized_intrinsic2(4,0)<1> vectorized_joint_waveall(0,4)<0;1,0> vectorized_intrinsic(4,0)<1;1,0> |
| 31 | +; CHECK: max (M1, 16) vectorized_intrinsic2(5,0)<1> vectorized_joint_waveall(0,5)<0;1,0> vectorized_intrinsic(5,0)<1;1,0> |
| 32 | +; CHECK: max (M1, 16) vectorized_intrinsic2(6,0)<1> vectorized_joint_waveall(0,6)<0;1,0> vectorized_intrinsic(6,0)<1;1,0> |
| 33 | +; CHECK: max (M1, 16) vectorized_intrinsic2(7,0)<1> vectorized_joint_waveall(0,7)<0;1,0> vectorized_intrinsic(7,0)<1;1,0> |
| 34 | + |
| 35 | +; Reduced kernel with no ret: block ._crit_edge branches back to itself and chains dpas -> exp2 -> WaveAll -> maxnum -> fptrunc -> bitcast -> dpas on 8-element vectors. |
| 36 | +define spir_kernel void @foo() { |
| 37 | + br label %._crit_edge |
| 38 | + |
| 39 | +._crit_edge: ; preds = %._crit_edge, %0 |
| 40 | + %tmp7 = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> zeroinitializer, <8 x i16> zeroinitializer, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false) |
| 41 | + %tmp8 = extractelement <8 x float> %tmp7, i64 0 |
| 42 | + %vector331 = insertelement <8 x float> zeroinitializer, float %tmp8, i32 0 ; only lane 0 comes from the dpas result; the other seven elements stay zero |
| 43 | + %vectorized_intrinsic = call <8 x float> @llvm.exp2.v8f32(<8 x float> %vector331) |
| 44 | + %vectorized_joint_waveall = call <8 x float> @llvm.genx.GenISA.WaveAll.v8f32(<8 x float> %vectorized_intrinsic, i8 12, i32 0) ; NOTE(review): i8 12 presumably selects a max reduction (the checks above expect `max`) - confirm against the GenISA WaveAll op encoding |
| 45 | + %vectorized_intrinsic2 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %vectorized_joint_waveall, <8 x float> %vectorized_intrinsic) |
| 46 | + %vectorized_cast = fptrunc <8 x float> %vectorized_intrinsic2 to <8 x half> |
| 47 | + %tmp9 = bitcast <8 x half> %vectorized_cast to <8 x i16> |
| 48 | + %tmp10 = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> zeroinitializer, <8 x i16> %tmp9, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false) ; result is unused within this function |
| 49 | + br label %._crit_edge |
| 50 | +} |
| 51 | +; External declarations for the GenISA and LLVM intrinsics called by @foo. |
| 52 | +declare <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float>, <8 x i16>, <8 x i32>, i32, i32, i32, i32, i1) |
| 53 | + |
| 54 | +declare <8 x float> @llvm.genx.GenISA.WaveAll.v8f32(<8 x float>, i8, i32) #1 |
| 55 | + |
| 56 | +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn |
| 57 | +declare <8 x float> @llvm.exp2.v8f32(<8 x float>) #1 |
| 58 | + |
| 59 | +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn |
| 60 | +declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) #1 |
| 61 | +; NOTE(review): the declarations above reference attribute group #1, but only #0 is defined below and nothing uses it - confirm which group was intended. |
| 62 | +attributes #0 = { nofree nosync nounwind readnone speculatable willreturn } |
| 63 | +!igc.functions = !{!0} |
| 64 | +; !0 pairs @foo with !1, which carries function_type 0 and sub_group_size 16; the sub-group size matches the -simd-mode 16 flag on the RUN line. |
| 65 | +!0 = distinct !{void ()* @foo, !1} |
| 66 | +!1 = distinct !{!2, !3} |
| 67 | +!2 = distinct !{!"function_type", i32 0} |
| 68 | +!3 = distinct !{!"sub_group_size", i32 16} |
| 69 | + |
| 70 | + |
0 commit comments