@@ -13,7 +13,7 @@ float3x2 case1() {
1313 // vec[3] = 1
1414 // vec[4] = 3
1515 // vec[5] = 5
16- return float3x2 (0 , 1 ,
16+ return float3x2 (0 , 1 ,
1717 2 , 3 ,
1818 4 , 5 );
1919}
@@ -24,25 +24,26 @@ RWStructuredBuffer<float> In;
2424// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <6 x float> @_Z5case2v(
2525// CHECK-SAME: ) #[[ATTR0]] {
2626// CHECK-NEXT: [[ENTRY:.*:]]
27- // CHECK-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN4hlsl18RWStructuredBufferIfEixEj(ptr noundef nonnull align 4 dereferenceable(8) @_ZL2In, i32 noundef 0) #[[ATTR3 :[0-9]+]]
28- // CHECK-NEXT: [[CALL1:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN4hlsl18RWStructuredBufferIfEixEj(ptr noundef nonnull align 4 dereferenceable(8) @_ZL2In, i32 noundef 1) #[[ATTR3 ]]
29- // CHECK-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN4hlsl18RWStructuredBufferIfEixEj(ptr noundef nonnull align 4 dereferenceable(8) @_ZL2In, i32 noundef 2) #[[ATTR3 ]]
30- // CHECK-NEXT: [[CALL3:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN4hlsl18RWStructuredBufferIfEixEj(ptr noundef nonnull align 4 dereferenceable(8) @_ZL2In, i32 noundef 3) #[[ATTR3 ]]
31- // CHECK-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN4hlsl18RWStructuredBufferIfEixEj(ptr noundef nonnull align 4 dereferenceable(8) @_ZL2In, i32 noundef 4) #[[ATTR3 ]]
32- // CHECK-NEXT: [[CALL5:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN4hlsl18RWStructuredBufferIfEixEj(ptr noundef nonnull align 4 dereferenceable(8) @_ZL2In, i32 noundef 5) #[[ATTR3 ]]
27+ // CHECK-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN4hlsl18RWStructuredBufferIfEixEj(ptr noundef nonnull align 4 dereferenceable(8) @_ZL2In, i32 noundef 0) #[[ATTR4 :[0-9]+]]
28+ // CHECK-NEXT: [[CALL1:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN4hlsl18RWStructuredBufferIfEixEj(ptr noundef nonnull align 4 dereferenceable(8) @_ZL2In, i32 noundef 1) #[[ATTR4 ]]
29+ // CHECK-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN4hlsl18RWStructuredBufferIfEixEj(ptr noundef nonnull align 4 dereferenceable(8) @_ZL2In, i32 noundef 2) #[[ATTR4 ]]
30+ // CHECK-NEXT: [[CALL3:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN4hlsl18RWStructuredBufferIfEixEj(ptr noundef nonnull align 4 dereferenceable(8) @_ZL2In, i32 noundef 3) #[[ATTR4 ]]
31+ // CHECK-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN4hlsl18RWStructuredBufferIfEixEj(ptr noundef nonnull align 4 dereferenceable(8) @_ZL2In, i32 noundef 4) #[[ATTR4 ]]
32+ // CHECK-NEXT: [[CALL5:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN4hlsl18RWStructuredBufferIfEixEj(ptr noundef nonnull align 4 dereferenceable(8) @_ZL2In, i32 noundef 5) #[[ATTR4 ]]
3333// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[CALL]], align 4
3434// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <6 x float> poison, float [[TMP0]], i32 0
35- // CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[CALL2 ]], align 4
35+ // CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[CALL1 ]], align 4
3636// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <6 x float> [[VECINIT]], float [[TMP1]], i32 1
37- // CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[CALL4 ]], align 4
37+ // CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[CALL2 ]], align 4
3838// CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <6 x float> [[VECINIT6]], float [[TMP2]], i32 2
39- // CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[CALL1 ]], align 4
39+ // CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[CALL3 ]], align 4
4040// CHECK-NEXT: [[VECINIT8:%.*]] = insertelement <6 x float> [[VECINIT7]], float [[TMP3]], i32 3
41- // CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[CALL3 ]], align 4
41+ // CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[CALL4 ]], align 4
4242// CHECK-NEXT: [[VECINIT9:%.*]] = insertelement <6 x float> [[VECINIT8]], float [[TMP4]], i32 4
4343// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[CALL5]], align 4
4444// CHECK-NEXT: [[VECINIT10:%.*]] = insertelement <6 x float> [[VECINIT9]], float [[TMP5]], i32 5
45- // CHECK-NEXT: ret <6 x float> [[VECINIT10]]
45+ // CHECK-NEXT: [[MATRIX_ROWMAJOR2COLMAJOR:%.*]] = shufflevector <6 x float> [[VECINIT10]], <6 x float> poison, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
46+ // CHECK-NEXT: ret <6 x float> [[MATRIX_ROWMAJOR2COLMAJOR]]
4647//
4748float3x2 case2 () {
4849 // vec[0] = Call
@@ -51,7 +52,7 @@ float3x2 case2() {
5152 // vec[3] = Call1
5253 // vec[4] = Call3
5354 // vec[5] = Call5
54- return float3x2 (In[0 ], In[1 ],
55+ return float3x2 (In[0 ], In[1 ],
5556 In[2 ], In[3 ],
5657 In[4 ], In[5 ]);
5758}
@@ -68,28 +69,29 @@ float3x2 case2() {
6869// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <3 x float> [[TMP0]], i64 0
6970// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <6 x float> poison, float [[VECEXT]], i32 0
7071// CHECK-NEXT: [[TMP1:%.*]] = load <3 x float>, ptr [[A_ADDR]], align 16
71- // CHECK-NEXT: [[VECEXT1:%.*]] = extractelement <3 x float> [[TMP1]], i64 2
72+ // CHECK-NEXT: [[VECEXT1:%.*]] = extractelement <3 x float> [[TMP1]], i64 1
7273// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <6 x float> [[VECINIT]], float [[VECEXT1]], i32 1
73- // CHECK-NEXT: [[TMP2:%.*]] = load <3 x float>, ptr [[B_ADDR ]], align 16
74- // CHECK-NEXT: [[VECEXT3:%.*]] = extractelement <3 x float> [[TMP2]], i64 1
74+ // CHECK-NEXT: [[TMP2:%.*]] = load <3 x float>, ptr [[A_ADDR ]], align 16
75+ // CHECK-NEXT: [[VECEXT3:%.*]] = extractelement <3 x float> [[TMP2]], i64 2
7576// CHECK-NEXT: [[VECINIT4:%.*]] = insertelement <6 x float> [[VECINIT2]], float [[VECEXT3]], i32 2
76- // CHECK-NEXT: [[TMP3:%.*]] = load <3 x float>, ptr [[A_ADDR ]], align 16
77- // CHECK-NEXT: [[VECEXT5:%.*]] = extractelement <3 x float> [[TMP3]], i64 1
77+ // CHECK-NEXT: [[TMP3:%.*]] = load <3 x float>, ptr [[B_ADDR ]], align 16
78+ // CHECK-NEXT: [[VECEXT5:%.*]] = extractelement <3 x float> [[TMP3]], i64 0
7879// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <6 x float> [[VECINIT4]], float [[VECEXT5]], i32 3
7980// CHECK-NEXT: [[TMP4:%.*]] = load <3 x float>, ptr [[B_ADDR]], align 16
80- // CHECK-NEXT: [[VECEXT7:%.*]] = extractelement <3 x float> [[TMP4]], i64 0
81+ // CHECK-NEXT: [[VECEXT7:%.*]] = extractelement <3 x float> [[TMP4]], i64 1
8182// CHECK-NEXT: [[VECINIT8:%.*]] = insertelement <6 x float> [[VECINIT6]], float [[VECEXT7]], i32 4
8283// CHECK-NEXT: [[TMP5:%.*]] = load <3 x float>, ptr [[B_ADDR]], align 16
8384// CHECK-NEXT: [[VECEXT9:%.*]] = extractelement <3 x float> [[TMP5]], i64 2
8485// CHECK-NEXT: [[VECINIT10:%.*]] = insertelement <6 x float> [[VECINIT8]], float [[VECEXT9]], i32 5
85- // CHECK-NEXT: ret <6 x float> [[VECINIT10]]
86+ // CHECK-NEXT: [[MATRIX_ROWMAJOR2COLMAJOR:%.*]] = shufflevector <6 x float> [[VECINIT10]], <6 x float> poison, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
87+ // CHECK-NEXT: ret <6 x float> [[MATRIX_ROWMAJOR2COLMAJOR]]
8688//
8789float3x2 case3 (float3 a, float3 b) {
8890 // vec[0] = A[0]
89- // vec[1] = A[2 ]
90- // vec[2] = B[1 ]
91- // vec[3] = A[1 ]
92- // vec[4] = B[0 ]
91+ // vec[1] = A[1 ]
92+ // vec[2] = A[2 ]
93+ // vec[3] = B[0 ]
94+ // vec[4] = B[1 ]
9395 // vec[5] = B[2]
9496 return float3x2 (a,b);
9597}
0 commit comments