Skip to content

Commit 1b8fda8

Browse files
committed
slight tf32 diversion
1 parent bb48fd4 commit 1b8fda8

1 file changed

Lines changed: 16 additions & 3 deletions

File tree

samples/99_matrixexperimentstf32/matrix_kernel_tiled_tf32.cl

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -145,9 +145,22 @@ kernel void MM_KERNEL_NAME(tf32_dpas_rowmajor_tiled, 8, 16, MM, NN)(global float
145145

146146
// Fills aData[KK][MM] with tiles of matrix A using Intel subgroup block-read helpers.
//
// Parameters (as used in this body):
//   A      - global float buffer handed to every block read.
//   tM     - per-step offset multiplier for the second coordinate (m + mm * tM).
//   M, K   - passed to the block read as M and K * sizeof(float) (the latter twice);
//            presumably the surface height and the row width/pitch in bytes of a
//            row-major M x K matrix - TODO confirm against the helper's definition.
//   m, k   - base coordinates of the first tile.
//   aData  - output: one float4 per (kk, mm) tile.
//
// KK, MM and tK are not declared in this function; they are assumed to be macros or
// constants defined elsewhere in the file.
//
// NOTE(review): this text is a rendered diff. Marker lines ending in '-' precede
// statements removed by the commit; marker lines ending in '+' precede added ones.
void HELPER_NAME(atile_block_load_rowmajor, MM, NN)(global float* A, int tM, int M, int K, int m, int k, float4 aData[KK][MM])
147147
{
// Removed by this commit: the original unconditional loop issuing one m8k8 read per tile.
148-
for (int kk = 0; kk < KK; kk++) {
149-
for (int mm = 0; mm < MM; mm++) {
150-
aData[kk][mm] = as_float4(intel_subgroup_block_read_u32_m8k8(A, K * sizeof(float), M, K * sizeof(float), (int2)(k + kk * tK, m + mm * tM)));
// Added by this commit: when KK is even, advance kk two at a time and fetch two tiles
// with a single "v2" block read.
148+
if (KK % 2 == 0) {
149+
for (int kk = 0; kk < KK; kk+=2) {
150+
for (int mm = 0; mm < MM; mm++) {
// Disabled debug trace: would print the group/subgroup ids and the computed coordinates
// from the first work-item of each subgroup.
151+
//if (get_sub_group_local_id() == 0) {
152+
// printf("atile block load : %d, %d, %2d: m = %3d, k = %3d, mm = %2d, kk = %2d, coord = %3d, %3d\n", (int)get_group_id(1), (int)get_group_id(0), get_sub_group_id(), m, k, mm, kk, k + kk * tK, m + mm * tM);
153+
//}
// The v2 read result is reinterpreted as eight floats per work-item; same coordinate
// expression as the single-tile read below.
154+
float8 aTemp = as_float8(intel_subgroup_block_read_u32_m8k8v2(A, K * sizeof(float), M, K * sizeof(float), (int2)(k + kk * tK, m + mm * tM)));
// Lower four components go to tile kk, upper four to tile kk + 1.
// NOTE(review): assumes the v2 read returns the two k-adjacent blocks in that order - confirm.
155+
aData[kk + 0][mm] = aTemp.lo;
156+
aData[kk + 1][mm] = aTemp.hi;
157+
}
158+
}
// Odd KK: tiles cannot all be paired, so read them one at a time exactly as the
// pre-commit code did.
159+
} else {
160+
for (int kk = 0; kk < KK; kk++) {
161+
for (int mm = 0; mm < MM; mm++) {
162+
aData[kk][mm] = as_float4(intel_subgroup_block_read_u32_m8k8(A, K * sizeof(float), M, K * sizeof(float), (int2)(k + kk * tK, m + mm * tM)));
163+
}
151164
}
152165
}
153166
}

0 commit comments

Comments
 (0)