Skip to content

Commit 608a9ce

Browse files
committed
Regen 531613e0fb17aca41971649c15c942231a870777 Fri 27 Mar 00:06:55 GMT 2026
1 parent 943b756 commit 608a9ce

2 files changed

Lines changed: 8 additions & 8 deletions

File tree

data-int-sve2.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6855,10 +6855,10 @@
68556855
"thru": 38,
68566856
"lat": 4,
68576857
"sizelat": 4,
6858-
"size": 12,
6858+
"size": 13,
68596859
"gisize": 108,
68606860
"extrasize": 0,
6861-
"asm": "adrp x8, .LCPI0_0\nmovi v4.8h, #1\nmovi v5.8h, #254\nldr q2, [x8, :lo12:.LCPI0_0]\nsmulh z3.b, z0.b, z2.b\nsmulh z2.b, z1.b, z2.b\nmla v3.16b, v0.16b, v4.16b\nmla v2.16b, v1.16b, v4.16b\nsshl v0.16b, v3.16b, v5.16b\nsshl v1.16b, v2.16b, v5.16b\nusra v0.16b, v0.16b, #7\nusra v1.16b, v1.16b, #7\nret",
6861+
"asm": "adrp x8, .LCPI0_0\nmovi v4.8h, #254\nldr q2, [x8, :lo12:.LCPI0_0]\nsmulh z3.b, z0.b, z2.b\nsmulh z2.b, z1.b, z2.b\nbic v0.8h, #255, lsl #8\nbic v1.8h, #255, lsl #8\nadd v0.16b, v3.16b, v0.16b\nadd v1.16b, v2.16b, v1.16b\nsshl v0.16b, v0.16b, v4.16b\nsshl v1.16b, v1.16b, v4.16b\nusra v0.16b, v0.16b, #7\nusra v1.16b, v1.16b, #7\nret",
68626862
"giasm": "str x27, [sp, #-80]! // 8-byte Folded Spill\nstp x26, x25, [sp, #16] // 16-byte Folded Spill\nstp x24, x23, [sp, #32] // 16-byte Folded Spill\nstp x22, x21, [sp, #48] // 16-byte Folded Spill\nstp x20, x19, [sp, #64] // 16-byte Folded Spill\nsmov w8, v0.b[0]\nsmov w10, v0.b[1]\nmov w9, #7 // =0x7\nsmov w6, v1.b[0]\nsmov w27, v1.b[9]\nsdiv w25, w8, w9\nmov w8, #6 // =0x6\nsdiv w5, w10, w8\nsmov w10, v0.b[2]\nsdiv w4, w10, w9\nsmov w10, v0.b[3]\nsdiv w3, w10, w8\nsmov w10, v0.b[4]\nsdiv w2, w10, w9\nsmov w10, v0.b[5]\nsdiv w1, w10, w8\nsmov w10, v0.b[6]\nsdiv w0, w10, w9\nsmov w10, v0.b[7]\nsdiv w18, w10, w8\nsmov w10, v0.b[8]\nsdiv w17, w10, w9\nsmov w10, v0.b[9]\nsdiv w16, w10, w8\nsmov w10, v0.b[10]\nsdiv w15, w10, w9\nsmov w10, v0.b[11]\nsdiv w26, w6, w9\nsmov w6, v1.b[1]\nsdiv w14, w10, w8\nsmov w10, v0.b[12]\nfmov s2, w26\nsdiv w24, w6, w8\nsmov w6, v1.b[2]\nsdiv w13, w10, w9\nsmov w10, v0.b[13]\nmov v2.b[1], w24\nsdiv w23, w6, w9\nsmov w6, v1.b[3]\nsdiv w12, w10, w8\nsmov w10, v0.b[14]\nmov v2.b[2], w23\nldp x24, x23, [sp, #32] // 16-byte Folded Reload\nsdiv w22, w6, w8\nsmov w6, v1.b[4]\nsdiv w11, w10, w9\nsmov w10, v0.b[15]\nfmov s0, w25\nmov v2.b[3], w22\nmov v0.b[1], w5\nsmov w5, v1.b[10]\nmov v0.b[2], w4\nsdiv w21, w6, w9\nsmov w6, v1.b[5]\nmov v0.b[3], w3\nsmov w3, v1.b[11]\nmov v0.b[4], w2\nsdiv w20, w6, w8\nsmov w6, v1.b[6]\nmov v2.b[4], w21\nldp x22, x21, [sp, #48] // 16-byte Folded Reload\nmov v0.b[5], w1\nsmov w1, v1.b[12]\nmov v0.b[6], w0\nsdiv w19, w6, w9\nsmov w6, v1.b[7]\nmov v2.b[5], w20\nmov v0.b[7], w18\nsmov w18, v1.b[13]\nmov v0.b[8], w17\nsdiv w7, w6, w8\nsmov w6, v1.b[8]\nmov v2.b[6], w19\nldp x20, x19, [sp, #64] // 16-byte Folded Reload\nmov v0.b[9], w16\nsmov w16, v1.b[14]\nmov v0.b[10], w15\nsdiv w6, w6, w9\nmov v2.b[7], w7\nmov v0.b[11], w14\nsmov w14, v1.b[15]\nmov v0.b[12], w13\nsdiv w25, w27, w8\nmov v2.b[8], w6\nmov v0.b[13], w12\nmov v0.b[14], w11\nsdiv w4, w5, w9\nmov v2.b[9], w25\nldp x26, x25, [sp, #16] // 16-byte Folded Reload\nsdiv w2, w3, w8\nmov v2.b[10], w4\nsdiv w0, w1, w9\nmov v2.b[11], w2\nsdiv w17, w18, w8\nmov v2.b[12], w0\nsdiv w9, w16, w9\nmov v2.b[13], w17\nsdiv w10, w10, w8\nmov v2.b[14], w9\nsdiv w8, w14, w8\nmov v0.b[15], w10\nmov v2.b[15], w8\nmov v1.16b, v2.16b\nldr x27, [sp], #80 // 8-byte Folded Reload\nret",
68636863
"ll": "define <32 x i8> @test(<32 x i8> %a) {\n %r = sdiv <32 x i8> %a, <i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6>\n ret <32 x i8> %r\n}",
68646864
"costoutput": "Printing analysis 'Cost Model Analysis' for function 'test':\nCost Model: Found costs of RThru:38 CodeSize:4 Lat:4 SizeLat:4 for: %r = sdiv <32 x i8> %a, <i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6>\nCost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %r"
@@ -8215,10 +8215,10 @@
82158215
"thru": 102,
82168216
"lat": 4,
82178217
"sizelat": 4,
8218-
"size": 16,
8218+
"size": 17,
82198219
"gisize": 155,
82208220
"extrasize": 0,
8221-
"asm": "adrp x8, .LCPI0_0\nmovi v4.8h, #1\nmovi v5.8h, #254\nldr q2, [x8, :lo12:.LCPI0_0]\nadrp x8, .LCPI0_1\nsmulh z3.b, z0.b, z2.b\nsmulh z2.b, z1.b, z2.b\nmla v3.16b, v0.16b, v4.16b\nmla v2.16b, v1.16b, v4.16b\nldr q4, [x8, :lo12:.LCPI0_1]\nsshl v3.16b, v3.16b, v5.16b\nsshl v2.16b, v2.16b, v5.16b\nusra v3.16b, v3.16b, #7\nusra v2.16b, v2.16b, #7\nmls v0.16b, v3.16b, v4.16b\nmls v1.16b, v2.16b, v4.16b\nret",
8221+
"asm": "adrp x8, .LCPI0_0\nmov v4.16b, v0.16b\nmov v5.16b, v1.16b\nldr q2, [x8, :lo12:.LCPI0_0]\nmovi v6.8h, #254\nadrp x8, .LCPI0_1\nsmulh z3.b, z0.b, z2.b\nsmulh z2.b, z1.b, z2.b\nbic v4.8h, #255, lsl #8\nbic v5.8h, #255, lsl #8\nadd v3.16b, v3.16b, v4.16b\nadd v2.16b, v2.16b, v5.16b\nldr q4, [x8, :lo12:.LCPI0_1]\nsshl v3.16b, v3.16b, v6.16b\nsshl v2.16b, v2.16b, v6.16b\nusra v3.16b, v3.16b, #7\nusra v2.16b, v2.16b, #7\nmls v0.16b, v3.16b, v4.16b\nmls v1.16b, v2.16b, v4.16b\nret",
82228222
"giasm": "sub sp, sp, #112\nstp x29, x30, [sp, #16] // 16-byte Folded Spill\nstp x28, x27, [sp, #32] // 16-byte Folded Spill\nstp x26, x25, [sp, #48] // 16-byte Folded Spill\nstp x24, x23, [sp, #64] // 16-byte Folded Spill\nstp x22, x21, [sp, #80] // 16-byte Folded Spill\nstp x20, x19, [sp, #96] // 16-byte Folded Spill\nsshll v2.8h, v0.8b, #0\nsshll v4.8h, v1.8b, #0\nmov w11, #7 // =0x7\nsshll v3.4s, v2.4h, #0\nsshll2 v2.4s, v2.8h, #0\nsshll v5.4s, v4.4h, #0\nsshll2 v4.4s, v4.8h, #0\nfmov w9, s3\nmov w10, v3.s[1]\nfmov w12, s2\nfmov w20, s4\nfmov w5, s5\nsdiv w13, w9, w11\nmov w9, #6 // =0x6\nsdiv w18, w12, w11\nmov w12, v2.s[1]\nfmov s6, w13\nsdiv w15, w10, w9\nmov w10, v3.s[2]\nfmov s7, w18\nsdiv w25, w20, w11\nmov w20, v4.s[1]\nmov v6.s[1], w15\nsdiv w1, w12, w9\nmov w12, v2.s[2]\nfmov s20, w25\nsdiv w21, w5, w11\nmov w5, v5.s[1]\nmov v7.s[1], w1\nsdiv w8, w10, w11\nmov w10, v3.s[3]\nsshll2 v3.8h, v0.16b, #0\nfmov s19, w21\nsshll v0.8h, v0.8b, #0\nsshll v24.4s, v0.4h, #0\nsshll2 v0.4s, v0.8h, #0\nsdiv w26, w20, w9\nmov w20, v4.s[2]\nstr w8, [sp, #8] // 4-byte Spill\nsdiv w17, w12, w11\nmov w12, v2.s[3]\nsshll v2.4s, v3.4h, #0\nmov v20.s[1], w26\nldp x26, x25, [sp, #48] // 16-byte Folded Reload\nfmov w14, s2\nsdiv w22, w5, w9\nmov w5, v5.s[2]\nmov v7.s[2], w17\nsdiv w23, w20, w11\nmov w20, v4.s[3]\nsshll2 v4.8h, v1.16b, #0\nmov v19.s[1], w22\nsshll v1.8h, v1.8b, #0\nldp x22, x21, [sp, #80] // 16-byte Folded Reload\nsshll v25.4s, v1.4h, #0\nsshll2 v1.4s, v1.8h, #0\nsdiv w19, w5, w11\nmov w5, v5.s[3]\nsshll v5.4s, v4.4h, #0\nmov v20.s[2], w23\nfmov w24, s5\nsdiv w2, w14, w11\nmov w14, v2.s[1]\nmov v19.s[2], w19\nsdiv w3, w14, w9\nmov w14, v2.s[2]\nfmov s17, w2\nsdiv w27, w24, w11\nmov w24, v5.s[1]\nmov v17.s[1], w3\nsdiv w16, w14, w11\nmov w14, v2.s[3]\nsshll2 v2.4s, v3.8h, #0\nfmov s21, w27\nsshll v3.4s, v3.4h, #0\nfmov w0, s2\nsdiv w28, w24, w9\nmov w24, v5.s[2]\nmov v17.s[2], w16\nsdiv w8, w10, w9\nmov v21.s[1], w28\nldp x28, x27, [sp, #32] // 16-byte Folded Reload\nsdiv w29, w24, w11\nmov w24, v5.s[3]\nsshll2 v5.4s, v4.8h, #0\nstr w8, [sp, #12] // 4-byte Spill\nsshll v4.4s, v4.4h, #0\nfmov w30, s5\nmov w10, v5.s[1]\nmov w8, v5.s[2]\nsdiv w6, w0, w11\nmov w0, v2.s[1]\nmov v21.s[2], w29\nsdiv w7, w0, w9\nmov w0, v2.s[2]\nfmov s18, w6\nsdiv w30, w30, w11\nmov v18.s[1], w7\nsdiv w10, w10, w9\nfmov s22, w30\nldp x29, x30, [sp, #16] // 16-byte Folded Reload\nsdiv w4, w0, w11\nmov w0, v2.s[3]\nmov v22.s[1], w10\nldr w10, [sp, #12] // 4-byte Reload\nsdiv w8, w8, w11\nadrp x11, .LCPI0_0\nmov v18.s[2], w4\nldr d16, [x11, :lo12:.LCPI0_0]\nldr w11, [sp, #8] // 4-byte Reload\nmov v6.s[2], w11\nmov w11, v5.s[3]\nsshll v16.8h, v16.8b, #0\nsshll v23.4s, v16.4h, #0\nsshll2 v16.4s, v16.8h, #0\nmov v6.s[3], w10\nsdiv w12, w12, w9\nmov v22.s[2], w8\nmls v24.4s, v6.4s, v23.4s\nsdiv w14, w14, w9\nmov v7.s[3], w12\nmls v0.4s, v7.4s, v16.4s\nsdiv w0, w0, w9\nmov v17.s[3], w14\nuzp1 v0.8h, v24.8h, v0.8h\nmls v3.4s, v17.4s, v23.4s\nsdiv w5, w5, w9\nmov v18.s[3], w0\nmls v2.4s, v18.4s, v16.4s\nsdiv w20, w20, w9\nmov v19.s[3], w5\nuzp1 v2.8h, v3.8h, v2.8h\nmls v25.4s, v19.4s, v23.4s\nuzp1 v0.16b, v0.16b, v2.16b\nsdiv w24, w24, w9\nmov v20.s[3], w20\nldp x20, x19, [sp, #96] // 16-byte Folded Reload\nmls v1.4s, v20.4s, v16.4s\nsdiv w9, w11, w9\nmov v21.s[3], w24\nuzp1 v1.8h, v25.8h, v1.8h\nldp x24, x23, [sp, #64] // 16-byte Folded Reload\nmls v4.4s, v21.4s, v23.4s\nmov v22.s[3], w9\nmls v5.4s, v22.4s, v16.4s\nuzp1 v3.8h, v4.8h, v5.8h\nuzp1 v1.16b, v1.16b, v3.16b\nadd sp, sp, #112\nret",
82238223
"ll": "define <32 x i8> @test(<32 x i8> %a) {\n %r = srem <32 x i8> %a, <i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6>\n ret <32 x i8> %r\n}",
82248224
"costoutput": "Printing analysis 'Cost Model Analysis' for function 'test':\nCost Model: Found costs of RThru:102 CodeSize:4 Lat:4 SizeLat:4 for: %r = srem <32 x i8> %a, <i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6>\nCost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %r"

data-int.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4215,10 +4215,10 @@
42154215
"thru": 38,
42164216
"lat": 4,
42174217
"sizelat": 4,
4218-
"size": 16,
4218+
"size": 17,
42194219
"gisize": 108,
42204220
"extrasize": 0,
4221-
"asm": "adrp x8, .LCPI0_0\nldr q2, [x8, :lo12:.LCPI0_0]\nsmull2 v3.8h, v0.16b, v2.16b\nsmull v4.8h, v0.8b, v2.8b\nsmull2 v5.8h, v1.16b, v2.16b\nsmull v2.8h, v1.8b, v2.8b\nuzp2 v3.16b, v4.16b, v3.16b\nmovi v4.8h, #1\nuzp2 v2.16b, v2.16b, v5.16b\nmovi v5.8h, #254\nmla v3.16b, v0.16b, v4.16b\nmla v2.16b, v1.16b, v4.16b\nsshl v0.16b, v3.16b, v5.16b\nsshl v1.16b, v2.16b, v5.16b\nusra v0.16b, v0.16b, #7\nusra v1.16b, v1.16b, #7\nret",
4221+
"asm": "adrp x8, .LCPI0_0\nldr q2, [x8, :lo12:.LCPI0_0]\nsmull2 v3.8h, v0.16b, v2.16b\nsmull v4.8h, v0.8b, v2.8b\nsmull2 v5.8h, v1.16b, v2.16b\nsmull v2.8h, v1.8b, v2.8b\nbic v0.8h, #255, lsl #8\nbic v1.8h, #255, lsl #8\nuzp2 v3.16b, v4.16b, v3.16b\nmovi v4.8h, #254\nuzp2 v2.16b, v2.16b, v5.16b\nadd v0.16b, v3.16b, v0.16b\nadd v1.16b, v2.16b, v1.16b\nsshl v0.16b, v0.16b, v4.16b\nsshl v1.16b, v1.16b, v4.16b\nusra v0.16b, v0.16b, #7\nusra v1.16b, v1.16b, #7\nret",
42224222
"giasm": "str x27, [sp, #-80]! // 8-byte Folded Spill\nstp x26, x25, [sp, #16] // 16-byte Folded Spill\nstp x24, x23, [sp, #32] // 16-byte Folded Spill\nstp x22, x21, [sp, #48] // 16-byte Folded Spill\nstp x20, x19, [sp, #64] // 16-byte Folded Spill\nsmov w8, v0.b[0]\nsmov w10, v0.b[1]\nmov w9, #7 // =0x7\nsmov w6, v1.b[0]\nsmov w27, v1.b[9]\nsdiv w25, w8, w9\nmov w8, #6 // =0x6\nsdiv w5, w10, w8\nsmov w10, v0.b[2]\nsdiv w4, w10, w9\nsmov w10, v0.b[3]\nsdiv w3, w10, w8\nsmov w10, v0.b[4]\nsdiv w2, w10, w9\nsmov w10, v0.b[5]\nsdiv w1, w10, w8\nsmov w10, v0.b[6]\nsdiv w0, w10, w9\nsmov w10, v0.b[7]\nsdiv w18, w10, w8\nsmov w10, v0.b[8]\nsdiv w17, w10, w9\nsmov w10, v0.b[9]\nsdiv w16, w10, w8\nsmov w10, v0.b[10]\nsdiv w15, w10, w9\nsmov w10, v0.b[11]\nsdiv w26, w6, w9\nsmov w6, v1.b[1]\nsdiv w14, w10, w8\nsmov w10, v0.b[12]\nfmov s2, w26\nsdiv w24, w6, w8\nsmov w6, v1.b[2]\nsdiv w13, w10, w9\nsmov w10, v0.b[13]\nmov v2.b[1], w24\nsdiv w23, w6, w9\nsmov w6, v1.b[3]\nsdiv w12, w10, w8\nsmov w10, v0.b[14]\nmov v2.b[2], w23\nldp x24, x23, [sp, #32] // 16-byte Folded Reload\nsdiv w22, w6, w8\nsmov w6, v1.b[4]\nsdiv w11, w10, w9\nsmov w10, v0.b[15]\nfmov s0, w25\nmov v2.b[3], w22\nmov v0.b[1], w5\nsmov w5, v1.b[10]\nmov v0.b[2], w4\nsdiv w21, w6, w9\nsmov w6, v1.b[5]\nmov v0.b[3], w3\nsmov w3, v1.b[11]\nmov v0.b[4], w2\nsdiv w20, w6, w8\nsmov w6, v1.b[6]\nmov v2.b[4], w21\nldp x22, x21, [sp, #48] // 16-byte Folded Reload\nmov v0.b[5], w1\nsmov w1, v1.b[12]\nmov v0.b[6], w0\nsdiv w19, w6, w9\nsmov w6, v1.b[7]\nmov v2.b[5], w20\nmov v0.b[7], w18\nsmov w18, v1.b[13]\nmov v0.b[8], w17\nsdiv w7, w6, w8\nsmov w6, v1.b[8]\nmov v2.b[6], w19\nldp x20, x19, [sp, #64] // 16-byte Folded Reload\nmov v0.b[9], w16\nsmov w16, v1.b[14]\nmov v0.b[10], w15\nsdiv w6, w6, w9\nmov v2.b[7], w7\nmov v0.b[11], w14\nsmov w14, v1.b[15]\nmov v0.b[12], w13\nsdiv w25, w27, w8\nmov v2.b[8], w6\nmov v0.b[13], w12\nmov v0.b[14], w11\nsdiv w4, w5, w9\nmov v2.b[9], w25\nldp x26, x25, [sp, #16] // 16-byte Folded Reload\nsdiv w2, w3, w8\nmov v2.b[10], w4\nsdiv w0, w1, w9\nmov v2.b[11], w2\nsdiv w17, w18, w8\nmov v2.b[12], w0\nsdiv w9, w16, w9\nmov v2.b[13], w17\nsdiv w10, w10, w8\nmov v2.b[14], w9\nsdiv w8, w14, w8\nmov v0.b[15], w10\nmov v2.b[15], w8\nmov v1.16b, v2.16b\nldr x27, [sp], #80 // 8-byte Folded Reload\nret",
42234223
"ll": "define <32 x i8> @test(<32 x i8> %a) {\n %r = sdiv <32 x i8> %a, <i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6>\n ret <32 x i8> %r\n}",
42244224
"costoutput": "Printing analysis 'Cost Model Analysis' for function 'test':\nCost Model: Found costs of RThru:38 CodeSize:4 Lat:4 SizeLat:4 for: %r = sdiv <32 x i8> %a, <i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6>\nCost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %r"
@@ -5047,10 +5047,10 @@
50475047
"thru": 102,
50485048
"lat": 4,
50495049
"sizelat": 4,
5050-
"size": 20,
5050+
"size": 21,
50515051
"gisize": 155,
50525052
"extrasize": 0,
5053-
"asm": "adrp x8, .LCPI0_0\nldr q2, [x8, :lo12:.LCPI0_0]\nadrp x8, .LCPI0_1\nsmull2 v3.8h, v0.16b, v2.16b\nsmull v4.8h, v0.8b, v2.8b\nsmull2 v5.8h, v1.16b, v2.16b\nsmull v2.8h, v1.8b, v2.8b\nuzp2 v3.16b, v4.16b, v3.16b\nmovi v4.8h, #1\nuzp2 v2.16b, v2.16b, v5.16b\nmovi v5.8h, #254\nmla v3.16b, v0.16b, v4.16b\nmla v2.16b, v1.16b, v4.16b\nldr q4, [x8, :lo12:.LCPI0_1]\nsshl v3.16b, v3.16b, v5.16b\nsshl v2.16b, v2.16b, v5.16b\nusra v3.16b, v3.16b, #7\nusra v2.16b, v2.16b, #7\nmls v0.16b, v3.16b, v4.16b\nmls v1.16b, v2.16b, v4.16b\nret",
5053+
"asm": "adrp x8, .LCPI0_0\nmovi v6.8h, #254\nldr q2, [x8, :lo12:.LCPI0_0]\nadrp x8, .LCPI0_1\nsmull2 v3.8h, v0.16b, v2.16b\nsmull v4.8h, v0.8b, v2.8b\nsmull2 v5.8h, v1.16b, v2.16b\nsmull v2.8h, v1.8b, v2.8b\nuzp2 v3.16b, v4.16b, v3.16b\nmov v4.16b, v0.16b\nuzp2 v2.16b, v2.16b, v5.16b\nmov v5.16b, v1.16b\nbic v4.8h, #255, lsl #8\nbic v5.8h, #255, lsl #8\nadd v3.16b, v3.16b, v4.16b\nldr q4, [x8, :lo12:.LCPI0_1]\nadd v2.16b, v2.16b, v5.16b\nsshl v3.16b, v3.16b, v6.16b\nsshl v2.16b, v2.16b, v6.16b\nusra v3.16b, v3.16b, #7\nusra v2.16b, v2.16b, #7\nmls v0.16b, v3.16b, v4.16b\nmls v1.16b, v2.16b, v4.16b\nret",
50545054
"giasm": "sub sp, sp, #112\nstp x29, x30, [sp, #16] // 16-byte Folded Spill\nstp x28, x27, [sp, #32] // 16-byte Folded Spill\nstp x26, x25, [sp, #48] // 16-byte Folded Spill\nstp x24, x23, [sp, #64] // 16-byte Folded Spill\nstp x22, x21, [sp, #80] // 16-byte Folded Spill\nstp x20, x19, [sp, #96] // 16-byte Folded Spill\nsshll v2.8h, v0.8b, #0\nsshll v4.8h, v1.8b, #0\nmov w11, #7 // =0x7\nsshll v3.4s, v2.4h, #0\nsshll2 v2.4s, v2.8h, #0\nsshll v5.4s, v4.4h, #0\nsshll2 v4.4s, v4.8h, #0\nfmov w9, s3\nmov w10, v3.s[1]\nfmov w12, s2\nfmov w20, s4\nfmov w5, s5\nsdiv w13, w9, w11\nmov w9, #6 // =0x6\nsdiv w18, w12, w11\nmov w12, v2.s[1]\nfmov s6, w13\nsdiv w15, w10, w9\nmov w10, v3.s[2]\nfmov s7, w18\nsdiv w25, w20, w11\nmov w20, v4.s[1]\nmov v6.s[1], w15\nsdiv w1, w12, w9\nmov w12, v2.s[2]\nfmov s20, w25\nsdiv w21, w5, w11\nmov w5, v5.s[1]\nmov v7.s[1], w1\nsdiv w8, w10, w11\nmov w10, v3.s[3]\nsshll2 v3.8h, v0.16b, #0\nfmov s19, w21\nsshll v0.8h, v0.8b, #0\nsshll v24.4s, v0.4h, #0\nsshll2 v0.4s, v0.8h, #0\nsdiv w26, w20, w9\nmov w20, v4.s[2]\nstr w8, [sp, #8] // 4-byte Spill\nsdiv w17, w12, w11\nmov w12, v2.s[3]\nsshll v2.4s, v3.4h, #0\nmov v20.s[1], w26\nldp x26, x25, [sp, #48] // 16-byte Folded Reload\nfmov w14, s2\nsdiv w22, w5, w9\nmov w5, v5.s[2]\nmov v7.s[2], w17\nsdiv w23, w20, w11\nmov w20, v4.s[3]\nsshll2 v4.8h, v1.16b, #0\nmov v19.s[1], w22\nsshll v1.8h, v1.8b, #0\nldp x22, x21, [sp, #80] // 16-byte Folded Reload\nsshll v25.4s, v1.4h, #0\nsshll2 v1.4s, v1.8h, #0\nsdiv w19, w5, w11\nmov w5, v5.s[3]\nsshll v5.4s, v4.4h, #0\nmov v20.s[2], w23\nfmov w24, s5\nsdiv w2, w14, w11\nmov w14, v2.s[1]\nmov v19.s[2], w19\nsdiv w3, w14, w9\nmov w14, v2.s[2]\nfmov s17, w2\nsdiv w27, w24, w11\nmov w24, v5.s[1]\nmov v17.s[1], w3\nsdiv w16, w14, w11\nmov w14, v2.s[3]\nsshll2 v2.4s, v3.8h, #0\nfmov s21, w27\nsshll v3.4s, v3.4h, #0\nfmov w0, s2\nsdiv w28, w24, w9\nmov w24, v5.s[2]\nmov v17.s[2], w16\nsdiv w8, w10, w9\nmov v21.s[1], w28\nldp x28, x27, [sp, #32] // 16-byte Folded Reload\nsdiv w29, w24, w11\nmov w24, v5.s[3]\nsshll2 v5.4s, v4.8h, #0\nstr w8, [sp, #12] // 4-byte Spill\nsshll v4.4s, v4.4h, #0\nfmov w30, s5\nmov w10, v5.s[1]\nmov w8, v5.s[2]\nsdiv w6, w0, w11\nmov w0, v2.s[1]\nmov v21.s[2], w29\nsdiv w7, w0, w9\nmov w0, v2.s[2]\nfmov s18, w6\nsdiv w30, w30, w11\nmov v18.s[1], w7\nsdiv w10, w10, w9\nfmov s22, w30\nldp x29, x30, [sp, #16] // 16-byte Folded Reload\nsdiv w4, w0, w11\nmov w0, v2.s[3]\nmov v22.s[1], w10\nldr w10, [sp, #12] // 4-byte Reload\nsdiv w8, w8, w11\nadrp x11, .LCPI0_0\nmov v18.s[2], w4\nldr d16, [x11, :lo12:.LCPI0_0]\nldr w11, [sp, #8] // 4-byte Reload\nmov v6.s[2], w11\nmov w11, v5.s[3]\nsshll v16.8h, v16.8b, #0\nsshll v23.4s, v16.4h, #0\nsshll2 v16.4s, v16.8h, #0\nmov v6.s[3], w10\nsdiv w12, w12, w9\nmov v22.s[2], w8\nmls v24.4s, v6.4s, v23.4s\nsdiv w14, w14, w9\nmov v7.s[3], w12\nmls v0.4s, v7.4s, v16.4s\nsdiv w0, w0, w9\nmov v17.s[3], w14\nuzp1 v0.8h, v24.8h, v0.8h\nmls v3.4s, v17.4s, v23.4s\nsdiv w5, w5, w9\nmov v18.s[3], w0\nmls v2.4s, v18.4s, v16.4s\nsdiv w20, w20, w9\nmov v19.s[3], w5\nuzp1 v2.8h, v3.8h, v2.8h\nmls v25.4s, v19.4s, v23.4s\nuzp1 v0.16b, v0.16b, v2.16b\nsdiv w24, w24, w9\nmov v20.s[3], w20\nldp x20, x19, [sp, #96] // 16-byte Folded Reload\nmls v1.4s, v20.4s, v16.4s\nsdiv w9, w11, w9\nmov v21.s[3], w24\nuzp1 v1.8h, v25.8h, v1.8h\nldp x24, x23, [sp, #64] // 16-byte Folded Reload\nmls v4.4s, v21.4s, v23.4s\nmov v22.s[3], w9\nmls v5.4s, v22.4s, v16.4s\nuzp1 v3.8h, v4.8h, v5.8h\nuzp1 v1.16b, v1.16b, v3.16b\nadd sp, sp, #112\nret",
50555055
"ll": "define <32 x i8> @test(<32 x i8> %a) {\n %r = srem <32 x i8> %a, <i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6>\n ret <32 x i8> %r\n}",
50565056
"costoutput": "Printing analysis 'Cost Model Analysis' for function 'test':\nCost Model: Found costs of RThru:102 CodeSize:4 Lat:4 SizeLat:4 for: %r = srem <32 x i8> %a, <i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6>\nCost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %r"

0 commit comments

Comments
 (0)