|
4215 | 4215 | "thru": 38, |
4216 | 4216 | "lat": 4, |
4217 | 4217 | "sizelat": 4, |
4218 | | - "size": 16, |
| 4218 | + "size": 17, |
4219 | 4219 | "gisize": 108, |
4220 | 4220 | "extrasize": 0, |
4221 | | - "asm": "adrp x8, .LCPI0_0\nldr q2, [x8, :lo12:.LCPI0_0]\nsmull2 v3.8h, v0.16b, v2.16b\nsmull v4.8h, v0.8b, v2.8b\nsmull2 v5.8h, v1.16b, v2.16b\nsmull v2.8h, v1.8b, v2.8b\nuzp2 v3.16b, v4.16b, v3.16b\nmovi v4.8h, #1\nuzp2 v2.16b, v2.16b, v5.16b\nmovi v5.8h, #254\nmla v3.16b, v0.16b, v4.16b\nmla v2.16b, v1.16b, v4.16b\nsshl v0.16b, v3.16b, v5.16b\nsshl v1.16b, v2.16b, v5.16b\nusra v0.16b, v0.16b, #7\nusra v1.16b, v1.16b, #7\nret", |
| 4221 | + "asm": "adrp x8, .LCPI0_0\nldr q2, [x8, :lo12:.LCPI0_0]\nsmull2 v3.8h, v0.16b, v2.16b\nsmull v4.8h, v0.8b, v2.8b\nsmull2 v5.8h, v1.16b, v2.16b\nsmull v2.8h, v1.8b, v2.8b\nbic v0.8h, #255, lsl #8\nbic v1.8h, #255, lsl #8\nuzp2 v3.16b, v4.16b, v3.16b\nmovi v4.8h, #254\nuzp2 v2.16b, v2.16b, v5.16b\nadd v0.16b, v3.16b, v0.16b\nadd v1.16b, v2.16b, v1.16b\nsshl v0.16b, v0.16b, v4.16b\nsshl v1.16b, v1.16b, v4.16b\nusra v0.16b, v0.16b, #7\nusra v1.16b, v1.16b, #7\nret", |
4222 | 4222 | "giasm": "str x27, [sp, #-80]! // 8-byte Folded Spill\nstp x26, x25, [sp, #16] // 16-byte Folded Spill\nstp x24, x23, [sp, #32] // 16-byte Folded Spill\nstp x22, x21, [sp, #48] // 16-byte Folded Spill\nstp x20, x19, [sp, #64] // 16-byte Folded Spill\nsmov w8, v0.b[0]\nsmov w10, v0.b[1]\nmov w9, #7 // =0x7\nsmov w6, v1.b[0]\nsmov w27, v1.b[9]\nsdiv w25, w8, w9\nmov w8, #6 // =0x6\nsdiv w5, w10, w8\nsmov w10, v0.b[2]\nsdiv w4, w10, w9\nsmov w10, v0.b[3]\nsdiv w3, w10, w8\nsmov w10, v0.b[4]\nsdiv w2, w10, w9\nsmov w10, v0.b[5]\nsdiv w1, w10, w8\nsmov w10, v0.b[6]\nsdiv w0, w10, w9\nsmov w10, v0.b[7]\nsdiv w18, w10, w8\nsmov w10, v0.b[8]\nsdiv w17, w10, w9\nsmov w10, v0.b[9]\nsdiv w16, w10, w8\nsmov w10, v0.b[10]\nsdiv w15, w10, w9\nsmov w10, v0.b[11]\nsdiv w26, w6, w9\nsmov w6, v1.b[1]\nsdiv w14, w10, w8\nsmov w10, v0.b[12]\nfmov s2, w26\nsdiv w24, w6, w8\nsmov w6, v1.b[2]\nsdiv w13, w10, w9\nsmov w10, v0.b[13]\nmov v2.b[1], w24\nsdiv w23, w6, w9\nsmov w6, v1.b[3]\nsdiv w12, w10, w8\nsmov w10, v0.b[14]\nmov v2.b[2], w23\nldp x24, x23, [sp, #32] // 16-byte Folded Reload\nsdiv w22, w6, w8\nsmov w6, v1.b[4]\nsdiv w11, w10, w9\nsmov w10, v0.b[15]\nfmov s0, w25\nmov v2.b[3], w22\nmov v0.b[1], w5\nsmov w5, v1.b[10]\nmov v0.b[2], w4\nsdiv w21, w6, w9\nsmov w6, v1.b[5]\nmov v0.b[3], w3\nsmov w3, v1.b[11]\nmov v0.b[4], w2\nsdiv w20, w6, w8\nsmov w6, v1.b[6]\nmov v2.b[4], w21\nldp x22, x21, [sp, #48] // 16-byte Folded Reload\nmov v0.b[5], w1\nsmov w1, v1.b[12]\nmov v0.b[6], w0\nsdiv w19, w6, w9\nsmov w6, v1.b[7]\nmov v2.b[5], w20\nmov v0.b[7], w18\nsmov w18, v1.b[13]\nmov v0.b[8], w17\nsdiv w7, w6, w8\nsmov w6, v1.b[8]\nmov v2.b[6], w19\nldp x20, x19, [sp, #64] // 16-byte Folded Reload\nmov v0.b[9], w16\nsmov w16, v1.b[14]\nmov v0.b[10], w15\nsdiv w6, w6, w9\nmov v2.b[7], w7\nmov v0.b[11], w14\nsmov w14, v1.b[15]\nmov v0.b[12], w13\nsdiv w25, w27, w8\nmov v2.b[8], w6\nmov v0.b[13], w12\nmov v0.b[14], w11\nsdiv w4, w5, w9\nmov v2.b[9], w25\nldp x26, x25, [sp, #16] // 16-byte Folded Reload\nsdiv w2, w3, w8\nmov v2.b[10], w4\nsdiv w0, w1, w9\nmov v2.b[11], w2\nsdiv w17, w18, w8\nmov v2.b[12], w0\nsdiv w9, w16, w9\nmov v2.b[13], w17\nsdiv w10, w10, w8\nmov v2.b[14], w9\nsdiv w8, w14, w8\nmov v0.b[15], w10\nmov v2.b[15], w8\nmov v1.16b, v2.16b\nldr x27, [sp], #80 // 8-byte Folded Reload\nret", |
4223 | 4223 | "ll": "define <32 x i8> @test(<32 x i8> %a) {\n %r = sdiv <32 x i8> %a, <i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6>\n ret <32 x i8> %r\n}", |
4224 | 4224 | "costoutput": "Printing analysis 'Cost Model Analysis' for function 'test':\nCost Model: Found costs of RThru:38 CodeSize:4 Lat:4 SizeLat:4 for: %r = sdiv <32 x i8> %a, <i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6>\nCost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %r" |
|
5047 | 5047 | "thru": 102, |
5048 | 5048 | "lat": 4, |
5049 | 5049 | "sizelat": 4, |
5050 | | - "size": 20, |
| 5050 | + "size": 21, |
5051 | 5051 | "gisize": 155, |
5052 | 5052 | "extrasize": 0, |
5053 | | - "asm": "adrp x8, .LCPI0_0\nldr q2, [x8, :lo12:.LCPI0_0]\nadrp x8, .LCPI0_1\nsmull2 v3.8h, v0.16b, v2.16b\nsmull v4.8h, v0.8b, v2.8b\nsmull2 v5.8h, v1.16b, v2.16b\nsmull v2.8h, v1.8b, v2.8b\nuzp2 v3.16b, v4.16b, v3.16b\nmovi v4.8h, #1\nuzp2 v2.16b, v2.16b, v5.16b\nmovi v5.8h, #254\nmla v3.16b, v0.16b, v4.16b\nmla v2.16b, v1.16b, v4.16b\nldr q4, [x8, :lo12:.LCPI0_1]\nsshl v3.16b, v3.16b, v5.16b\nsshl v2.16b, v2.16b, v5.16b\nusra v3.16b, v3.16b, #7\nusra v2.16b, v2.16b, #7\nmls v0.16b, v3.16b, v4.16b\nmls v1.16b, v2.16b, v4.16b\nret", |
| 5053 | + "asm": "adrp x8, .LCPI0_0\nmovi v6.8h, #254\nldr q2, [x8, :lo12:.LCPI0_0]\nadrp x8, .LCPI0_1\nsmull2 v3.8h, v0.16b, v2.16b\nsmull v4.8h, v0.8b, v2.8b\nsmull2 v5.8h, v1.16b, v2.16b\nsmull v2.8h, v1.8b, v2.8b\nuzp2 v3.16b, v4.16b, v3.16b\nmov v4.16b, v0.16b\nuzp2 v2.16b, v2.16b, v5.16b\nmov v5.16b, v1.16b\nbic v4.8h, #255, lsl #8\nbic v5.8h, #255, lsl #8\nadd v3.16b, v3.16b, v4.16b\nldr q4, [x8, :lo12:.LCPI0_1]\nadd v2.16b, v2.16b, v5.16b\nsshl v3.16b, v3.16b, v6.16b\nsshl v2.16b, v2.16b, v6.16b\nusra v3.16b, v3.16b, #7\nusra v2.16b, v2.16b, #7\nmls v0.16b, v3.16b, v4.16b\nmls v1.16b, v2.16b, v4.16b\nret", |
5054 | 5054 | "giasm": "sub sp, sp, #112\nstp x29, x30, [sp, #16] // 16-byte Folded Spill\nstp x28, x27, [sp, #32] // 16-byte Folded Spill\nstp x26, x25, [sp, #48] // 16-byte Folded Spill\nstp x24, x23, [sp, #64] // 16-byte Folded Spill\nstp x22, x21, [sp, #80] // 16-byte Folded Spill\nstp x20, x19, [sp, #96] // 16-byte Folded Spill\nsshll v2.8h, v0.8b, #0\nsshll v4.8h, v1.8b, #0\nmov w11, #7 // =0x7\nsshll v3.4s, v2.4h, #0\nsshll2 v2.4s, v2.8h, #0\nsshll v5.4s, v4.4h, #0\nsshll2 v4.4s, v4.8h, #0\nfmov w9, s3\nmov w10, v3.s[1]\nfmov w12, s2\nfmov w20, s4\nfmov w5, s5\nsdiv w13, w9, w11\nmov w9, #6 // =0x6\nsdiv w18, w12, w11\nmov w12, v2.s[1]\nfmov s6, w13\nsdiv w15, w10, w9\nmov w10, v3.s[2]\nfmov s7, w18\nsdiv w25, w20, w11\nmov w20, v4.s[1]\nmov v6.s[1], w15\nsdiv w1, w12, w9\nmov w12, v2.s[2]\nfmov s20, w25\nsdiv w21, w5, w11\nmov w5, v5.s[1]\nmov v7.s[1], w1\nsdiv w8, w10, w11\nmov w10, v3.s[3]\nsshll2 v3.8h, v0.16b, #0\nfmov s19, w21\nsshll v0.8h, v0.8b, #0\nsshll v24.4s, v0.4h, #0\nsshll2 v0.4s, v0.8h, #0\nsdiv w26, w20, w9\nmov w20, v4.s[2]\nstr w8, [sp, #8] // 4-byte Spill\nsdiv w17, w12, w11\nmov w12, v2.s[3]\nsshll v2.4s, v3.4h, #0\nmov v20.s[1], w26\nldp x26, x25, [sp, #48] // 16-byte Folded Reload\nfmov w14, s2\nsdiv w22, w5, w9\nmov w5, v5.s[2]\nmov v7.s[2], w17\nsdiv w23, w20, w11\nmov w20, v4.s[3]\nsshll2 v4.8h, v1.16b, #0\nmov v19.s[1], w22\nsshll v1.8h, v1.8b, #0\nldp x22, x21, [sp, #80] // 16-byte Folded Reload\nsshll v25.4s, v1.4h, #0\nsshll2 v1.4s, v1.8h, #0\nsdiv w19, w5, w11\nmov w5, v5.s[3]\nsshll v5.4s, v4.4h, #0\nmov v20.s[2], w23\nfmov w24, s5\nsdiv w2, w14, w11\nmov w14, v2.s[1]\nmov v19.s[2], w19\nsdiv w3, w14, w9\nmov w14, v2.s[2]\nfmov s17, w2\nsdiv w27, w24, w11\nmov w24, v5.s[1]\nmov v17.s[1], w3\nsdiv w16, w14, w11\nmov w14, v2.s[3]\nsshll2 v2.4s, v3.8h, #0\nfmov s21, w27\nsshll v3.4s, v3.4h, #0\nfmov w0, s2\nsdiv w28, w24, w9\nmov w24, v5.s[2]\nmov v17.s[2], w16\nsdiv w8, w10, w9\nmov v21.s[1], w28\nldp x28, x27, [sp, #32] // 16-byte Folded Reload\nsdiv w29, w24, w11\nmov w24, v5.s[3]\nsshll2 v5.4s, v4.8h, #0\nstr w8, [sp, #12] // 4-byte Spill\nsshll v4.4s, v4.4h, #0\nfmov w30, s5\nmov w10, v5.s[1]\nmov w8, v5.s[2]\nsdiv w6, w0, w11\nmov w0, v2.s[1]\nmov v21.s[2], w29\nsdiv w7, w0, w9\nmov w0, v2.s[2]\nfmov s18, w6\nsdiv w30, w30, w11\nmov v18.s[1], w7\nsdiv w10, w10, w9\nfmov s22, w30\nldp x29, x30, [sp, #16] // 16-byte Folded Reload\nsdiv w4, w0, w11\nmov w0, v2.s[3]\nmov v22.s[1], w10\nldr w10, [sp, #12] // 4-byte Reload\nsdiv w8, w8, w11\nadrp x11, .LCPI0_0\nmov v18.s[2], w4\nldr d16, [x11, :lo12:.LCPI0_0]\nldr w11, [sp, #8] // 4-byte Reload\nmov v6.s[2], w11\nmov w11, v5.s[3]\nsshll v16.8h, v16.8b, #0\nsshll v23.4s, v16.4h, #0\nsshll2 v16.4s, v16.8h, #0\nmov v6.s[3], w10\nsdiv w12, w12, w9\nmov v22.s[2], w8\nmls v24.4s, v6.4s, v23.4s\nsdiv w14, w14, w9\nmov v7.s[3], w12\nmls v0.4s, v7.4s, v16.4s\nsdiv w0, w0, w9\nmov v17.s[3], w14\nuzp1 v0.8h, v24.8h, v0.8h\nmls v3.4s, v17.4s, v23.4s\nsdiv w5, w5, w9\nmov v18.s[3], w0\nmls v2.4s, v18.4s, v16.4s\nsdiv w20, w20, w9\nmov v19.s[3], w5\nuzp1 v2.8h, v3.8h, v2.8h\nmls v25.4s, v19.4s, v23.4s\nuzp1 v0.16b, v0.16b, v2.16b\nsdiv w24, w24, w9\nmov v20.s[3], w20\nldp x20, x19, [sp, #96] // 16-byte Folded Reload\nmls v1.4s, v20.4s, v16.4s\nsdiv w9, w11, w9\nmov v21.s[3], w24\nuzp1 v1.8h, v25.8h, v1.8h\nldp x24, x23, [sp, #64] // 16-byte Folded Reload\nmls v4.4s, v21.4s, v23.4s\nmov v22.s[3], w9\nmls v5.4s, v22.4s, v16.4s\nuzp1 v3.8h, v4.8h, v5.8h\nuzp1 v1.16b, v1.16b, v3.16b\nadd sp, sp, #112\nret", |
5055 | 5055 | "ll": "define <32 x i8> @test(<32 x i8> %a) {\n %r = srem <32 x i8> %a, <i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6>\n ret <32 x i8> %r\n}", |
5056 | 5056 | "costoutput": "Printing analysis 'Cost Model Analysis' for function 'test':\nCost Model: Found costs of RThru:102 CodeSize:4 Lat:4 SizeLat:4 for: %r = srem <32 x i8> %a, <i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6, i8 7, i8 6>\nCost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %r" |
|
0 commit comments