@@ -165,6 +165,39 @@ define <vscale x 16 x i8> @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) {
165165 ret <vscale x 16 x i8 > %b
166166}
167167
168+ ; TODO: Implement the DAG combine.
169+ ; Test the INSERT_VECTOR_ELT(poison, ...) -> SPLAT_VECTOR combine.
170+ ; <vscale x 16 x i8> is used as a proxy for testing via IR, but the combine
171+ ; is agnostic of the element type.
172+
173+ define <vscale x 16 x i8 > @test_lanex_16xi8_poison (i8 %e , i32 %x ) {
174+ ; CHECK-LABEL: test_lanex_16xi8_poison:
175+ ; CHECK: // %bb.0:
176+ ; CHECK-NEXT: index z0.b, #0, #1
177+ ; CHECK-NEXT: mov w8, w1
178+ ; CHECK-NEXT: ptrue p0.b
179+ ; CHECK-NEXT: mov z1.b, w8
180+ ; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, z1.b
181+ ; CHECK-NEXT: mov z0.b, p0/m, w0
182+ ; CHECK-NEXT: ret
183+ %b = insertelement <vscale x 16 x i8 > poison, i8 %e , i32 %x
184+ ret <vscale x 16 x i8 > %b
185+ }
186+
187+ define <vscale x 16 x i8 > @test_lanex_16xi8_poison_imm (i8 %e , i32 %x ) {
188+ ; CHECK-LABEL: test_lanex_16xi8_poison_imm:
189+ ; CHECK: // %bb.0:
190+ ; CHECK-NEXT: index z0.b, #0, #1
191+ ; CHECK-NEXT: mov w8, w1
192+ ; CHECK-NEXT: ptrue p0.b
193+ ; CHECK-NEXT: mov z1.b, w8
194+ ; CHECK-NEXT: mov w8, #5 // =0x5
195+ ; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, z1.b
196+ ; CHECK-NEXT: mov z0.b, p0/m, w8
197+ ; CHECK-NEXT: ret
198+ %b = insertelement <vscale x 16 x i8 > poison, i8 5 , i32 %x
199+ ret <vscale x 16 x i8 > %b
200+ }
168201
169202; Redundant lane insert
170203define <vscale x 4 x i32 > @extract_insert_4xi32 (<vscale x 4 x i32 > %a ) {
@@ -176,20 +209,7 @@ define <vscale x 4 x i32> @extract_insert_4xi32(<vscale x 4 x i32> %a) {
176209 ret <vscale x 4 x i32 > %c
177210}
178211
179- define <vscale x 8 x i16 > @test_lane6_undef_8xi16 (i16 %a ) {
180- ; CHECK-LABEL: test_lane6_undef_8xi16:
181- ; CHECK: // %bb.0:
182- ; CHECK-NEXT: mov w8, #6 // =0x6
183- ; CHECK-NEXT: index z0.h, #0, #1
184- ; CHECK-NEXT: ptrue p0.h
185- ; CHECK-NEXT: mov z1.h, w8
186- ; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z1.h
187- ; CHECK-NEXT: mov z0.h, p0/m, w0
188- ; CHECK-NEXT: ret
189- %b = insertelement <vscale x 8 x i16 > poison, i16 %a , i32 6
190- ret <vscale x 8 x i16 > %b
191- }
192-
212+ ; Inserting lane 0 into poison uses fmov instead of broadcasting to all lanes
193213define <vscale x 16 x i8 > @test_lane0_undef_16xi8 (i8 %a ) {
194214; CHECK-LABEL: test_lane0_undef_16xi8:
195215; CHECK: // %bb.0:
@@ -326,120 +346,120 @@ define <vscale x 2 x double> @test_insert_into_undef_nxv2f64(double %a) {
326346}
327347
328348; Insert scalar at index
329- define <vscale x 2 x half > @test_insert_with_index_nxv2f16 (half %h , i64 %idx ) {
349+ define <vscale x 2 x half > @test_insert_with_index_nxv2f16 (<vscale x 2 x half > %a , half %h , i64 %idx ) {
330350; CHECK-LABEL: test_insert_with_index_nxv2f16:
331351; CHECK: // %bb.0:
332- ; CHECK-NEXT: index z1 .d, #0, #1
333- ; CHECK-NEXT: mov z2 .d, x0
352+ ; CHECK-NEXT: index z2 .d, #0, #1
353+ ; CHECK-NEXT: mov z3 .d, x0
334354; CHECK-NEXT: ptrue p0.d
335- ; CHECK-NEXT: cmpeq p0.d, p0/z, z1 .d, z2 .d
336- ; CHECK-NEXT: mov z0.h, p0/m, h0
355+ ; CHECK-NEXT: cmpeq p0.d, p0/z, z2 .d, z3 .d
356+ ; CHECK-NEXT: mov z0.h, p0/m, h1
337357; CHECK-NEXT: ret
338- %res = insertelement <vscale x 2 x half > poison , half %h , i64 %idx
358+ %res = insertelement <vscale x 2 x half > %a , half %h , i64 %idx
339359 ret <vscale x 2 x half > %res
340360}
341361
342- define <vscale x 4 x half > @test_insert_with_index_nxv4f16 (half %h , i64 %idx ) {
362+ define <vscale x 4 x half > @test_insert_with_index_nxv4f16 (<vscale x 4 x half > %a , half %h , i64 %idx ) {
343363; CHECK-LABEL: test_insert_with_index_nxv4f16:
344364; CHECK: // %bb.0:
345- ; CHECK-NEXT: index z1 .s, #0, #1
346- ; CHECK-NEXT: mov z2 .s, w0
365+ ; CHECK-NEXT: index z2 .s, #0, #1
366+ ; CHECK-NEXT: mov z3 .s, w0
347367; CHECK-NEXT: ptrue p0.s
348- ; CHECK-NEXT: cmpeq p0.s, p0/z, z1 .s, z2 .s
349- ; CHECK-NEXT: mov z0.h, p0/m, h0
368+ ; CHECK-NEXT: cmpeq p0.s, p0/z, z2 .s, z3 .s
369+ ; CHECK-NEXT: mov z0.h, p0/m, h1
350370; CHECK-NEXT: ret
351- %res = insertelement <vscale x 4 x half > poison , half %h , i64 %idx
371+ %res = insertelement <vscale x 4 x half > %a , half %h , i64 %idx
352372 ret <vscale x 4 x half > %res
353373}
354374
355- define <vscale x 8 x half > @test_insert_with_index_nxv8f16 (half %h , i64 %idx ) {
375+ define <vscale x 8 x half > @test_insert_with_index_nxv8f16 (<vscale x 8 x half > %a , half %h , i64 %idx ) {
356376; CHECK-LABEL: test_insert_with_index_nxv8f16:
357377; CHECK: // %bb.0:
358- ; CHECK-NEXT: index z1 .h, #0, #1
359- ; CHECK-NEXT: mov z2 .h, w0
378+ ; CHECK-NEXT: index z2 .h, #0, #1
379+ ; CHECK-NEXT: mov z3 .h, w0
360380; CHECK-NEXT: ptrue p0.h
361- ; CHECK-NEXT: cmpeq p0.h, p0/z, z1 .h, z2 .h
362- ; CHECK-NEXT: mov z0.h, p0/m, h0
381+ ; CHECK-NEXT: cmpeq p0.h, p0/z, z2 .h, z3 .h
382+ ; CHECK-NEXT: mov z0.h, p0/m, h1
363383; CHECK-NEXT: ret
364- %res = insertelement <vscale x 8 x half > poison , half %h , i64 %idx
384+ %res = insertelement <vscale x 8 x half > %a , half %h , i64 %idx
365385 ret <vscale x 8 x half > %res
366386}
367387
368- define <vscale x 2 x bfloat> @test_insert_with_index_nxv2bf16 (bfloat %h , i64 %idx ) {
388+ define <vscale x 2 x bfloat> @test_insert_with_index_nxv2bf16 (<vscale x 2 x bfloat> %a , bfloat %h , i64 %idx ) {
369389; CHECK-LABEL: test_insert_with_index_nxv2bf16:
370390; CHECK: // %bb.0:
371- ; CHECK-NEXT: index z1 .d, #0, #1
372- ; CHECK-NEXT: mov z2 .d, x0
391+ ; CHECK-NEXT: index z2 .d, #0, #1
392+ ; CHECK-NEXT: mov z3 .d, x0
373393; CHECK-NEXT: ptrue p0.d
374- ; CHECK-NEXT: cmpeq p0.d, p0/z, z1 .d, z2 .d
375- ; CHECK-NEXT: mov z0.h, p0/m, h0
394+ ; CHECK-NEXT: cmpeq p0.d, p0/z, z2 .d, z3 .d
395+ ; CHECK-NEXT: mov z0.h, p0/m, h1
376396; CHECK-NEXT: ret
377- %res = insertelement <vscale x 2 x bfloat> poison , bfloat %h , i64 %idx
397+ %res = insertelement <vscale x 2 x bfloat> %a , bfloat %h , i64 %idx
378398 ret <vscale x 2 x bfloat> %res
379399}
380400
381- define <vscale x 4 x bfloat> @test_insert_with_index_nxv4bf16 (bfloat %h , i64 %idx ) {
401+ define <vscale x 4 x bfloat> @test_insert_with_index_nxv4bf16 (<vscale x 4 x bfloat> %a , bfloat %h , i64 %idx ) {
382402; CHECK-LABEL: test_insert_with_index_nxv4bf16:
383403; CHECK: // %bb.0:
384- ; CHECK-NEXT: index z1 .s, #0, #1
385- ; CHECK-NEXT: mov z2 .s, w0
404+ ; CHECK-NEXT: index z2 .s, #0, #1
405+ ; CHECK-NEXT: mov z3 .s, w0
386406; CHECK-NEXT: ptrue p0.s
387- ; CHECK-NEXT: cmpeq p0.s, p0/z, z1 .s, z2 .s
388- ; CHECK-NEXT: mov z0.h, p0/m, h0
407+ ; CHECK-NEXT: cmpeq p0.s, p0/z, z2 .s, z3 .s
408+ ; CHECK-NEXT: mov z0.h, p0/m, h1
389409; CHECK-NEXT: ret
390- %res = insertelement <vscale x 4 x bfloat> poison , bfloat %h , i64 %idx
410+ %res = insertelement <vscale x 4 x bfloat> %a , bfloat %h , i64 %idx
391411 ret <vscale x 4 x bfloat> %res
392412}
393413
394- define <vscale x 8 x bfloat> @test_insert_with_index_nxv8bf16 (bfloat %h , i64 %idx ) {
414+ define <vscale x 8 x bfloat> @test_insert_with_index_nxv8bf16 (<vscale x 8 x bfloat> %a , bfloat %h , i64 %idx ) {
395415; CHECK-LABEL: test_insert_with_index_nxv8bf16:
396416; CHECK: // %bb.0:
397- ; CHECK-NEXT: index z1 .h, #0, #1
398- ; CHECK-NEXT: mov z2 .h, w0
417+ ; CHECK-NEXT: index z2 .h, #0, #1
418+ ; CHECK-NEXT: mov z3 .h, w0
399419; CHECK-NEXT: ptrue p0.h
400- ; CHECK-NEXT: cmpeq p0.h, p0/z, z1 .h, z2 .h
401- ; CHECK-NEXT: mov z0.h, p0/m, h0
420+ ; CHECK-NEXT: cmpeq p0.h, p0/z, z2 .h, z3 .h
421+ ; CHECK-NEXT: mov z0.h, p0/m, h1
402422; CHECK-NEXT: ret
403- %res = insertelement <vscale x 8 x bfloat> poison , bfloat %h , i64 %idx
423+ %res = insertelement <vscale x 8 x bfloat> %a , bfloat %h , i64 %idx
404424 ret <vscale x 8 x bfloat> %res
405425}
406426
407- define <vscale x 2 x float > @test_insert_with_index_nxv2f32 (float %f , i64 %idx ) {
427+ define <vscale x 2 x float > @test_insert_with_index_nxv2f32 (<vscale x 2 x float > %a , float %f , i64 %idx ) {
408428; CHECK-LABEL: test_insert_with_index_nxv2f32:
409429; CHECK: // %bb.0:
410- ; CHECK-NEXT: index z1 .d, #0, #1
411- ; CHECK-NEXT: mov z2 .d, x0
430+ ; CHECK-NEXT: index z2 .d, #0, #1
431+ ; CHECK-NEXT: mov z3 .d, x0
412432; CHECK-NEXT: ptrue p0.d
413- ; CHECK-NEXT: cmpeq p0.d, p0/z, z1 .d, z2 .d
414- ; CHECK-NEXT: mov z0.s, p0/m, s0
433+ ; CHECK-NEXT: cmpeq p0.d, p0/z, z2 .d, z3 .d
434+ ; CHECK-NEXT: mov z0.s, p0/m, s1
415435; CHECK-NEXT: ret
416- %res = insertelement <vscale x 2 x float > poison , float %f , i64 %idx
436+ %res = insertelement <vscale x 2 x float > %a , float %f , i64 %idx
417437 ret <vscale x 2 x float > %res
418438}
419439
420- define <vscale x 4 x float > @test_insert_with_index_nxv4f32 (float %f , i64 %idx ) {
440+ define <vscale x 4 x float > @test_insert_with_index_nxv4f32 (<vscale x 4 x float > %a , float %f , i64 %idx ) {
421441; CHECK-LABEL: test_insert_with_index_nxv4f32:
422442; CHECK: // %bb.0:
423- ; CHECK-NEXT: index z1 .s, #0, #1
424- ; CHECK-NEXT: mov z2 .s, w0
443+ ; CHECK-NEXT: index z2 .s, #0, #1
444+ ; CHECK-NEXT: mov z3 .s, w0
425445; CHECK-NEXT: ptrue p0.s
426- ; CHECK-NEXT: cmpeq p0.s, p0/z, z1 .s, z2 .s
427- ; CHECK-NEXT: mov z0.s, p0/m, s0
446+ ; CHECK-NEXT: cmpeq p0.s, p0/z, z2 .s, z3 .s
447+ ; CHECK-NEXT: mov z0.s, p0/m, s1
428448; CHECK-NEXT: ret
429- %res = insertelement <vscale x 4 x float > poison , float %f , i64 %idx
449+ %res = insertelement <vscale x 4 x float > %a , float %f , i64 %idx
430450 ret <vscale x 4 x float > %res
431451}
432452
433- define <vscale x 2 x double > @test_insert_with_index_nxv2f64 (double %d , i64 %idx ) {
453+ define <vscale x 2 x double > @test_insert_with_index_nxv2f64 (<vscale x 2 x double > %a , double %d , i64 %idx ) {
434454; CHECK-LABEL: test_insert_with_index_nxv2f64:
435455; CHECK: // %bb.0:
436- ; CHECK-NEXT: index z1 .d, #0, #1
437- ; CHECK-NEXT: mov z2 .d, x0
456+ ; CHECK-NEXT: index z2 .d, #0, #1
457+ ; CHECK-NEXT: mov z3 .d, x0
438458; CHECK-NEXT: ptrue p0.d
439- ; CHECK-NEXT: cmpeq p0.d, p0/z, z1 .d, z2 .d
440- ; CHECK-NEXT: mov z0.d, p0/m, d0
459+ ; CHECK-NEXT: cmpeq p0.d, p0/z, z2 .d, z3 .d
460+ ; CHECK-NEXT: mov z0.d, p0/m, d1
441461; CHECK-NEXT: ret
442- %res = insertelement <vscale x 2 x double > poison , double %d , i64 %idx
462+ %res = insertelement <vscale x 2 x double > %a , double %d , i64 %idx
443463 ret <vscale x 2 x double > %res
444464}
445465
0 commit comments