Skip to content

Commit 39b24e8

Browse files
authored
[AArch64][ISel] Extend insertelement tests (llvm#173003)
This is in preparation for adding a DAG combiner that turns INSERT_VECTOR_ELT(undef, ...) into VECTOR_SPLAT.
1 parent fd402bb commit 39b24e8

File tree

1 file changed

+88
-68
lines changed

1 file changed

+88
-68
lines changed

llvm/test/CodeGen/AArch64/sve-insert-element.ll

Lines changed: 88 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,39 @@ define <vscale x 16 x i8> @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) {
165165
ret <vscale x 16 x i8> %b
166166
}
167167

168+
; TODO: Implement DAG combiner.
169+
; Test the INSERT_VECTOR_ELT(poison, ...) -> VECTOR_SPLAT combiner
170+
; <vscale x 16 x i8> is used as a proxy for testing using IR, but the combiner
171+
; is agnostic of the element type.
172+
173+
define <vscale x 16 x i8> @test_lanex_16xi8_poison(i8 %e, i32 %x) {
174+
; CHECK-LABEL: test_lanex_16xi8_poison:
175+
; CHECK: // %bb.0:
176+
; CHECK-NEXT: index z0.b, #0, #1
177+
; CHECK-NEXT: mov w8, w1
178+
; CHECK-NEXT: ptrue p0.b
179+
; CHECK-NEXT: mov z1.b, w8
180+
; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, z1.b
181+
; CHECK-NEXT: mov z0.b, p0/m, w0
182+
; CHECK-NEXT: ret
183+
%b = insertelement <vscale x 16 x i8> poison, i8 %e, i32 %x
184+
ret <vscale x 16 x i8> %b
185+
}
186+
187+
define <vscale x 16 x i8> @test_lanex_16xi8_poison_imm(i8 %e, i32 %x) {
188+
; CHECK-LABEL: test_lanex_16xi8_poison_imm:
189+
; CHECK: // %bb.0:
190+
; CHECK-NEXT: index z0.b, #0, #1
191+
; CHECK-NEXT: mov w8, w1
192+
; CHECK-NEXT: ptrue p0.b
193+
; CHECK-NEXT: mov z1.b, w8
194+
; CHECK-NEXT: mov w8, #5 // =0x5
195+
; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, z1.b
196+
; CHECK-NEXT: mov z0.b, p0/m, w8
197+
; CHECK-NEXT: ret
198+
%b = insertelement <vscale x 16 x i8> poison, i8 5, i32 %x
199+
ret <vscale x 16 x i8> %b
200+
}
168201

169202
; Redundant lane insert
170203
define <vscale x 4 x i32> @extract_insert_4xi32(<vscale x 4 x i32> %a) {
@@ -176,20 +209,7 @@ define <vscale x 4 x i32> @extract_insert_4xi32(<vscale x 4 x i32> %a) {
176209
ret <vscale x 4 x i32> %c
177210
}
178211

179-
define <vscale x 8 x i16> @test_lane6_undef_8xi16(i16 %a) {
180-
; CHECK-LABEL: test_lane6_undef_8xi16:
181-
; CHECK: // %bb.0:
182-
; CHECK-NEXT: mov w8, #6 // =0x6
183-
; CHECK-NEXT: index z0.h, #0, #1
184-
; CHECK-NEXT: ptrue p0.h
185-
; CHECK-NEXT: mov z1.h, w8
186-
; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z1.h
187-
; CHECK-NEXT: mov z0.h, p0/m, w0
188-
; CHECK-NEXT: ret
189-
%b = insertelement <vscale x 8 x i16> poison, i16 %a, i32 6
190-
ret <vscale x 8 x i16> %b
191-
}
192-
212+
; Inserting lane 0 into poison uses fmov instead of broadcasting to all lanes
193213
define <vscale x 16 x i8> @test_lane0_undef_16xi8(i8 %a) {
194214
; CHECK-LABEL: test_lane0_undef_16xi8:
195215
; CHECK: // %bb.0:
@@ -326,120 +346,120 @@ define <vscale x 2 x double> @test_insert_into_undef_nxv2f64(double %a) {
326346
}
327347

328348
; Insert scalar at index
329-
define <vscale x 2 x half> @test_insert_with_index_nxv2f16(half %h, i64 %idx) {
349+
define <vscale x 2 x half> @test_insert_with_index_nxv2f16(<vscale x 2 x half> %a, half %h, i64 %idx) {
330350
; CHECK-LABEL: test_insert_with_index_nxv2f16:
331351
; CHECK: // %bb.0:
332-
; CHECK-NEXT: index z1.d, #0, #1
333-
; CHECK-NEXT: mov z2.d, x0
352+
; CHECK-NEXT: index z2.d, #0, #1
353+
; CHECK-NEXT: mov z3.d, x0
334354
; CHECK-NEXT: ptrue p0.d
335-
; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
336-
; CHECK-NEXT: mov z0.h, p0/m, h0
355+
; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d
356+
; CHECK-NEXT: mov z0.h, p0/m, h1
337357
; CHECK-NEXT: ret
338-
%res = insertelement <vscale x 2 x half> poison, half %h, i64 %idx
358+
%res = insertelement <vscale x 2 x half> %a, half %h, i64 %idx
339359
ret <vscale x 2 x half> %res
340360
}
341361

342-
define <vscale x 4 x half> @test_insert_with_index_nxv4f16(half %h, i64 %idx) {
362+
define <vscale x 4 x half> @test_insert_with_index_nxv4f16(<vscale x 4 x half> %a, half %h, i64 %idx) {
343363
; CHECK-LABEL: test_insert_with_index_nxv4f16:
344364
; CHECK: // %bb.0:
345-
; CHECK-NEXT: index z1.s, #0, #1
346-
; CHECK-NEXT: mov z2.s, w0
365+
; CHECK-NEXT: index z2.s, #0, #1
366+
; CHECK-NEXT: mov z3.s, w0
347367
; CHECK-NEXT: ptrue p0.s
348-
; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
349-
; CHECK-NEXT: mov z0.h, p0/m, h0
368+
; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z3.s
369+
; CHECK-NEXT: mov z0.h, p0/m, h1
350370
; CHECK-NEXT: ret
351-
%res = insertelement <vscale x 4 x half> poison, half %h, i64 %idx
371+
%res = insertelement <vscale x 4 x half> %a, half %h, i64 %idx
352372
ret <vscale x 4 x half> %res
353373
}
354374

355-
define <vscale x 8 x half> @test_insert_with_index_nxv8f16(half %h, i64 %idx) {
375+
define <vscale x 8 x half> @test_insert_with_index_nxv8f16(<vscale x 8 x half> %a, half %h, i64 %idx) {
356376
; CHECK-LABEL: test_insert_with_index_nxv8f16:
357377
; CHECK: // %bb.0:
358-
; CHECK-NEXT: index z1.h, #0, #1
359-
; CHECK-NEXT: mov z2.h, w0
378+
; CHECK-NEXT: index z2.h, #0, #1
379+
; CHECK-NEXT: mov z3.h, w0
360380
; CHECK-NEXT: ptrue p0.h
361-
; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
362-
; CHECK-NEXT: mov z0.h, p0/m, h0
381+
; CHECK-NEXT: cmpeq p0.h, p0/z, z2.h, z3.h
382+
; CHECK-NEXT: mov z0.h, p0/m, h1
363383
; CHECK-NEXT: ret
364-
%res = insertelement <vscale x 8 x half> poison, half %h, i64 %idx
384+
%res = insertelement <vscale x 8 x half> %a, half %h, i64 %idx
365385
ret <vscale x 8 x half> %res
366386
}
367387

368-
define <vscale x 2 x bfloat> @test_insert_with_index_nxv2bf16(bfloat %h, i64 %idx) {
388+
define <vscale x 2 x bfloat> @test_insert_with_index_nxv2bf16(<vscale x 2 x bfloat> %a, bfloat %h, i64 %idx) {
369389
; CHECK-LABEL: test_insert_with_index_nxv2bf16:
370390
; CHECK: // %bb.0:
371-
; CHECK-NEXT: index z1.d, #0, #1
372-
; CHECK-NEXT: mov z2.d, x0
391+
; CHECK-NEXT: index z2.d, #0, #1
392+
; CHECK-NEXT: mov z3.d, x0
373393
; CHECK-NEXT: ptrue p0.d
374-
; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
375-
; CHECK-NEXT: mov z0.h, p0/m, h0
394+
; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d
395+
; CHECK-NEXT: mov z0.h, p0/m, h1
376396
; CHECK-NEXT: ret
377-
%res = insertelement <vscale x 2 x bfloat> poison, bfloat %h, i64 %idx
397+
%res = insertelement <vscale x 2 x bfloat> %a, bfloat %h, i64 %idx
378398
ret <vscale x 2 x bfloat> %res
379399
}
380400

381-
define <vscale x 4 x bfloat> @test_insert_with_index_nxv4bf16(bfloat %h, i64 %idx) {
401+
define <vscale x 4 x bfloat> @test_insert_with_index_nxv4bf16(<vscale x 4 x bfloat> %a, bfloat %h, i64 %idx) {
382402
; CHECK-LABEL: test_insert_with_index_nxv4bf16:
383403
; CHECK: // %bb.0:
384-
; CHECK-NEXT: index z1.s, #0, #1
385-
; CHECK-NEXT: mov z2.s, w0
404+
; CHECK-NEXT: index z2.s, #0, #1
405+
; CHECK-NEXT: mov z3.s, w0
386406
; CHECK-NEXT: ptrue p0.s
387-
; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
388-
; CHECK-NEXT: mov z0.h, p0/m, h0
407+
; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z3.s
408+
; CHECK-NEXT: mov z0.h, p0/m, h1
389409
; CHECK-NEXT: ret
390-
%res = insertelement <vscale x 4 x bfloat> poison, bfloat %h, i64 %idx
410+
%res = insertelement <vscale x 4 x bfloat> %a, bfloat %h, i64 %idx
391411
ret <vscale x 4 x bfloat> %res
392412
}
393413

394-
define <vscale x 8 x bfloat> @test_insert_with_index_nxv8bf16(bfloat %h, i64 %idx) {
414+
define <vscale x 8 x bfloat> @test_insert_with_index_nxv8bf16(<vscale x 8 x bfloat> %a, bfloat %h, i64 %idx) {
395415
; CHECK-LABEL: test_insert_with_index_nxv8bf16:
396416
; CHECK: // %bb.0:
397-
; CHECK-NEXT: index z1.h, #0, #1
398-
; CHECK-NEXT: mov z2.h, w0
417+
; CHECK-NEXT: index z2.h, #0, #1
418+
; CHECK-NEXT: mov z3.h, w0
399419
; CHECK-NEXT: ptrue p0.h
400-
; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
401-
; CHECK-NEXT: mov z0.h, p0/m, h0
420+
; CHECK-NEXT: cmpeq p0.h, p0/z, z2.h, z3.h
421+
; CHECK-NEXT: mov z0.h, p0/m, h1
402422
; CHECK-NEXT: ret
403-
%res = insertelement <vscale x 8 x bfloat> poison, bfloat %h, i64 %idx
423+
%res = insertelement <vscale x 8 x bfloat> %a, bfloat %h, i64 %idx
404424
ret <vscale x 8 x bfloat> %res
405425
}
406426

407-
define <vscale x 2 x float> @test_insert_with_index_nxv2f32(float %f, i64 %idx) {
427+
define <vscale x 2 x float> @test_insert_with_index_nxv2f32(<vscale x 2 x float> %a, float %f, i64 %idx) {
408428
; CHECK-LABEL: test_insert_with_index_nxv2f32:
409429
; CHECK: // %bb.0:
410-
; CHECK-NEXT: index z1.d, #0, #1
411-
; CHECK-NEXT: mov z2.d, x0
430+
; CHECK-NEXT: index z2.d, #0, #1
431+
; CHECK-NEXT: mov z3.d, x0
412432
; CHECK-NEXT: ptrue p0.d
413-
; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
414-
; CHECK-NEXT: mov z0.s, p0/m, s0
433+
; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d
434+
; CHECK-NEXT: mov z0.s, p0/m, s1
415435
; CHECK-NEXT: ret
416-
%res = insertelement <vscale x 2 x float> poison, float %f, i64 %idx
436+
%res = insertelement <vscale x 2 x float> %a, float %f, i64 %idx
417437
ret <vscale x 2 x float> %res
418438
}
419439

420-
define <vscale x 4 x float> @test_insert_with_index_nxv4f32(float %f, i64 %idx) {
440+
define <vscale x 4 x float> @test_insert_with_index_nxv4f32(<vscale x 4 x float> %a, float %f, i64 %idx) {
421441
; CHECK-LABEL: test_insert_with_index_nxv4f32:
422442
; CHECK: // %bb.0:
423-
; CHECK-NEXT: index z1.s, #0, #1
424-
; CHECK-NEXT: mov z2.s, w0
443+
; CHECK-NEXT: index z2.s, #0, #1
444+
; CHECK-NEXT: mov z3.s, w0
425445
; CHECK-NEXT: ptrue p0.s
426-
; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
427-
; CHECK-NEXT: mov z0.s, p0/m, s0
446+
; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z3.s
447+
; CHECK-NEXT: mov z0.s, p0/m, s1
428448
; CHECK-NEXT: ret
429-
%res = insertelement <vscale x 4 x float> poison, float %f, i64 %idx
449+
%res = insertelement <vscale x 4 x float> %a, float %f, i64 %idx
430450
ret <vscale x 4 x float> %res
431451
}
432452

433-
define <vscale x 2 x double> @test_insert_with_index_nxv2f64(double %d, i64 %idx) {
453+
define <vscale x 2 x double> @test_insert_with_index_nxv2f64(<vscale x 2 x double> %a, double %d, i64 %idx) {
434454
; CHECK-LABEL: test_insert_with_index_nxv2f64:
435455
; CHECK: // %bb.0:
436-
; CHECK-NEXT: index z1.d, #0, #1
437-
; CHECK-NEXT: mov z2.d, x0
456+
; CHECK-NEXT: index z2.d, #0, #1
457+
; CHECK-NEXT: mov z3.d, x0
438458
; CHECK-NEXT: ptrue p0.d
439-
; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
440-
; CHECK-NEXT: mov z0.d, p0/m, d0
459+
; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d
460+
; CHECK-NEXT: mov z0.d, p0/m, d1
441461
; CHECK-NEXT: ret
442-
%res = insertelement <vscale x 2 x double> poison, double %d, i64 %idx
462+
%res = insertelement <vscale x 2 x double> %a, double %d, i64 %idx
443463
ret <vscale x 2 x double> %res
444464
}
445465

0 commit comments

Comments
 (0)