diff --git a/llvm/lib/Target/AIE/aie2ps/AIE2PSInstrInfo.cpp b/llvm/lib/Target/AIE/aie2ps/AIE2PSInstrInfo.cpp index 30cba70498aa..80234318b518 100644 --- a/llvm/lib/Target/AIE/aie2ps/AIE2PSInstrInfo.cpp +++ b/llvm/lib/Target/AIE/aie2ps/AIE2PSInstrInfo.cpp @@ -247,6 +247,54 @@ unsigned AIE2PSInstrInfo::getOpCode(MachineInstr &I) const { return isSigned ? AIE2PS::VUNPACK_mv_unpack_x_unpackSign1 : AIE2PS::VUNPACK_mv_unpack_x_unpackSign0; } + // Cascade stream read (SCD) + case Intrinsic::aie2ps_scd_read_vec: + return AIE2PS::VMOV_alu_mv_alu_mv_scd_x; + case Intrinsic::aie2ps_scd_read_acc32: + return AIE2PS::VMOV_alu_mv_alu_mv_scd_bm; + case Intrinsic::aie2ps_scd_expand_lo: + return AIE2PS::VMOV_0_mv_scd_cm; + case Intrinsic::aie2ps_scd_expand_hi: + return AIE2PS::VMOV_1_mv_scd_cm; + case Intrinsic::aie2ps_scd_ACC2048: { + Register SrcReg = I.getOperand(3).getReg(); + if (auto Src = getIConstantVRegValWithLookThrough(SrcReg, MRI)) { + unsigned SrcConstVal = Src->Value.getZExtValue(); + switch (SrcConstVal) { + case 0: + return AIE2PS::VMOV_0_mv_scd_dm_imm; + case 1: + return AIE2PS::VMOV_1_mv_scd_dm_imm; + case 2: + return AIE2PS::VMOV_2; + case 3: + return AIE2PS::VMOV_3; + default: + llvm_unreachable("Unexpected SrcConstVal for SCD"); + } + } + llvm_unreachable("Unexpected non-constant for SCD"); + } + case Intrinsic::aie2ps_scd_expand_ACC1024: + case Intrinsic::aie2ps_scd_expand_ACC2048: + return AIE2PS::VMOV_alu_mv_alu_mv_scd_dm_reg; + case Intrinsic::aie2ps_scd_expand_ACC1024_incr: + case Intrinsic::aie2ps_scd_expand_ACC2048_incr: + return AIE2PS::VMOV_alu_mv_alu_mv_scd_dm_dyn; + // Cascade stream write (MCD) + case Intrinsic::aie2ps_mcd_write_vec: + return AIE2PS::VMOV_st_mv_mcd_x; + case Intrinsic::aie2ps_mcd_write_acc32: + return AIE2PS::VMOV_st_mv_mcd_bm; + // Scalar stream intrinsics + case Intrinsic::aie2ps_get_ss: + return AIE2PS::MOV_lda; + case Intrinsic::aie2ps_get_ss_nb: + return AIE2PS::MOV_nb_lda; + case Intrinsic::aie2ps_put_ms: + return 
AIE2PS::MOV_st_mMStream_tlast_reg; + case Intrinsic::aie2ps_put_ms_nb: + return AIE2PS::MOV_nb_st_mMStream_tlast_reg; default: llvm_unreachable("Unexpected Intrinsic ID"); }
@@ -1417,6 +1465,37 @@ Register AIE2PSInstrInfo::getUnpackSignCReg() const {
   return AIE2PS::unpackSign0;
 }
 
+/// Status register for scalar stream reads; in this patch's MIR tests MOV_lda
+/// and MOV_nb_lda carry an implicit def of it ($srss0).
+Register AIE2PSInstrInfo::getSSStatusReg() const { return AIE2PS::srSS0; }
+
+/// Status register for scalar stream writes; in this patch's MIR tests the
+/// put_ms / put_ms_nb moves carry an implicit def of it ($srms0).
+Register AIE2PSInstrInfo::getMSStatusReg() const { return AIE2PS::srMS0; }
+
+/// Pick the scalar stream-write opcode for a put_ms / put_ms_nb intrinsic whose
+/// TLAST operand is the known constant \p ConstTLastVal.
+///
+/// \param I             The put_ms or put_ms_nb intrinsic being selected.
+/// \param ConstTLastVal Constant value of the TLAST operand.
+/// \returns the *_tlast_imm opcode when \p ConstTLastVal is zero, otherwise
+///          MOV_tlast (put_ms) or MOV_nb_tlast (put_ms_nb).
+unsigned AIE2PSInstrInfo::getMoveToMSOpcode(MachineInstr &I,
+                                            unsigned ConstTLastVal) const {
+  // Zero selects the *_tlast_imm form; every other constant selects *_tlast.
+  const bool UseTLastImm = (ConstTLastVal == 0);
+  const unsigned IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
+  switch (IntrinsicID) {
+  case Intrinsic::aie2ps_put_ms:
+    return UseTLastImm ? AIE2PS::MOV_st_mMStream_tlast_imm : AIE2PS::MOV_tlast;
+  case Intrinsic::aie2ps_put_ms_nb:
+    return UseTLastImm ? AIE2PS::MOV_nb_st_mMStream_tlast_imm
+                       : AIE2PS::MOV_nb_tlast;
+  default:
+    llvm_unreachable("Unexpected Intrinsic ID");
+  }
+}
+
 unsigned AIE2PSInstrInfo::getScalarRegSize() const { return 32; }
 
 unsigned AIE2PSInstrInfo::getBasicVecRegSize() const { return 256; }
diff --git a/llvm/lib/Target/AIE/aie2ps/AIE2PSInstrInfo.h b/llvm/lib/Target/AIE/aie2ps/AIE2PSInstrInfo.h index c23361dd1776..83a71c20eb40 100644 --- a/llvm/lib/Target/AIE/aie2ps/AIE2PSInstrInfo.h +++ b/llvm/lib/Target/AIE/aie2ps/AIE2PSInstrInfo.h @@ -131,6 +131,10 @@ class AIE2PSInstrInfo : public AIE2PSGenInstrInfo { bool IsTailCall) const override; Register getPackSignCReg() const override; Register getUnpackSignCReg() const override; + Register getSSStatusReg() const override; + Register getMSStatusReg() const override; + unsigned getMoveToMSOpcode(MachineInstr &I, + unsigned ConstTLastVal) const override; unsigned getCycleSeparatorOpcode() const override; unsigned getGenericAddVectorEltOpcode() const override; unsigned getGenericInsertVectorEltOpcode() const override; diff --git a/llvm/lib/Target/AIE/aie2ps/AIE2PSInstructionSelector.cpp 
b/llvm/lib/Target/AIE/aie2ps/AIE2PSInstructionSelector.cpp index 3ad30a44b974..2ef957da71fc 100644 --- a/llvm/lib/Target/AIE/aie2ps/AIE2PSInstructionSelector.cpp +++ b/llvm/lib/Target/AIE/aie2ps/AIE2PSInstructionSelector.cpp @@ -318,6 +318,8 @@ class AIE2PSInstructionSelector : public AIEBaseInstructionSelector { unsigned Opcode); bool selectBFP16_ADDMAC_CONF(MachineInstr &I, MachineRegisterInfo &MRI, unsigned Opcode); + bool selectCascadeStreamInsn(MachineInstr &I, MachineRegisterInfo &MRI, + bool IsWrite); private: bool selectImpl(MachineInstr &I, @@ -2056,6 +2058,26 @@ bool AIE2PSInstructionSelector::select(MachineInstr &I) { return selectBFP16_ADDMAC_CONF(I, MRI, AIE2PS::VADDMAC_f_vaddmac_bfp16); case Intrinsic::aie2ps_BFP640_BFP2560_ACC2048_bf_addmsc_conf: return selectBFP16_ADDMAC_CONF(I, MRI, AIE2PS::VADDMSC_f_vaddmac_bfp16); + case Intrinsic::aie2ps_scd_read_vec: + case Intrinsic::aie2ps_scd_read_acc32: + case Intrinsic::aie2ps_scd_expand_lo: + case Intrinsic::aie2ps_scd_expand_hi: + case Intrinsic::aie2ps_scd_ACC2048: + case Intrinsic::aie2ps_scd_expand_ACC1024: + case Intrinsic::aie2ps_scd_expand_ACC2048: + case Intrinsic::aie2ps_scd_expand_ACC1024_incr: + case Intrinsic::aie2ps_scd_expand_ACC2048_incr: + return selectCascadeStreamInsn(I, MRI, false); + case Intrinsic::aie2ps_mcd_write_vec: + case Intrinsic::aie2ps_mcd_write_acc32: + return selectCascadeStreamInsn(I, MRI, true); + case Intrinsic::aie2ps_get_ss: + case Intrinsic::aie2ps_get_ss_nb: + return selectGetSS(I, MRI, MIB); + case Intrinsic::aie2ps_put_ms: + return selectPutMSB(I, MRI, MIB); + case Intrinsic::aie2ps_put_ms_nb: + return selectPutMSNB(I, MRI, MIB); default: return selectImpl(I, *CoverageInfo); }
@@ -4802,6 +4824,108 @@ bool AIE2PSInstructionSelector::selectVST_FIFO(MachineInstr &I,
   return false;
 }
 
+/// Select a cascade stream read (SCD) or write (MCD) intrinsic.
+///
+/// Builds the move whose opcode TII.getOpCode(I) picks, hands it to
+/// setUnsetCtrlRegister() together with the enable operand and crSCDEn (reads)
+/// or crMCDEn (writes), then erases \p I. In this patch's MIR tests that shows
+/// up as a write of the enable value before the move and a reset of the
+/// control register to 1 after it.
+///
+/// \param I       The intrinsic instruction being selected.
+/// \param MRI     Register info used to create and constrain registers.
+/// \param IsWrite True for the mcd_write_* intrinsics, where the streamed value
+///                is operand 1; false for the scd_* reads, where the result is
+///                operand 0.
+/// \returns false if the COPY into the position register cannot be selected,
+///          otherwise the result of constraining the new move's operands.
+bool AIE2PSInstructionSelector::selectCascadeStreamInsn(
+    MachineInstr &I, MachineRegisterInfo &MRI, bool IsWrite) {
+  const unsigned LastOpIdx = I.getNumOperands() - 1;
+  const Register CascadeReg = I.getOperand(IsWrite ? 1 : 0).getReg();
+  // The enable is the last operand unless a position operand follows it.
+  // NOTE(review): for aie2ps_scd_ACC2048 the last operand is the constant
+  // selector that getOpCode() reads (operand 3), so the default path programs
+  // crSCDEn from the selector (see MOVX_mvx_cr_imm 0/2/3 in the tests), not
+  // from the enable operand. Confirm whether LastOpIdx - 1 was intended.
+  Register EnableReg = I.getOperand(LastOpIdx).getReg();
+  MachineInstrBuilder CascadeMV;
+  const unsigned OpCode = TII.getOpCode(I);
+
+  // Copy the low ACC1024 half of the ACC2048 move result into CascadeReg.
+  auto ExtractACC1024 = [&]() {
+    auto DestMI = MIB.buildInstr(TargetOpcode::COPY, {CascadeReg}, {})
+                      .addReg(CascadeMV->getOperand(0).getReg(), 0,
+                              AIE2PS::sub_1024_acc_lo);
+    constrainOperandRegClass(*MF, TRI, MRI, TII, RBI, *DestMI,
+                             AIE2PS::ACC1024RegClass, DestMI->getOperand(0));
+  };
+
+  if (IsWrite) {
+    CascadeMV = MIB.buildInstr(OpCode, {}, {}).addReg(CascadeReg);
+  } else {
+    const auto IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
+    switch (IntrinsicID) {
+    case Intrinsic::aie2ps_scd_expand_ACC1024:
+    case Intrinsic::aie2ps_scd_expand_ACC2048: {
+      // Operands end with (enable, position); the position goes through a
+      // COPY into the mR31_scd class before feeding the move.
+      EnableReg = I.getOperand(LastOpIdx - 1).getReg();
+      const Register PosReg = I.getOperand(LastOpIdx).getReg();
+      auto CopyPosReg = MIB.buildInstr(TargetOpcode::COPY,
+                                       {&AIE2PS::mR31_scdRegClass}, {PosReg});
+      if (!selectCopy(*CopyPosReg, MRI))
+        return false;
+      // The move always defines an ACC2048; the ACC1024 variant reads it into
+      // a temporary and keeps only the low half.
+      const bool NeedsLowHalf =
+          IntrinsicID == Intrinsic::aie2ps_scd_expand_ACC1024;
+      const Register DstReg =
+          NeedsLowHalf ? MRI.createVirtualRegister(&AIE2PS::ACC2048RegClass)
+                       : CascadeReg;
+      CascadeMV =
+          MIB.buildInstr(OpCode, {DstReg}, {}).addReg(CopyPosReg.getReg(0));
+      if (NeedsLowHalf)
+        ExtractACC1024();
+      break;
+    }
+    case Intrinsic::aie2ps_scd_expand_ACC2048_incr: {
+      // Operand 1 is the intrinsic's second result; it becomes the move's
+      // second def.
+      const Register R31 = I.getOperand(1).getReg();
+      EnableReg = I.getOperand(LastOpIdx - 1).getReg();
+      const Register PosPtrInReg = I.getOperand(LastOpIdx).getReg();
+      CascadeMV =
+          MIB.buildInstr(OpCode, {CascadeReg, R31}, {}).addReg(PosPtrInReg);
+      break;
+    }
+    case Intrinsic::aie2ps_scd_expand_ACC1024_incr: {
+      const Register R31 = I.getOperand(1).getReg();
+      EnableReg = I.getOperand(LastOpIdx - 1).getReg();
+      const Register PosPtrInReg = I.getOperand(LastOpIdx).getReg();
+      Register DstReg = MRI.createVirtualRegister(&AIE2PS::ACC2048RegClass);
+      auto CopyPosPtrInReg = MIB.buildInstr(
+          TargetOpcode::COPY, {&AIE2PS::mR31_scdRegClass}, {PosPtrInReg});
+      CascadeMV = MIB.buildInstr(OpCode, {DstReg, R31}, {})
+                      .addReg(CopyPosPtrInReg.getReg(0));
+      RBI.constrainGenericRegister(R31, AIE2PS::mR31_scdRegClass, MRI);
+      ExtractACC1024();
+      break;
+    }
+    default:
+      // Remaining reads: the move is built with only its result operand.
+      CascadeMV = MIB.buildInstr(OpCode, {CascadeReg}, {});
+      break;
+    }
+  }
+  setUnsetCtrlRegister(MIB, *CascadeMV, MRI,
+                       IsWrite ? AIE2PS::crMCDEn : AIE2PS::crSCDEn, EnableReg,
+                       1);
+
+  I.eraseFromParent();
+  return constrainSelectedInstRegOperands(*CascadeMV, TII, TRI, RBI);
+}
+
 namespace llvm {
 InstructionSelector *
 createAIE2PSInstructionSelector(const AIE2PSTargetMachine &TM,
diff --git a/llvm/test/CodeGen/AIE/aie2ps/GlobalIsel/inst-select-streams.mir b/llvm/test/CodeGen/AIE/aie2ps/GlobalIsel/inst-select-streams.mir new file mode 100644 index 000000000000..df7ffc4d173a --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2ps/GlobalIsel/inst-select-streams.mir @@ -0,0 +1,536 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2026 Advanced Micro Devices, Inc. 
or its affiliates +# RUN: llc -mtriple aie2ps -run-pass=instruction-select %s -verify-machineinstrs -o - | FileCheck %s +--- +name: test_get_scd_v128int4 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $r0 + + ; CHECK-LABEL: name: test_get_scd_v128int4 + ; CHECK: liveins: $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: $crscden = COPY [[COPY]] + ; CHECK-NEXT: [[VMOV_alu_mv_alu_mv_scd_x:%[0-9]+]]:vec512 = VMOV_alu_mv_alu_mv_scd_x implicit $crscden + ; CHECK-NEXT: $crscden = MOVX_mvx_cr_imm 1 + ; CHECK-NEXT: $x0 = COPY [[VMOV_alu_mv_alu_mv_scd_x]] + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %1:gprregbank(s32) = COPY $r0 + %2:vregbank(<16 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2ps.scd.read.vec), %1(s32) + %0:vregbank(<64 x s8>) = G_BITCAST %2(<16 x s32>) + $x0 = COPY %0(<64 x s8>) + PseudoRET implicit $lr, implicit $x0 + +... +--- +name: test_get_scd_v16acc32 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $r0 + + ; CHECK-LABEL: name: test_get_scd_v16acc32 + ; CHECK: liveins: $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: $crscden = COPY [[COPY]] + ; CHECK-NEXT: [[VMOV_alu_mv_alu_mv_scd_bm:%[0-9]+]]:acc512 = VMOV_alu_mv_alu_mv_scd_bm implicit $crscden + ; CHECK-NEXT: $crscden = MOVX_mvx_cr_imm 1 + ; CHECK-NEXT: $bmll0 = COPY [[VMOV_alu_mv_alu_mv_scd_bm]] + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $bmll0 + %1:gprregbank(s32) = COPY $r0 + %0:accregbank(<16 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2ps.scd.read.acc32), %1(s32) + $bmll0 = COPY %0(<16 x s32>) + PseudoRET implicit $lr, implicit $bmll0 + +... 
+--- +name: test_get_scd_v32acc32_lo +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $r0 + + ; CHECK-LABEL: name: test_get_scd_v32acc32_lo + ; CHECK: liveins: $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: $crscden = COPY [[COPY]] + ; CHECK-NEXT: [[VMOV_0_mv_scd_cm:%[0-9]+]]:acc1024 = VMOV_0_mv_scd_cm implicit $crscden + ; CHECK-NEXT: $crscden = MOVX_mvx_cr_imm 1 + ; CHECK-NEXT: $cml0 = COPY [[VMOV_0_mv_scd_cm]] + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $cml0 + %1:gprregbank(s32) = COPY $r0 + %0:accregbank(<32 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2ps.scd.expand.lo), %1(s32) + $cml0 = COPY %0(<32 x s32>) + PseudoRET implicit $lr, implicit $cml0 + +... +--- +name: test_get_scd_v32acc32 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $r0 + + ; CHECK-LABEL: name: test_get_scd_v32acc32 + ; CHECK: liveins: $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: $crscden = COPY [[COPY]] + ; CHECK-NEXT: [[VMOV_1_mv_scd_cm:%[0-9]+]]:acc1024 = VMOV_1_mv_scd_cm implicit $crscden + ; CHECK-NEXT: $crscden = MOVX_mvx_cr_imm 1 + ; CHECK-NEXT: $cml0 = COPY [[VMOV_1_mv_scd_cm]] + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $cml0 + %1:gprregbank(s32) = COPY $r0 + %0:accregbank(<32 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2ps.scd.expand.hi), %1(s32) + $cml0 = COPY %0(<32 x s32>) + PseudoRET implicit $lr, implicit $cml0 + +... 
+--- +name: test_get_scd_v64acc32_0 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $r0 + + ; CHECK-LABEL: name: test_get_scd_v64acc32_0 + ; CHECK: liveins: $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $crscden = MOVX_mvx_cr_imm 0 + ; CHECK-NEXT: [[VMOV_0_mv_scd_dm_imm:%[0-9]+]]:acc2048 = VMOV_0_mv_scd_dm_imm implicit $crscden + ; CHECK-NEXT: $crscden = MOVX_mvx_cr_imm 1 + ; CHECK-NEXT: $dm0 = COPY [[VMOV_0_mv_scd_dm_imm]] + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $dm0 + %1:gprregbank(s32) = COPY $r0 + %2:gprregbank(s32) = G_CONSTANT i32 0 + %0:accregbank(<64 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2ps.scd.ACC2048), %1(s32), %2(s32) + $dm0 = COPY %0(<64 x s32>) + PseudoRET implicit $lr, implicit $dm0 + +... +--- +name: test_get_ss +legalized: true +regBankSelected: true +body: | + bb.1.entry: + + ; CHECK-LABEL: name: test_get_ss + ; CHECK: [[MOV_lda:%[0-9]+]]:mlockid_reg = MOV_lda implicit-def $srss0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $srss0 + ; CHECK-NEXT: $r0 = COPY [[MOV_lda]] + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $r0 + %1:gprregbank(s32), %2:gprregbank(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2ps.get.ss) + $r0 = COPY %1(s32) + PseudoRET implicit $lr, implicit $r0 + +... 
+--- +name: test_get_ss_nb +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $p1 + + ; CHECK-LABEL: name: test_get_ss_nb + ; CHECK: liveins: $p0, $p1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p1 + ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:mlockid_reg = MOV_RLC_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_nb_lda:%[0-9]+]]:mlockid_reg = MOV_nb_lda implicit-def $srss0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:mlockid_reg = COPY $srss0 + ; CHECK-NEXT: [[AND:%[0-9]+]]:mlockid_reg = AND [[COPY2]], [[MOV_RLC_imm11_pseudo]] + ; CHECK-NEXT: ST_s8_idx_imm [[AND]], [[COPY1]], 0, implicit-def dead $pe2_ads, implicit $pe2_ads :: (store (s8)) + ; CHECK-NEXT: [[EXTEND_u8_:%[0-9]+]]:mlockid_reg = EXTEND_u8 [[COPY2]] + ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo1:%[0-9]+]]:mlockid_reg = MOV_RLC_imm11_pseudo -1 + ; CHECK-NEXT: [[LSHL:%[0-9]+]]:mlockid_reg = LSHL [[EXTEND_u8_]], [[MOV_RLC_imm11_pseudo1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:mlockid_reg = AND [[LSHL]], [[MOV_RLC_imm11_pseudo]] + ; CHECK-NEXT: ST_s8_idx_imm [[AND1]], [[COPY]], 0, implicit-def dead $pe2_ads, implicit $pe2_ads :: (store (s8)) + ; CHECK-NEXT: $r0 = COPY [[MOV_nb_lda]] + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $r0 + %1:ptrregbank(p0) = COPY $p0 + %2:ptrregbank(p0) = COPY $p1 + %20:gprregbank(s32) = G_CONSTANT i32 1 + %3:gprregbank(s32), %4:gprregbank(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2ps.get.ss.nb) + %19:gprregbank(s32) = G_AND %4, %20 + G_STORE %19(s32), %2(p0) :: (store (s8)) + %21:gprregbank(s32) = G_CONSTANT i32 255 + %14:gprregbank(s32) = G_AND %4, %21 + %15:gprregbank(s32) = G_LSHR %14, %20(s32) + %13:gprregbank(s32) = G_AND %15, %20 + G_STORE %13(s32), %1(p0) :: (store (s8)) + $r0 = COPY %3(s32) + PseudoRET implicit $lr, implicit $r0 + +... 
+--- +name: test_put_ms +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $r0, $r1 + + ; CHECK-LABEL: name: test_put_ms + ; CHECK: liveins: $r0, $r1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:mr28_tlast = COPY $r1 + ; CHECK-NEXT: MOV_st_mMStream_tlast_reg [[COPY]], [[COPY1]], implicit-def $srms0 + ; CHECK-NEXT: PseudoRET implicit $lr + %0:gprregbank(s32) = COPY $r0 + %1:gprregbank(s32) = COPY $r1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2ps.put.ms), %0(s32), %1(s32) + PseudoRET implicit $lr + +... +--- +name: test_put_ms_nb +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0 + + ; CHECK-LABEL: name: test_put_ms_nb + ; CHECK: liveins: $p0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: MOV_nb_st_mMStream_tlast_imm [[COPY]], implicit-def $srms0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:mlockid_reg = COPY $srms0 + ; CHECK-NEXT: [[NEZ:%[0-9]+]]:mlockid_reg = NEZ [[COPY2]] + ; CHECK-NEXT: ST_s8_idx_imm [[NEZ]], [[COPY1]], 0, implicit-def dead $pe2_ads, implicit $pe2_ads :: (store (s8)) + ; CHECK-NEXT: PseudoRET implicit $lr + %0:gprregbank(<2 x s16>) = COPY $r0 + %1:ptrregbank(p0) = COPY $p0 + %4:gprregbank(s32) = G_CONSTANT i32 0 + %2:gprregbank(s32) = G_BITCAST %0(<2 x s16>) + %3:gprregbank(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2ps.put.ms.nb), %2(s32), %4(s32) + %8:gprregbank(s32) = G_ICMP intpred(ne), %3(s32), %4 + G_STORE %8(s32), %1(p0) :: (store (s8)) + PseudoRET implicit $lr + +... 
+--- +name: test_put_ms_const +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $r0 + + ; CHECK-LABEL: name: test_put_ms_const + ; CHECK: liveins: $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 0 + ; CHECK-NEXT: MOV_tlast [[MOV_RLC_imm11_pseudo]], implicit-def $srms0 + ; CHECK-NEXT: PseudoRET implicit $lr + %1:gprregbank(s32) = G_CONSTANT i32 0 + %2:gprregbank(s32) = G_CONSTANT i32 1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2ps.put.ms), %1(s32), %2(s32) + PseudoRET implicit $lr + +... +--- +name: test_put_ms_nb_tlast +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $r1 + + ; CHECK-LABEL: name: test_put_ms_nb_tlast + ; CHECK: liveins: $p0, $r0, $r1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:mr28_tlast = COPY $r1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: MOV_nb_st_mMStream_tlast_reg [[COPY]], [[COPY1]], implicit-def $srms0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:mlockid_reg = COPY $srms0 + ; CHECK-NEXT: [[NEZ:%[0-9]+]]:mlockid_reg = NEZ [[COPY3]] + ; CHECK-NEXT: ST_s8_idx_imm [[NEZ]], [[COPY2]], 0, implicit-def dead $pe2_ads, implicit $pe2_ads :: (store (s8)) + ; CHECK-NEXT: PseudoRET implicit $lr + %0:gprregbank(<2 x s16>) = COPY $r0 + %1:gprregbank(s32) = COPY $r1 + %2:ptrregbank(p0) = COPY $p0 + %5:gprregbank(s32) = G_CONSTANT i32 0 + %3:gprregbank(s32) = G_BITCAST %0(<2 x s16>) + %4:gprregbank(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2ps.put.ms.nb), %3(s32), %1(s32) + %9:gprregbank(s32) = G_ICMP intpred(ne), %4(s32), %5 + G_STORE %9(s32), %2(p0) :: (store (s8)) + PseudoRET implicit $lr + +... 
+--- +name: test_put_ms_nb_const_tlast +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $r0 + + ; CHECK-LABEL: name: test_put_ms_nb_const_tlast + ; CHECK: liveins: $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: MOV_nb_tlast [[COPY]], implicit-def $srms0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $srms0 + ; CHECK-NEXT: PseudoRET implicit $lr + %1:gprregbank(s32) = G_CONSTANT i32 1 + %2:gprregbank(s32) = COPY $r0 + %3:gprregbank(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2ps.put.ms.nb), %2(s32), %1(s32) + PseudoRET implicit $lr + +... +--- +name: test_get_scd_v64acc32_1 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $r0 + + ; CHECK-LABEL: name: test_get_scd_v64acc32_1 + ; CHECK: liveins: $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[VMOV_1_mv_scd_dm_imm:%[0-9]+]]:acc2048 = VMOV_1_mv_scd_dm_imm implicit $crscden + ; CHECK-NEXT: $dm0 = COPY [[VMOV_1_mv_scd_dm_imm]] + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $dm0 + %1:gprregbank(s32) = COPY $r0 + %2:gprregbank(s32) = G_CONSTANT i32 1 + %0:accregbank(<64 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2ps.scd.ACC2048), %1(s32), %2(s32) + $dm0 = COPY %0(<64 x s32>) + PseudoRET implicit $lr, implicit $dm0 + +... 
+--- +name: test_get_scd_v64acc32_2 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $r0 + + ; CHECK-LABEL: name: test_get_scd_v64acc32_2 + ; CHECK: liveins: $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $crscden = MOVX_mvx_cr_imm 2 + ; CHECK-NEXT: [[VMOV_2_:%[0-9]+]]:acc2048 = VMOV_2 implicit $crscden + ; CHECK-NEXT: $crscden = MOVX_mvx_cr_imm 1 + ; CHECK-NEXT: $dm0 = COPY [[VMOV_2_]] + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $dm0 + %1:gprregbank(s32) = COPY $r0 + %2:gprregbank(s32) = G_CONSTANT i32 2 + %0:accregbank(<64 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2ps.scd.ACC2048), %1(s32), %2(s32) + $dm0 = COPY %0(<64 x s32>) + PseudoRET implicit $lr, implicit $dm0 + +... +--- +name: test_get_scd_v64acc32_3 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $r0 + + ; CHECK-LABEL: name: test_get_scd_v64acc32_3 + ; CHECK: liveins: $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $crscden = MOVX_mvx_cr_imm 3 + ; CHECK-NEXT: [[VMOV_3_:%[0-9]+]]:acc2048 = VMOV_3 implicit $crscden + ; CHECK-NEXT: $crscden = MOVX_mvx_cr_imm 1 + ; CHECK-NEXT: $dm0 = COPY [[VMOV_3_]] + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $dm0 + %1:gprregbank(s32) = COPY $r0 + %2:gprregbank(s32) = G_CONSTANT i32 3 + %0:accregbank(<64 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2ps.scd.ACC2048), %1(s32), %2(s32) + $dm0 = COPY %0(<64 x s32>) + PseudoRET implicit $lr, implicit $dm0 + +... 
+--- +name: test_get_scd_expand_v32acc32 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $r0, $r1 + + ; CHECK-LABEL: name: test_get_scd_expand_v32acc32 + ; CHECK: liveins: $r0, $r1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:mr31_scd = COPY [[COPY1]] + ; CHECK-NEXT: $crscden = COPY [[COPY]] + ; CHECK-NEXT: [[VMOV_alu_mv_alu_mv_scd_dm_reg:%[0-9]+]]:acc2048 = VMOV_alu_mv_alu_mv_scd_dm_reg [[COPY2]], implicit $crscden + ; CHECK-NEXT: $crscden = MOVX_mvx_cr_imm 1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:acc1024 = COPY [[VMOV_alu_mv_alu_mv_scd_dm_reg]].sub_1024_acc_lo + ; CHECK-NEXT: $cml0 = COPY [[COPY3]] + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $cml0 + %1:gprregbank(s32) = COPY $r0 + %2:gprregbank(s32) = COPY $r1 + %0:accregbank(<32 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2ps.scd.expand.ACC1024), %1(s32), %2(s32) + $cml0 = COPY %0(<32 x s32>) + PseudoRET implicit $lr, implicit $cml0 + +... 
+--- +name: test_get_scd_expand_v64acc32 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $r0, $r1 + + ; CHECK-LABEL: name: test_get_scd_expand_v64acc32 + ; CHECK: liveins: $r0, $r1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:mr31_scd = COPY [[COPY1]] + ; CHECK-NEXT: $crscden = COPY [[COPY]] + ; CHECK-NEXT: [[VMOV_alu_mv_alu_mv_scd_dm_reg:%[0-9]+]]:acc2048 = VMOV_alu_mv_alu_mv_scd_dm_reg [[COPY2]], implicit $crscden + ; CHECK-NEXT: $crscden = MOVX_mvx_cr_imm 1 + ; CHECK-NEXT: $dm0 = COPY [[VMOV_alu_mv_alu_mv_scd_dm_reg]] + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $dm0 + %1:gprregbank(s32) = COPY $r0 + %2:gprregbank(s32) = COPY $r1 + %0:accregbank(<64 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2ps.scd.expand.ACC2048), %1(s32), %2(s32) + $dm0 = COPY %0(<64 x s32>) + PseudoRET implicit $lr, implicit $dm0 + +... +--- +name: test_put_mcd +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $r0, $x0 + + ; CHECK-LABEL: name: test_put_mcd + ; CHECK: liveins: $r0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: $crmcden = COPY [[COPY1]] + ; CHECK-NEXT: VMOV_st_mv_mcd_x [[COPY]], implicit $crmcden + ; CHECK-NEXT: $crmcden = MOVX_mvx_cr_imm 1 + ; CHECK-NEXT: PseudoRET implicit $lr + %0:vregbank(<64 x s8>) = COPY $x0 + %1:gprregbank(s32) = COPY $r0 + %2:vregbank(<16 x s32>) = G_BITCAST %0(<64 x s8>) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2ps.mcd.write.vec), %2(<16 x s32>), %1(s32) + PseudoRET implicit $lr + +... 
+--- +name: test_put_mcd_acc +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $bmll0, $r0 + + ; CHECK-LABEL: name: test_put_mcd_acc + ; CHECK: liveins: $bmll0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:acc512 = COPY $bmll0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: $crmcden = COPY [[COPY1]] + ; CHECK-NEXT: VMOV_st_mv_mcd_bm [[COPY]], implicit $crmcden + ; CHECK-NEXT: $crmcden = MOVX_mvx_cr_imm 1 + ; CHECK-NEXT: PseudoRET implicit $lr + %0:accregbank(<8 x s64>) = COPY $bmll0 + %1:gprregbank(s32) = COPY $r0 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2ps.mcd.write.acc32), %0(<8 x s64>), %1(s32) + PseudoRET implicit $lr + +... +--- +name: test_get_scd_expand_v32acc32_incr +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0 + + ; CHECK-LABEL: name: test_get_scd_expand_v32acc32_incr + ; CHECK: liveins: $p0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[LDA_dms_lda_scalar_ld_idx_imm:%[0-9]+]]:magusrc_and_mdm = LDA_dms_lda_scalar_ld_idx_imm [[COPY1]], 0 :: (dereferenceable load (s20), align 4) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ep = COPY [[LDA_dms_lda_scalar_ld_idx_imm]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:mr31_scd = COPY [[COPY2]] + ; CHECK-NEXT: $crscden = COPY [[COPY]] + ; CHECK-NEXT: [[VMOV_alu_mv_alu_mv_scd_dm_dyn:%[0-9]+]]:acc2048, [[VMOV_alu_mv_alu_mv_scd_dm_dyn1:%[0-9]+]]:mr31_divs = VMOV_alu_mv_alu_mv_scd_dm_dyn [[COPY3]], implicit $crscden + ; CHECK-NEXT: $crscden = MOVX_mvx_cr_imm 1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:acc1024 = COPY [[VMOV_alu_mv_alu_mv_scd_dm_dyn]].sub_1024_acc_lo + ; CHECK-NEXT: ST_dms_sts_scalar_st_idx_imm [[VMOV_alu_mv_alu_mv_scd_dm_dyn1]], [[COPY1]], 0 :: (store (s32)) + ; CHECK-NEXT: $cml0 = COPY [[COPY4]] + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $cml0 + %1:gprregbank(s32) = COPY $r0 + %2:ptrregbank(p0) = COPY $p0 + %3:modregbank(s20) = G_LOAD 
%2(p0) :: (dereferenceable load (s20)) + %7:ptrregbank(s20) = COPY %3(s20) + %4:ptrregbank(p0) = G_INTTOPTR %7(s20) + %5:accregbank(<16 x s64>), %6:gprregbank(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2ps.scd.expand.ACC1024.incr), %1(s32), %4(p0) + G_STORE %6(s32), %2(p0) :: (store (s32)) + %0:accregbank(<32 x s32>) = G_BITCAST %5(<16 x s64>) + $cml0 = COPY %0(<32 x s32>) + PseudoRET implicit $lr, implicit $cml0 + +... +--- +name: test_get_scd_expand_v64acc32_incr +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0 + + ; CHECK-LABEL: name: test_get_scd_expand_v64acc32_incr + ; CHECK: liveins: $p0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[LDA_dms_lda_scalar_ld_idx_imm:%[0-9]+]]:magusrc_and_mdm = LDA_dms_lda_scalar_ld_idx_imm [[COPY1]], 0 :: (dereferenceable load (s20), align 4) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ep = COPY [[LDA_dms_lda_scalar_ld_idx_imm]] + ; CHECK-NEXT: $crscden = COPY [[COPY]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:mr31_scd = COPY [[COPY2]] + ; CHECK-NEXT: [[VMOV_alu_mv_alu_mv_scd_dm_dyn:%[0-9]+]]:acc2048, [[VMOV_alu_mv_alu_mv_scd_dm_dyn1:%[0-9]+]]:mr31_divs = VMOV_alu_mv_alu_mv_scd_dm_dyn [[COPY3]], implicit $crscden + ; CHECK-NEXT: $crscden = MOVX_mvx_cr_imm 1 + ; CHECK-NEXT: ST_dms_sts_scalar_st_idx_imm [[VMOV_alu_mv_alu_mv_scd_dm_dyn1]], [[COPY1]], 0 :: (store (s32)) + ; CHECK-NEXT: $dm0 = COPY [[VMOV_alu_mv_alu_mv_scd_dm_dyn]] + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $dm0 + %1:gprregbank(s32) = COPY $r0 + %2:ptrregbank(p0) = COPY $p0 + %3:modregbank(s20) = G_LOAD %2(p0) :: (dereferenceable load (s20)) + %7:ptrregbank(s20) = COPY %3(s20) + %4:ptrregbank(p0) = G_INTTOPTR %7(s20) + %5:accregbank(<32 x s64>), %6:gprregbank(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2ps.scd.expand.ACC2048.incr), %1(s32), %4(p0) + G_STORE %6(s32), %2(p0) :: (store (s32)) + %0:accregbank(<64 x s32>) = G_BITCAST 
%5(<32 x s64>) + $dm0 = COPY %0(<64 x s32>) + PseudoRET implicit $lr, implicit $dm0 + +... diff --git a/llvm/test/CodeGen/AIE/aie2ps/cascade-stream.ll b/llvm/test/CodeGen/AIE/aie2ps/cascade-stream.ll new file mode 100644 index 000000000000..ba0fbbfdd02e --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2ps/cascade-stream.ll @@ -0,0 +1,248 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; +; This file is licensed under the Apache License v2.0 with LLVM Exceptions. +; See https://llvm.org/LICENSE.txt for license information. +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +; (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates +; RUN: llc -O2 -mtriple=aie2ps %s -o - | FileCheck %s + +define dso_local noundef <64 x i8> @_Z21test_get_scd_v128int4i(i32 noundef %en) { +; CHECK-LABEL: _Z21test_get_scd_v128int4i: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv +; CHECK-NEXT: nop // Delay Slot 5 +; CHECK-NEXT: nop // Delay Slot 4 +; CHECK-NEXT: mov crscden, r0 // Delay Slot 3 +; CHECK-NEXT: vmov x0, scd // Delay Slot 2 +; CHECK-NEXT: movx crscden, #1 // Delay Slot 1 +entry: + %0 = tail call <16 x i32> @llvm.aie2ps.scd.read.vec(i32 %en) + %1 = bitcast <16 x i32> %0 to <64 x i8> + ret <64 x i8> %1 +} + + +define dso_local inreg noundef <16 x i32> @_Z21test_get_scd_v16acc32i(i32 noundef %en) { +; CHECK-LABEL: _Z21test_get_scd_v16acc32i: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv +; CHECK-NEXT: nop // Delay Slot 5 +; CHECK-NEXT: nop // Delay Slot 4 +; CHECK-NEXT: mov crscden, r0 // Delay Slot 3 +; CHECK-NEXT: vmov bmll0, scd // Delay Slot 2 +; CHECK-NEXT: movx crscden, #1 // Delay Slot 1 +entry: + %0 = tail call noundef <16 x i32> @llvm.aie2ps.scd.read.acc32(i32 %en) + ret <16 x i32> %0 +} + + +define dso_local inreg noundef <32 x i32> @_Z24test_get_scd_v32acc32_loi(i32 noundef %en) { +; CHECK-LABEL: _Z24test_get_scd_v32acc32_loi: +; CHECK: 
// %bb.0: // %entry +; CHECK-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv +; CHECK-NEXT: nop // Delay Slot 5 +; CHECK-NEXT: nop // Delay Slot 4 +; CHECK-NEXT: mov crscden, r0 // Delay Slot 3 +; CHECK-NEXT: vmov.0 cml0, scd // Delay Slot 2 +; CHECK-NEXT: movx crscden, #1 // Delay Slot 1 +entry: + %0 = tail call noundef <32 x i32> @llvm.aie2ps.scd.expand.lo(i32 %en) + ret <32 x i32> %0 +} + + +define dso_local inreg noundef <32 x i32> @_Z24test_get_scd_v32acc32_hii(i32 noundef %en) { +; CHECK-LABEL: _Z24test_get_scd_v32acc32_hii: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv +; CHECK-NEXT: nop // Delay Slot 5 +; CHECK-NEXT: nop // Delay Slot 4 +; CHECK-NEXT: mov crscden, r0 // Delay Slot 3 +; CHECK-NEXT: vmov.1 cml0, scd // Delay Slot 2 +; CHECK-NEXT: movx crscden, #1 // Delay Slot 1 +entry: + %0 = tail call noundef <32 x i32> @llvm.aie2ps.scd.expand.hi(i32 %en) + ret <32 x i32> %0 +} + + +define dso_local inreg noundef <64 x i32> @_Z23test_get_scd_v64acc32_0i(i32 noundef %en) { +; CHECK-LABEL: _Z23test_get_scd_v64acc32_0i: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv +; CHECK-NEXT: nop // Delay Slot 5 +; CHECK-NEXT: nop // Delay Slot 4 +; CHECK-NEXT: movx crscden, #0 // Delay Slot 3 +; CHECK-NEXT: vmov.0 dm0, scd // Delay Slot 2 +; CHECK-NEXT: movx crscden, #1 // Delay Slot 1 +entry: + %0 = tail call noundef <64 x i32> @llvm.aie2ps.scd.ACC2048(i32 %en, i32 0) + ret <64 x i32> %0 +} + + +define dso_local inreg noundef <64 x i32> @_Z23test_get_scd_v64acc32_1i(i32 noundef %en) { +; CHECK-LABEL: _Z23test_get_scd_v64acc32_1i: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ret lr +; CHECK-NEXT: nop // Delay Slot 5 +; CHECK-NEXT: nop // Delay Slot 4 +; CHECK-NEXT: nop // Delay Slot 3 +; CHECK-NEXT: vmov.1 dm0, scd // Delay Slot 2 +; CHECK-NEXT: nop // Delay Slot 1 +entry: + %0 = tail call noundef <64 x i32> @llvm.aie2ps.scd.ACC2048(i32 %en, i32 1) + ret <64 x i32> %0 +} + + +define dso_local 
inreg noundef <64 x i32> @_Z23test_get_scd_v64acc32_2i(i32 noundef %en) { +; CHECK-LABEL: _Z23test_get_scd_v64acc32_2i: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv +; CHECK-NEXT: nop // Delay Slot 5 +; CHECK-NEXT: nop // Delay Slot 4 +; CHECK-NEXT: movx crscden, #2 // Delay Slot 3 +; CHECK-NEXT: vmov.2 dm0, scd // Delay Slot 2 +; CHECK-NEXT: movx crscden, #1 // Delay Slot 1 +entry: + %0 = tail call noundef <64 x i32> @llvm.aie2ps.scd.ACC2048(i32 %en, i32 2) + ret <64 x i32> %0 +} + + +define dso_local inreg noundef <64 x i32> @_Z23test_get_scd_v64acc32_3i(i32 noundef %en) { +; CHECK-LABEL: _Z23test_get_scd_v64acc32_3i: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv +; CHECK-NEXT: nop // Delay Slot 5 +; CHECK-NEXT: nop // Delay Slot 4 +; CHECK-NEXT: movx crscden, #3 // Delay Slot 3 +; CHECK-NEXT: vmov.3 dm0, scd // Delay Slot 2 +; CHECK-NEXT: movx crscden, #1 // Delay Slot 1 +entry: + %0 = tail call noundef <64 x i32> @llvm.aie2ps.scd.ACC2048(i32 %en, i32 3) + ret <64 x i32> %0 +} + + +define dso_local inreg noundef <32 x i32> @_Z28test_get_scd_expand_v32acc32ii(i32 noundef %en, i32 noundef %pos) { +; CHECK-LABEL: _Z28test_get_scd_expand_v32acc32ii: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: nopa ; nopb ; ret lr; nopm ; nops +; CHECK-NEXT: nop // Delay Slot 5 +; CHECK-NEXT: nop // Delay Slot 4 +; CHECK-NEXT: or r31, r1, r1; mov crscden, r0 // Delay Slot 3 +; CHECK-NEXT: vmov dm0, scd, r31 // Delay Slot 2 +; CHECK-NEXT: movx crscden, #1 // Delay Slot 1 +entry: + %0 = tail call noundef <32 x i32> @llvm.aie2ps.scd.expand.ACC1024(i32 %en, i32 %pos) + ret <32 x i32> %0 +} + + +define dso_local inreg noundef <64 x i32> @_Z28test_get_scd_expand_v64acc32ii(i32 noundef %en, i32 noundef %pos) { +; CHECK-LABEL: _Z28test_get_scd_expand_v64acc32ii: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: nopa ; nopb ; ret lr; nopm ; nops +; CHECK-NEXT: nop // Delay Slot 5 +; CHECK-NEXT: nop // Delay Slot 4 +; 
CHECK-NEXT: or r31, r1, r1; mov crscden, r0 // Delay Slot 3 +; CHECK-NEXT: vmov dm0, scd, r31 // Delay Slot 2 +; CHECK-NEXT: movx crscden, #1 // Delay Slot 1 +entry: + %0 = tail call noundef <64 x i32> @llvm.aie2ps.scd.expand.ACC2048(i32 %en, i32 %pos) + ret <64 x i32> %0 +} + +define dso_local inreg noundef <32 x i32> @_Z33test_get_scd_expand_v32acc32_incriRi(i32 noundef %en, ptr nocapture nonnull align 4 dereferenceable(4) %pos) { +; CHECK-LABEL: _Z33test_get_scd_expand_v32acc32_incriRi: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lda r31, [p0, #0] +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: ret lr +; CHECK-NEXT: nop // Delay Slot 5 +; CHECK-NEXT: mov crscden, r0 // Delay Slot 4 +; CHECK-NEXT: vmov dm0, scd, r31++ // Delay Slot 3 +; CHECK-NEXT: st r31, [p0, #0] // Delay Slot 2 +; CHECK-NEXT: movx crscden, #1 // Delay Slot 1 +entry: + %0 = load i20, ptr %pos, align 4 + %1 = inttoptr i20 %0 to ptr + %2 = tail call { <16 x i64>, i32 } @llvm.aie2ps.scd.expand.ACC1024.incr(i32 %en, ptr %1) + %3 = extractvalue { <16 x i64>, i32 } %2, 1 + store i32 %3, ptr %pos, align 4 + %4 = extractvalue { <16 x i64>, i32 } %2, 0 + %5 = bitcast <16 x i64> %4 to <32 x i32> + ret <32 x i32> %5 +} + +define dso_local inreg noundef <64 x i32> @_Z33test_get_scd_expand_v64acc32_incriRi(i32 noundef %en, ptr nocapture nonnull align 4 dereferenceable(4) %pos) { +; CHECK-LABEL: _Z33test_get_scd_expand_v64acc32_incriRi: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lda r31, [p0, #0] +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: ret lr +; CHECK-NEXT: nop // Delay Slot 5 +; CHECK-NEXT: mov crscden, r0 // Delay Slot 4 +; CHECK-NEXT: vmov dm0, scd, r31++ // Delay Slot 3 +; CHECK-NEXT: st r31, [p0, #0] // Delay Slot 2 +; CHECK-NEXT: movx crscden, #1 // Delay Slot 1 +entry: + %0 = load i20, ptr %pos, align 4 + %1 = inttoptr i20 %0 to ptr + %2 = tail call { <32 x i64>, i32 } @llvm.aie2ps.scd.expand.ACC2048.incr(i32 %en, ptr %1) + %3 = extractvalue { 
<32 x i64>, i32 } %2, 1 + store i32 %3, ptr %pos, align 4 + %4 = extractvalue { <32 x i64>, i32 } %2, 0 + %5 = bitcast <32 x i64> %4 to <64 x i32> + ret <64 x i32> %5 +} + +define dso_local void @_Z12test_put_mcdDv64_DB8_i(<64 x i8> noundef %a, i32 noundef %en) { +; CHECK-LABEL: _Z12test_put_mcdDv64_DB8_i: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv +; CHECK-NEXT: nop // Delay Slot 5 +; CHECK-NEXT: nop // Delay Slot 4 +; CHECK-NEXT: nop // Delay Slot 3 +; CHECK-NEXT: mov crmcden, r0 // Delay Slot 2 +; CHECK-NEXT: vmov mcd, x0; movx crmcden, #1 // Delay Slot 1 +entry: + %0 = bitcast <64 x i8> %a to <16 x i32> + tail call void @llvm.aie2ps.mcd.write.vec(<16 x i32> %0, i32 %en) + ret void +} + + +define dso_local void @_Z12test_put_mcdDv8_u7__acc64i(<8 x i64> inreg noundef %a, i32 noundef %en) { +; CHECK-LABEL: _Z12test_put_mcdDv8_u7__acc64i: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv +; CHECK-NEXT: nop // Delay Slot 5 +; CHECK-NEXT: nop // Delay Slot 4 +; CHECK-NEXT: nop // Delay Slot 3 +; CHECK-NEXT: mov crmcden, r0 // Delay Slot 2 +; CHECK-NEXT: vmov mcd, bmll0; movx crmcden, #1 // Delay Slot 1 +entry: + tail call void @llvm.aie2ps.mcd.write.acc32(<8 x i64> %a, i32 %en) + ret void +} + +declare <16 x i32> @llvm.aie2ps.scd.read.vec(i32) +declare <16 x i32> @llvm.aie2ps.scd.read.acc32(i32) +declare <32 x i32> @llvm.aie2ps.scd.expand.lo(i32) +declare <32 x i32> @llvm.aie2ps.scd.expand.hi(i32) +declare <64 x i32> @llvm.aie2ps.scd.ACC2048(i32, i32) +declare <32 x i32> @llvm.aie2ps.scd.expand.ACC1024(i32, i32) +declare <64 x i32> @llvm.aie2ps.scd.expand.ACC2048(i32, i32) +declare { <16 x i64>, i32 } @llvm.aie2ps.scd.expand.ACC1024.incr(i32, ptr) +declare { <32 x i64>, i32 } @llvm.aie2ps.scd.expand.ACC2048.incr(i32, ptr) +declare void @llvm.aie2ps.mcd.write.vec(<16 x i32>, i32) +declare void @llvm.aie2ps.mcd.write.acc32(<8 x i64>, i32) \ No newline at end of file diff --git 
a/llvm/test/CodeGen/AIE/aie2ps/streams.ll b/llvm/test/CodeGen/AIE/aie2ps/streams.ll new file mode 100644 index 000000000000..88712a263420 --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2ps/streams.ll @@ -0,0 +1,211 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; +; This file is licensed under the Apache License v2.0 with LLVM Exceptions. +; See https://llvm.org/LICENSE.txt for license information. +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +; (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates +; RUN: llc < %s -verify-machineinstrs -mtriple=aie2ps | FileCheck %s + +define dso_local noundef i32 @_Z11test_get_ssv() { +; CHECK-LABEL: _Z11test_get_ssv: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov r0, ss; nopb ; nops ; nopxm ; nopv +; CHECK-NEXT: nop +; CHECK-NEXT: ret lr +; CHECK-NEXT: nop // Delay Slot 5 +; CHECK-NEXT: nop // Delay Slot 4 +; CHECK-NEXT: nop // Delay Slot 3 +; CHECK-NEXT: nop // Delay Slot 2 +; CHECK-NEXT: nop // Delay Slot 1 +entry: + %0 = tail call { i32, i32 } @llvm.aie2ps.get.ss() + %1 = extractvalue { i32, i32 } %0, 0 + ret i32 %1 +} + + +define dso_local noundef i32 @_Z14test_get_ss_nbRbS_(ptr nocapture nonnull writeonly align 1 dereferenceable(1) %success, ptr nocapture nonnull writeonly align 1 dereferenceable(1) %tlast) { +; CHECK-LABEL: _Z14test_get_ss_nbRbS_: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov.nb r0, ss; nopb ; nopx +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: st.s8 r6, [p1, #0] +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: mova r4, #1; mov r2, srss0 +; CHECK-NEXT: and r6, r2, r4 +; CHECK-NEXT: nop +; CHECK-NEXT: st.s8 r2, [p0, #0] +; CHECK-NEXT: nop +; CHECK-NEXT: ret lr +; CHECK-NEXT: mova r6, #-1; extend.u8 r2, r2 // Delay Slot 5 +; CHECK-NEXT: lshl r2, r2, r6 // Delay Slot 4 +; CHECK-NEXT: and r2, r2, r4 // Delay Slot 3 +; CHECK-NEXT: nop // Delay Slot 2 +; CHECK-NEXT: nop // Delay Slot 1 +entry: + %0 
= tail call { i32, i32 } @llvm.aie2ps.get.ss.nb() + %1 = extractvalue { i32, i32 } %0, 1 + %2 = extractvalue { i32, i32 } %0, 0 + %3 = trunc i32 %1 to i8 + %frombool.i = and i8 %3, 1 + store i8 %frombool.i, ptr %tlast, align 1 + %4 = lshr i8 %3, 1 + %frombool3.i = and i8 %4, 1 + store i8 %frombool3.i, ptr %success, align 1 + ret i32 %2 +} + + +define dso_local void @_Z11test_put_msii(i32 noundef %a, i32 noundef %tlast) { +; CHECK-LABEL: _Z11test_put_msii: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv +; CHECK-NEXT: nopx // Delay Slot 5 +; CHECK-NEXT: mov r28, r1 // Delay Slot 4 +; CHECK-NEXT: mov ms, r0, r28 // Delay Slot 3 +; CHECK-NEXT: nop // Delay Slot 2 +; CHECK-NEXT: nop // Delay Slot 1 +entry: + tail call void @llvm.aie2ps.put.ms(i32 %a, i32 %tlast) + ret void +} + + +define dso_local void @_Z14test_put_ms_nbDv2_u6__bf16Rb(<2 x bfloat> noundef %val, ptr nocapture nonnull writeonly align 1 dereferenceable(1) %success) { +; CHECK-LABEL: _Z14test_put_ms_nbDv2_u6__bf16Rb: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: st.s8 r0, [p0, #0]; nopb ; nopx ; mov.nb ms, r0 +; CHECK-NEXT: nop +; CHECK-NEXT: ret lr +; CHECK-NEXT: nop // Delay Slot 5 +; CHECK-NEXT: mov r0, srms0 // Delay Slot 4 +; CHECK-NEXT: nez r0, r0 // Delay Slot 3 +; CHECK-NEXT: nop // Delay Slot 2 +; CHECK-NEXT: nop // Delay Slot 1 +entry: + %0 = bitcast <2 x bfloat> %val to i32 + %1 = tail call i32 @llvm.aie2ps.put.ms.nb(i32 %0, i32 0) + %tobool.i = icmp ne i32 %1, 0 + %frombool.i = zext i1 %tobool.i to i8 + store i8 %frombool.i, ptr %success, align 1 + ret void +} + + +define dso_local void @_Z19test_put_ms_v64bf16Dv64_u6__bf16ii(<64 x bfloat> noundef %a, i32 noundef %en, i32 noundef %tlast) { +; CHECK-LABEL: _Z19test_put_ms_v64bf16Dv64_u6__bf16ii: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: nops ; vextract.64 r3:r2, x4, #0, vaddsign1 +; CHECK-NEXT: nop +; CHECK-NEXT: mov ms, r2; vextract.64 r5:r4, x4, #1, vaddsign1 +; CHECK-NEXT: mov ms, r3 +; CHECK-NEXT: mov 
ms, r4; vextract.64 r3:r2, x4, #2, vaddsign1 +; CHECK-NEXT: mov ms, r5 +; CHECK-NEXT: mov ms, r2; vextract.64 r5:r4, x4, #3, vaddsign1 +; CHECK-NEXT: mov ms, r3 +; CHECK-NEXT: mov ms, r4; vextract.64 r3:r2, x4, #4, vaddsign1 +; CHECK-NEXT: mov ms, r5 +; CHECK-NEXT: mov ms, r2; vextract.64 r5:r4, x4, #5, vaddsign1 +; CHECK-NEXT: mov ms, r3 +; CHECK-NEXT: mov ms, r4; vextract.64 r3:r2, x4, #6, vaddsign1 +; CHECK-NEXT: mov ms, r5 +; CHECK-NEXT: mov ms, r2; vextract.64 r5:r4, x4, #7, vaddsign1 +; CHECK-NEXT: mov ms, r3 +; CHECK-NEXT: mov ms, r4; vextract.64 r3:r2, x5, #0, vaddsign1 +; CHECK-NEXT: mov ms, r5 +; CHECK-NEXT: mov ms, r2; vextract.64 r5:r4, x5, #1, vaddsign1 +; CHECK-NEXT: mov ms, r3 +; CHECK-NEXT: mov ms, r4; vextract.64 r3:r2, x5, #2, vaddsign1 +; CHECK-NEXT: mov ms, r5 +; CHECK-NEXT: mov ms, r2; vextract.64 r5:r4, x5, #3, vaddsign1 +; CHECK-NEXT: mov ms, r3 +; CHECK-NEXT: mov ms, r4; vextract.64 r3:r2, x5, #4, vaddsign1 +; CHECK-NEXT: mov ms, r5 +; CHECK-NEXT: mov ms, r2; vextract.64 r5:r4, x5, #5, vaddsign1 +; CHECK-NEXT: mov ms, r3 +; CHECK-NEXT: mov ms, r4; vextract.64 r3:r2, x5, #6, vaddsign1 +; CHECK-NEXT: mov ms, r5 +; CHECK-NEXT: mov ms, r2; ret lr; vextract.64 r5:r4, x5, #7, vaddsign1 +; CHECK-NEXT: mov ms, r3 // Delay Slot 5 +; CHECK-NEXT: mov ms, r4; mov r28, r1 // Delay Slot 4 +; CHECK-NEXT: mov ms, r5, r28 // Delay Slot 3 +; CHECK-NEXT: nop // Delay Slot 2 +; CHECK-NEXT: nop // Delay Slot 1 +entry: + %0 = bitcast <64 x bfloat> %a to <32 x i32> + %vecext.i.i.i5.i.i.i = extractelement <32 x i32> %0, i64 0 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.i.i.i, i32 0) + %vecext.i.i.i5.1.i.i.i = extractelement <32 x i32> %0, i64 1 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.1.i.i.i, i32 0) + %vecext.i.i.i5.2.i.i.i = extractelement <32 x i32> %0, i64 2 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.2.i.i.i, i32 0) + %vecext.i.i.i5.3.i.i.i = extractelement <32 x i32> %0, i64 3 + tail call void @llvm.aie2ps.put.ms(i32 
%vecext.i.i.i5.3.i.i.i, i32 0) + %vecext.i.i.i5.4.i.i.i = extractelement <32 x i32> %0, i64 4 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.4.i.i.i, i32 0) + %vecext.i.i.i5.5.i.i.i = extractelement <32 x i32> %0, i64 5 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.5.i.i.i, i32 0) + %vecext.i.i.i5.6.i.i.i = extractelement <32 x i32> %0, i64 6 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.6.i.i.i, i32 0) + %vecext.i.i.i5.7.i.i.i = extractelement <32 x i32> %0, i64 7 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.7.i.i.i, i32 0) + %vecext.i.i.i5.8.i.i.i = extractelement <32 x i32> %0, i64 8 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.8.i.i.i, i32 0) + %vecext.i.i.i5.9.i.i.i = extractelement <32 x i32> %0, i64 9 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.9.i.i.i, i32 0) + %vecext.i.i.i5.10.i.i.i = extractelement <32 x i32> %0, i64 10 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.10.i.i.i, i32 0) + %vecext.i.i.i5.11.i.i.i = extractelement <32 x i32> %0, i64 11 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.11.i.i.i, i32 0) + %vecext.i.i.i5.12.i.i.i = extractelement <32 x i32> %0, i64 12 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.12.i.i.i, i32 0) + %vecext.i.i.i5.13.i.i.i = extractelement <32 x i32> %0, i64 13 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.13.i.i.i, i32 0) + %vecext.i.i.i5.14.i.i.i = extractelement <32 x i32> %0, i64 14 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.14.i.i.i, i32 0) + %vecext.i.i.i.i.i.i = extractelement <32 x i32> %0, i64 15 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i.i.i.i, i32 0) + %vecext.i.i.i5.i.i4.i = extractelement <32 x i32> %0, i64 16 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.i.i4.i, i32 0) + %vecext.i.i.i5.1.i.i5.i = extractelement <32 x i32> %0, i64 17 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.1.i.i5.i, i32 0) + %vecext.i.i.i5.2.i.i6.i = extractelement <32 x i32> %0, i64 18 + 
tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.2.i.i6.i, i32 0) + %vecext.i.i.i5.3.i.i7.i = extractelement <32 x i32> %0, i64 19 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.3.i.i7.i, i32 0) + %vecext.i.i.i5.4.i.i8.i = extractelement <32 x i32> %0, i64 20 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.4.i.i8.i, i32 0) + %vecext.i.i.i5.5.i.i9.i = extractelement <32 x i32> %0, i64 21 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.5.i.i9.i, i32 0) + %vecext.i.i.i5.6.i.i10.i = extractelement <32 x i32> %0, i64 22 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.6.i.i10.i, i32 0) + %vecext.i.i.i5.7.i.i11.i = extractelement <32 x i32> %0, i64 23 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.7.i.i11.i, i32 0) + %vecext.i.i.i5.8.i.i12.i = extractelement <32 x i32> %0, i64 24 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.8.i.i12.i, i32 0) + %vecext.i.i.i5.9.i.i13.i = extractelement <32 x i32> %0, i64 25 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.9.i.i13.i, i32 0) + %vecext.i.i.i5.10.i.i14.i = extractelement <32 x i32> %0, i64 26 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.10.i.i14.i, i32 0) + %vecext.i.i.i5.11.i.i15.i = extractelement <32 x i32> %0, i64 27 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.11.i.i15.i, i32 0) + %vecext.i.i.i5.12.i.i16.i = extractelement <32 x i32> %0, i64 28 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.12.i.i16.i, i32 0) + %vecext.i.i.i5.13.i.i17.i = extractelement <32 x i32> %0, i64 29 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.13.i.i17.i, i32 0) + %vecext.i.i.i5.14.i.i18.i = extractelement <32 x i32> %0, i64 30 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i5.14.i.i18.i, i32 0) + %vecext.i.i.i.i.i19.i = extractelement <32 x i32> %0, i64 31 + tail call void @llvm.aie2ps.put.ms(i32 %vecext.i.i.i.i.i19.i, i32 %tlast) + ret void +} + +declare { i32, i32 } @llvm.aie2ps.get.ss() +declare { i32, i32 } 
@llvm.aie2ps.get.ss.nb() +declare void @llvm.aie2ps.put.ms(i32, i32) +declare i32 @llvm.aie2ps.put.ms.nb(i32, i32)