diff --git a/llvm/lib/Target/DirectX/CMakeLists.txt b/llvm/lib/Target/DirectX/CMakeLists.txt index 6c079517e22d6..9091cc0b3f6a5 100644 --- a/llvm/lib/Target/DirectX/CMakeLists.txt +++ b/llvm/lib/Target/DirectX/CMakeLists.txt @@ -26,6 +26,7 @@ add_llvm_target(DirectXCodeGen DXILForwardHandleAccesses.cpp DXILFlattenArrays.cpp DXILIntrinsicExpansion.cpp + DXILMemIntrinsics.cpp DXILOpBuilder.cpp DXILOpLowering.cpp DXILPostOptimizationValidation.cpp @@ -37,7 +38,7 @@ add_llvm_target(DirectXCodeGen DXILTranslateMetadata.cpp DXILRootSignature.cpp DXILLegalizePass.cpp - + LINK_COMPONENTS Analysis AsmPrinter diff --git a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp index 3427968d199f6..71e52d608c8bb 100644 --- a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp +++ b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp @@ -269,8 +269,8 @@ static bool upcastI8AllocasAndUses(Instruction &I, if (CastInst *Cast = dyn_cast(LU)) Ty = Cast->getType(); else if (CallInst *CI = dyn_cast(LU)) { - if (CI->getIntrinsicID() == Intrinsic::memset) - Ty = Type::getInt32Ty(CI->getContext()); + assert(CI->getIntrinsicID() != Intrinsic::memset && + "memset should have been eliminated in an earlier pass"); } if (!Ty) @@ -346,168 +346,6 @@ downcastI64toI32InsertExtractElements(Instruction &I, return false; } -static void emitMemcpyExpansion(IRBuilder<> &Builder, Value *Dst, Value *Src, - ConstantInt *Length) { - - uint64_t ByteLength = Length->getZExtValue(); - // If length to copy is zero, no memcpy is needed. 
- if (ByteLength == 0) - return; - - const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout(); - - auto GetArrTyFromVal = [](Value *Val) -> ArrayType * { - assert(isa(Val) || - isa(Val) && - "Expected Val to be an Alloca or Global Variable"); - if (auto *Alloca = dyn_cast(Val)) - return dyn_cast(Alloca->getAllocatedType()); - if (auto *GlobalVar = dyn_cast(Val)) - return dyn_cast(GlobalVar->getValueType()); - return nullptr; - }; - - ArrayType *DstArrTy = GetArrTyFromVal(Dst); - assert(DstArrTy && "Expected Dst of memcpy to be a Pointer to an Array Type"); - if (auto *DstGlobalVar = dyn_cast(Dst)) - assert(!DstGlobalVar->isConstant() && - "The Dst of memcpy must not be a constant Global Variable"); - [[maybe_unused]] ArrayType *SrcArrTy = GetArrTyFromVal(Src); - assert(SrcArrTy && "Expected Src of memcpy to be a Pointer to an Array Type"); - - Type *DstElemTy = DstArrTy->getElementType(); - uint64_t DstElemByteSize = DL.getTypeStoreSize(DstElemTy); - assert(DstElemByteSize > 0 && "Dst element type store size must be set"); - Type *SrcElemTy = SrcArrTy->getElementType(); - [[maybe_unused]] uint64_t SrcElemByteSize = DL.getTypeStoreSize(SrcElemTy); - assert(SrcElemByteSize > 0 && "Src element type store size must be set"); - - // This assumption simplifies implementation and covers currently-known - // use-cases for DXIL. It may be relaxed in the future if required. 
-  assert(DstElemTy == SrcElemTy &&
-         "The element types of Src and Dst arrays must match");
-
-  [[maybe_unused]] uint64_t DstArrNumElems = DstArrTy->getArrayNumElements();
-  assert(DstElemByteSize * DstArrNumElems >= ByteLength &&
-         "Dst array size must be at least as large as the memcpy length");
-  [[maybe_unused]] uint64_t SrcArrNumElems = SrcArrTy->getArrayNumElements();
-  assert(SrcElemByteSize * SrcArrNumElems >= ByteLength &&
-         "Src array size must be at least as large as the memcpy length");
-
-  uint64_t NumElemsToCopy = ByteLength / DstElemByteSize;
-  assert(ByteLength % DstElemByteSize == 0 &&
-         "memcpy length must be divisible by array element type");
-  for (uint64_t I = 0; I < NumElemsToCopy; ++I) {
-    SmallVector<Value *, 2> Indices = {Builder.getInt32(0),
-                                       Builder.getInt32(I)};
-    Value *SrcPtr = Builder.CreateInBoundsGEP(SrcArrTy, Src, Indices, "gep");
-    Value *SrcVal = Builder.CreateLoad(SrcElemTy, SrcPtr);
-    Value *DstPtr = Builder.CreateInBoundsGEP(DstArrTy, Dst, Indices, "gep");
-    Builder.CreateStore(SrcVal, DstPtr);
-  }
-}
-
-static void emitMemsetExpansion(IRBuilder<> &Builder, Value *Dst, Value *Val,
-                                ConstantInt *SizeCI,
-                                DenseMap<Value *, Value *> &ReplacedValues) {
-  [[maybe_unused]] const DataLayout &DL =
-      Builder.GetInsertBlock()->getModule()->getDataLayout();
-  [[maybe_unused]] uint64_t OrigSize = SizeCI->getZExtValue();
-
-  AllocaInst *Alloca = dyn_cast<AllocaInst>(Dst);
-
-  assert(Alloca && "Expected memset on an Alloca");
-  assert(OrigSize == Alloca->getAllocationSize(DL)->getFixedValue() &&
-         "Expected for memset size to match DataLayout size");
-
-  Type *AllocatedTy = Alloca->getAllocatedType();
-  ArrayType *ArrTy = dyn_cast<ArrayType>(AllocatedTy);
-  assert(ArrTy && "Expected Alloca for an Array Type");
-
-  Type *ElemTy = ArrTy->getElementType();
-  uint64_t Size = ArrTy->getArrayNumElements();
-
-  [[maybe_unused]] uint64_t ElemSize = DL.getTypeStoreSize(ElemTy);
-
-  assert(ElemSize > 0 && "Size must be set");
-  assert(OrigSize == ElemSize * Size && "Size in bytes must match");
-
-  Value *TypedVal = Val;
-
-  if (Val->getType() != ElemTy) {
-    if (ReplacedValues[Val]) {
-      // Note for i8 replacements if we know them we should use them.
-      // Further if this is a constant ReplacedValues will return null
-      // so we will stick to TypedVal = Val
-      TypedVal = ReplacedValues[Val];
-
-    } else {
-      // This case Val is a ConstantInt so the cast folds away.
-      // However if we don't do the cast the store below ends up being
-      // an i8.
-      TypedVal = Builder.CreateIntCast(Val, ElemTy, false);
-    }
-  }
-
-  for (uint64_t I = 0; I < Size; ++I) {
-    Value *Zero = Builder.getInt32(0);
-    Value *Offset = Builder.getInt32(I);
-    Value *Ptr = Builder.CreateGEP(ArrTy, Dst, {Zero, Offset}, "gep");
-    Builder.CreateStore(TypedVal, Ptr);
-  }
-}
-
-// Expands the instruction `I` into corresponding loads and stores if it is a
-// memcpy call. In that case, the call instruction is added to the `ToRemove`
-// vector. `ReplacedValues` is unused.
-static bool legalizeMemCpy(Instruction &I,
-                           SmallVectorImpl<Instruction *> &ToRemove,
-                           DenseMap<Value *, Value *> &ReplacedValues) {
-
-  CallInst *CI = dyn_cast<CallInst>(&I);
-  if (!CI)
-    return false;
-
-  Intrinsic::ID ID = CI->getIntrinsicID();
-  if (ID != Intrinsic::memcpy)
-    return false;
-
-  IRBuilder<> Builder(&I);
-  Value *Dst = CI->getArgOperand(0);
-  Value *Src = CI->getArgOperand(1);
-  ConstantInt *Length = dyn_cast<ConstantInt>(CI->getArgOperand(2));
-  assert(Length && "Expected Length to be a ConstantInt");
-  [[maybe_unused]] ConstantInt *IsVolatile =
-      dyn_cast<ConstantInt>(CI->getArgOperand(3));
-  assert(IsVolatile && "Expected IsVolatile to be a ConstantInt");
-  assert(IsVolatile->getZExtValue() == 0 && "Expected IsVolatile to be false");
-  emitMemcpyExpansion(Builder, Dst, Src, Length);
-  ToRemove.push_back(CI);
-  return true;
-}
-
-static bool legalizeMemSet(Instruction &I,
-                           SmallVectorImpl<Instruction *> &ToRemove,
-                           DenseMap<Value *, Value *> &ReplacedValues) {
-
-  CallInst *CI = dyn_cast<CallInst>(&I);
-  if (!CI)
-    return false;
-
-  Intrinsic::ID ID = CI->getIntrinsicID();
-  if (ID != Intrinsic::memset)
-    return false;
-
-  IRBuilder<> Builder(&I);
-  Value *Dst = CI->getArgOperand(0);
-  Value *Val = CI->getArgOperand(1);
-  ConstantInt *Size = dyn_cast<ConstantInt>(CI->getArgOperand(2));
-  assert(Size && "Expected Size to be a ConstantInt");
-  emitMemsetExpansion(Builder, Dst, Val, Size, ReplacedValues);
-  ToRemove.push_back(CI);
-  return true;
-}
-
 static bool updateFnegToFsub(Instruction &I,
                              SmallVectorImpl<Instruction *> &ToRemove,
                              DenseMap<Value *, Value *> &) {
@@ -660,8 +498,6 @@ class DXILLegalizationPipeline {
     LegalizationPipeline[Stage1].push_back(fixI8UseChain);
     LegalizationPipeline[Stage1].push_back(legalizeGetHighLowi64Bytes);
     LegalizationPipeline[Stage1].push_back(legalizeFreeze);
-    LegalizationPipeline[Stage1].push_back(legalizeMemCpy);
-    LegalizationPipeline[Stage1].push_back(legalizeMemSet);
     LegalizationPipeline[Stage1].push_back(updateFnegToFsub);
     // Note: legalizeGetHighLowi64Bytes and
     // downcastI64toI32InsertExtractElements both modify extractelement, so they
diff --git a/llvm/lib/Target/DirectX/DXILMemIntrinsics.cpp b/llvm/lib/Target/DirectX/DXILMemIntrinsics.cpp
new file mode 100644
index 0000000000000..2542fb315f89a
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILMemIntrinsics.cpp
@@ -0,0 +1,214 @@
+//===- DXILMemIntrinsics.cpp - Eliminate Memory Intrinsics ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "DXILMemIntrinsics.h"
+#include "DirectX.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+
+#define DEBUG_TYPE "dxil-mem-intrinsics"
+
+using namespace llvm;
+
+/// Replace \p MemSet with one store per element of the destination array.
+///
+/// Only a constant-length memset that covers an entire array alloca is
+/// supported; this is checked with assertions only. The call is erased once
+/// the stores have been emitted.
+static void expandMemSet(MemSetInst *MemSet) {
+  IRBuilder<> Builder(MemSet);
+  Value *Dst = MemSet->getDest();
+  Value *Val = MemSet->getValue();
+  ConstantInt *LengthCI = dyn_cast<ConstantInt>(MemSet->getLength());
+  assert(LengthCI && "Expected length to be a ConstantInt");
+
+  [[maybe_unused]] const DataLayout &DL =
+      Builder.GetInsertBlock()->getModule()->getDataLayout();
+  [[maybe_unused]] uint64_t OrigLength = LengthCI->getZExtValue();
+
+  AllocaInst *Alloca = dyn_cast<AllocaInst>(Dst);
+
+  assert(Alloca && "Expected memset on an Alloca");
+  assert(OrigLength == Alloca->getAllocationSize(DL)->getFixedValue() &&
+         "Expected for memset size to match DataLayout size");
+
+  Type *AllocatedTy = Alloca->getAllocatedType();
+  ArrayType *ArrTy = dyn_cast<ArrayType>(AllocatedTy);
+  assert(ArrTy && "Expected Alloca for an Array Type");
+
+  Type *ElemTy = ArrTy->getElementType();
+  uint64_t Size = ArrTy->getArrayNumElements();
+
+  [[maybe_unused]] uint64_t ElemSize = DL.getTypeStoreSize(ElemTy);
+
+  assert(ElemSize > 0 && "Size must be set");
+  assert(OrigLength == ElemSize * Size && "Size in bytes must match");
+
+  // The fill value is converted to the element type with an unsigned integer
+  // cast so that the stores below are typed like the array elements.
+  // NOTE(review): that only equals a byte-wise fill for a zero value or for
+  // single-byte elements -- confirm wider non-zero fills cannot reach here.
+  Value *TypedVal = Val;
+  if (Val->getType() != ElemTy)
+    TypedVal = Builder.CreateIntCast(Val, ElemTy, false);
+
+  Value *Zero = Builder.getInt32(0);
+  for (uint64_t I = 0; I < Size; ++I) {
+    Value *Offset = Builder.getInt32(I);
+    Value *Ptr = Builder.CreateGEP(ArrTy, Dst, {Zero, Offset}, "gep");
+    Builder.CreateStore(TypedVal, Ptr);
+  }
+
+  MemSet->eraseFromParent();
+}
+
+/// Replace \p MemCpy with a load/store pair per array element.
+///
+/// Source and destination must each be an alloca or global variable of array
+/// type, share one element type, and the constant byte length must be a whole
+/// number of elements; these preconditions are checked with assertions only.
+/// The call is always erased, including when the length is zero.
+static void expandMemCpy(MemCpyInst *MemCpy) {
+  IRBuilder<> Builder(MemCpy);
+  Value *Dst = MemCpy->getDest();
+  Value *Src = MemCpy->getSource();
+  ConstantInt *LengthCI = dyn_cast<ConstantInt>(MemCpy->getLength());
+  assert(LengthCI && "Expected Length to be a ConstantInt");
+  assert(!MemCpy->isVolatile() && "Handling for volatile not implemented");
+
+  uint64_t ByteLength = LengthCI->getZExtValue();
+  // A zero-length memcpy copies nothing, but the call must still be erased:
+  // the caller removes the intrinsic declaration once all users are gone.
+  if (ByteLength == 0) {
+    MemCpy->eraseFromParent();
+    return;
+  }
+
+  const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout();
+
+  auto GetArrTyFromVal = [](Value *Val) -> ArrayType * {
+    assert((isa<AllocaInst>(Val) || isa<GlobalVariable>(Val)) &&
+           "Expected Val to be an Alloca or Global Variable");
+    if (auto *Alloca = dyn_cast<AllocaInst>(Val))
+      return dyn_cast<ArrayType>(Alloca->getAllocatedType());
+    if (auto *GlobalVar = dyn_cast<GlobalVariable>(Val))
+      return dyn_cast<ArrayType>(GlobalVar->getValueType());
+    return nullptr;
+  };
+
+  ArrayType *DstArrTy = GetArrTyFromVal(Dst);
+  assert(DstArrTy && "Expected Dst of memcpy to be a Pointer to an Array Type");
+  if (auto *DstGlobalVar = dyn_cast<GlobalVariable>(Dst))
+    assert(!DstGlobalVar->isConstant() &&
+           "The Dst of memcpy must not be a constant Global Variable");
+  [[maybe_unused]] ArrayType *SrcArrTy = GetArrTyFromVal(Src);
+  assert(SrcArrTy && "Expected Src of memcpy to be a Pointer to an Array Type");
+
+  Type *DstElemTy = DstArrTy->getElementType();
+  uint64_t DstElemByteSize = DL.getTypeStoreSize(DstElemTy);
+  assert(DstElemByteSize > 0 && "Dst element type store size must be set");
+  Type *SrcElemTy = SrcArrTy->getElementType();
+  [[maybe_unused]] uint64_t SrcElemByteSize = DL.getTypeStoreSize(SrcElemTy);
+  assert(SrcElemByteSize > 0 && "Src element type store size must be set");
+
+  // This assumption simplifies implementation and covers currently-known
+  // use-cases for DXIL. It may be relaxed in the future if required.
+  assert(DstElemTy == SrcElemTy &&
+         "The element types of Src and Dst arrays must match");
+
+  [[maybe_unused]] uint64_t DstArrNumElems = DstArrTy->getArrayNumElements();
+  assert(DstElemByteSize * DstArrNumElems >= ByteLength &&
+         "Dst array size must be at least as large as the memcpy length");
+  [[maybe_unused]] uint64_t SrcArrNumElems = SrcArrTy->getArrayNumElements();
+  assert(SrcElemByteSize * SrcArrNumElems >= ByteLength &&
+         "Src array size must be at least as large as the memcpy length");
+
+  uint64_t NumElemsToCopy = ByteLength / DstElemByteSize;
+  assert(ByteLength % DstElemByteSize == 0 &&
+         "memcpy length must be divisible by array element type");
+  for (uint64_t I = 0; I < NumElemsToCopy; ++I) {
+    SmallVector<Value *, 2> Indices = {Builder.getInt32(0),
+                                       Builder.getInt32(I)};
+    Value *SrcPtr = Builder.CreateInBoundsGEP(SrcArrTy, Src, Indices, "gep");
+    Value *SrcVal = Builder.CreateLoad(SrcElemTy, SrcPtr);
+    Value *DstPtr = Builder.CreateInBoundsGEP(DstArrTy, Dst, Indices, "gep");
+    Builder.CreateStore(SrcVal, DstPtr);
+  }
+
+  MemCpy->eraseFromParent();
+}
+
+/// memmove is not supported yet: abort with a fatal error if one is reached.
+static void expandMemMove(MemMoveInst *MemMove) {
+  report_fatal_error("memmove expansion is not implemented yet.");
+}
+
+/// Expand every call to the memcpy, memmove and memset intrinsic families in
+/// \p M into explicit loads and stores, then erase the intrinsic declarations.
+///
+/// \returns true if the module was modified.
+static bool eliminateMemIntrinsics(Module &M) {
+  bool Changed = false;
+  for (auto &F : make_early_inc_range(M.functions())) {
+    Intrinsic::ID IID = F.getIntrinsicID();
+    switch (IID) {
+    case Intrinsic::memcpy:
+    case Intrinsic::memcpy_inline:
+    case Intrinsic::memmove:
+    case Intrinsic::memset:
+    case Intrinsic::memset_inline:
+      break;
+    default:
+      continue;
+    }
+    // Expanding a call erases it, so iterate with an early-increment range.
+    for (User *U : make_early_inc_range(F.users())) {
+      if (auto *MemSet = dyn_cast<MemSetInst>(U))
+        expandMemSet(MemSet);
+      else if (auto *MemCpy = dyn_cast<MemCpyInst>(U))
+        expandMemCpy(MemCpy);
+      else if (auto *MemMove = dyn_cast<MemMoveInst>(U))
+        expandMemMove(MemMove);
+      else
+        llvm_unreachable("Unhandled memory intrinsic");
+    }
+    assert(F.user_empty() && "Mem intrinsic not eliminated?");
+    // Erasing the declaration modifies the module even if it had no users.
+    F.eraseFromParent();
+    Changed = true;
+  }
+  return Changed;
+}
+
+PreservedAnalyses DXILMemIntrinsics::run(Module &M, ModuleAnalysisManager &) {
+  if (eliminateMemIntrinsics(M))
+    return PreservedAnalyses::none();
+  return PreservedAnalyses::all();
+}
+
+namespace {
+/// Legacy pass manager wrapper around eliminateMemIntrinsics.
+class DXILMemIntrinsicsLegacy : public ModulePass {
+public:
+  bool runOnModule(Module &M) override { return eliminateMemIntrinsics(M); }
+  DXILMemIntrinsicsLegacy() : ModulePass(ID) {}
+
+  static char ID; // Pass identification.
+};
+} // namespace
+char DXILMemIntrinsicsLegacy::ID = 0;
+
+INITIALIZE_PASS_BEGIN(DXILMemIntrinsicsLegacy, DEBUG_TYPE,
+                      "DXIL Memory Intrinsic Elimination", false, false)
+INITIALIZE_PASS_END(DXILMemIntrinsicsLegacy, DEBUG_TYPE,
+                    "DXIL Memory Intrinsic Elimination", false, false)
+
+ModulePass *llvm::createDXILMemIntrinsicsLegacyPass() {
+  return new DXILMemIntrinsicsLegacy();
+}
diff --git a/llvm/lib/Target/DirectX/DXILMemIntrinsics.h b/llvm/lib/Target/DirectX/DXILMemIntrinsics.h
new file mode 100644
index 0000000000000..46f105026d909
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILMemIntrinsics.h
@@ -0,0 +1,25 @@
+//===- DXILMemIntrinsics.h - Eliminate Memory Intrinsics -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_DIRECTX_DXILMEMINTRINSICS_H
+#define LLVM_TARGET_DIRECTX_DXILMEMINTRINSICS_H
+
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+/// Transform all llvm memory intrinsics to explicit loads and stores.
+class DXILMemIntrinsics : public PassInfoMixin { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &); +}; + +} // namespace llvm + +#endif // LLVM_TARGET_DIRECTX_DXILMEMINTRINSICS_H diff --git a/llvm/lib/Target/DirectX/DirectX.h b/llvm/lib/Target/DirectX/DirectX.h index e31c2ffa4f761..dfeb1ab12665d 100644 --- a/llvm/lib/Target/DirectX/DirectX.h +++ b/llvm/lib/Target/DirectX/DirectX.h @@ -66,6 +66,12 @@ void initializeDXILLegalizeLegacyPass(PassRegistry &); /// elements FunctionPass *createDXILLegalizeLegacyPass(); +/// Initializer for DXIL Mem Intrinsics. +void initializeDXILMemIntrinsicsLegacyPass(PassRegistry &); + +/// Pass to transform all llvm memory intrinsics to explicit loads and stores. +ModulePass *createDXILMemIntrinsicsLegacyPass(); + /// Initializer for DXILOpLowering void initializeDXILOpLoweringLegacyPass(PassRegistry &); diff --git a/llvm/lib/Target/DirectX/DirectXPassRegistry.def b/llvm/lib/Target/DirectX/DirectXPassRegistry.def index b4b48a166800e..f594546f98901 100644 --- a/llvm/lib/Target/DirectX/DirectXPassRegistry.def +++ b/llvm/lib/Target/DirectX/DirectXPassRegistry.def @@ -28,6 +28,7 @@ MODULE_PASS("dxil-finalize-linkage", DXILFinalizeLinkage()) MODULE_PASS("dxil-data-scalarization", DXILDataScalarization()) MODULE_PASS("dxil-flatten-arrays", DXILFlattenArrays()) MODULE_PASS("dxil-intrinsic-expansion", DXILIntrinsicExpansion()) +MODULE_PASS("dxil-mem-intrinsics", DXILMemIntrinsics()) MODULE_PASS("dxil-op-lower", DXILOpLowering()) MODULE_PASS("dxil-pretty-printer", DXILPrettyPrinterPass(dbgs())) MODULE_PASS("dxil-translate-metadata", DXILTranslateMetadata()) diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp index fae9cbf9832fe..c0a92f92e1fba 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp @@ -19,6 +19,7 @@ #include "DXILForwardHandleAccesses.h" #include "DXILIntrinsicExpansion.h" #include 
"DXILLegalizePass.h" +#include "DXILMemIntrinsics.h" #include "DXILOpLowering.h" #include "DXILPostOptimizationValidation.h" #include "DXILPrettyPrinter.h" @@ -58,6 +59,7 @@ LLVMInitializeDirectXTarget() { RegisterTargetMachine X(getTheDirectXTarget()); auto *PR = PassRegistry::getPassRegistry(); initializeDXILIntrinsicExpansionLegacyPass(*PR); + initializeDXILMemIntrinsicsLegacyPass(*PR); initializeDXILDataScalarizationLegacyPass(*PR); initializeDXILFlattenArraysLegacyPass(*PR); initializeScalarizerLegacyPassPass(*PR); @@ -110,6 +112,7 @@ class DirectXPassConfig : public TargetPassConfig { void addCodeGenPrepare() override { addPass(createDXILFinalizeLinkageLegacyPass()); addPass(createGlobalDCEPass()); + addPass(createDXILMemIntrinsicsLegacyPass()); addPass(createDXILCBufferAccessLegacyPass()); addPass(createDXILResourceAccessLegacyPass()); addPass(createDXILIntrinsicExpansionLegacyPass()); diff --git a/llvm/test/CodeGen/DirectX/legalize-memcpy.ll b/llvm/test/CodeGen/DirectX/MemIntrinsics/memcpy.ll similarity index 98% rename from llvm/test/CodeGen/DirectX/legalize-memcpy.ll rename to llvm/test/CodeGen/DirectX/MemIntrinsics/memcpy.ll index 55bdefc12aa77..c3acd757e2180 100644 --- a/llvm/test/CodeGen/DirectX/legalize-memcpy.ll +++ b/llvm/test/CodeGen/DirectX/MemIntrinsics/memcpy.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S -dxil-legalize -dxil-finalize-linkage -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -dxil-mem-intrinsics -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s define void @replace_int_memcpy_test() #0 { ; CHECK-LABEL: define void @replace_int_memcpy_test( diff --git a/llvm/test/CodeGen/DirectX/legalize-memset.ll b/llvm/test/CodeGen/DirectX/MemIntrinsics/memset.ll similarity index 92% rename from llvm/test/CodeGen/DirectX/legalize-memset.ll rename to llvm/test/CodeGen/DirectX/MemIntrinsics/memset.ll index ad45ac67cda00..fcb9899506167 
100644 --- a/llvm/test/CodeGen/DirectX/legalize-memset.ll +++ b/llvm/test/CodeGen/DirectX/MemIntrinsics/memset.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S -dxil-legalize -dxil-finalize-linkage -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -dxil-mem-intrinsics -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s define void @replace_float_memset_test() #0 { ; CHECK-LABEL: define void @replace_float_memset_test( @@ -78,9 +78,10 @@ define void @replace_int_memset_to_var_test() #0 { ; CHECK-LABEL: define void @replace_int_memset_to_var_test( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: [[ACCUM_I_FLAT:%.*]] = alloca [1 x i32], align 4 -; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 1, ptr [[I]], align 4 -; CHECK-NEXT: [[I8_LOAD:%.*]] = load i32, ptr [[I]], align 4 +; CHECK-NEXT: [[I:%.*]] = alloca i8, align 4 +; CHECK-NEXT: store i8 1, ptr [[I]], align 1 +; CHECK-NEXT: [[I8_LOAD1:%.*]] = load i8, ptr [[I]], align 1 +; CHECK-NEXT: [[I8_LOAD:%.*]] = zext i8 [[I8_LOAD1]] to i32 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr [1 x i32], ptr [[ACCUM_I_FLAT]], i32 0, i32 0 ; CHECK-NEXT: store i32 [[I8_LOAD]], ptr [[GEP]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll index 3933ef8a5a62b..0af783a778812 100644 --- a/llvm/test/CodeGen/DirectX/llc-pipeline.ll +++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll @@ -16,6 +16,7 @@ ; CHECK-NEXT: ModulePass Manager ; CHECK-NEXT: DXIL Finalize Linkage ; CHECK-NEXT: Dead Global Elimination +; CHECK-NEXT: DXIL Memory Intrinsic Elimination ; CHECK-NEXT: DXIL CBuffer Access ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: DXIL Resource Access