Skip to content

Commit 42ea774

Browse files
committed
[SLP]Enable float point math ops as copyables elements.
Patch enables support for float point math operations as base instructions for copyable elements. It also fixes some scheduling issues, found during testing Reviewers: hiraditya, RKSimon Pull Request: llvm#169857 Recommit after reverts in 9008922 and c244168. c244168 was caused by other issues, not related to this patch directly
1 parent 60189b3 commit 42ea774

21 files changed

+351
-344
lines changed

llvm/include/llvm/IR/Instruction.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -762,6 +762,12 @@ class Instruction : public User,
762762
/// applied to any type.
763763
///
764764
LLVM_ABI bool isCommutative() const LLVM_READONLY;
765+
766+
/// Checks if the operand is commutative. In commutative operations, not all
767+
/// operands might commutable, e.g. for fmuladd only 2 first operands are
768+
/// commutable.
769+
LLVM_ABI bool isCommutableOperand(unsigned Op) const LLVM_READONLY;
770+
765771
static bool isCommutative(unsigned Opcode) {
766772
switch (Opcode) {
767773
case Add: case FAdd:

llvm/include/llvm/IR/IntrinsicInst.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,12 @@ class IntrinsicInst : public CallInst {
101101
}
102102
}
103103

104+
/// Return true if the operand is commutable.
105+
bool isCommutableOperand(unsigned Op) const {
106+
constexpr unsigned NumCommutativeOps = 2;
107+
return isCommutative() && Op < NumCommutativeOps;
108+
}
109+
104110
/// Checks if the intrinsic is an annotation.
105111
bool isAssumeLikeIntrinsic() const {
106112
switch (getIntrinsicID()) {

llvm/lib/IR/Instruction.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1293,6 +1293,13 @@ bool Instruction::isCommutative() const {
12931293
return isCommutative(getOpcode());
12941294
}
12951295

1296+
bool Instruction::isCommutableOperand(unsigned Op) const {
1297+
if (auto *II = dyn_cast<IntrinsicInst>(this))
1298+
return II->isCommutableOperand(Op);
1299+
// TODO: Should allow icmp/fcmp?
1300+
return isCommutative(getOpcode());
1301+
}
1302+
12961303
unsigned Instruction::getNumSuccessors() const {
12971304
switch (getOpcode()) {
12981305
#define HANDLE_TERM_INST(N, OPC, CLASS) \

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 67 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -575,6 +575,27 @@ static bool isCommutative(Instruction *I, Value *ValWithUses,
575575
return I->isCommutative();
576576
}
577577

578+
/// Checks if the operand is commutative. In commutative operations, not all
579+
/// operands might commutable, e.g. for fmuladd only 2 first operands are
580+
/// commutable.
581+
static bool isCommutableOperand(Instruction *I, Value *ValWithUses, unsigned Op,
582+
bool IsCopyable = false) {
583+
assert(::isCommutative(I, ValWithUses, IsCopyable) &&
584+
"The instruction is not commutative.");
585+
if (isa<CmpInst>(I))
586+
return true;
587+
if (auto *BO = dyn_cast<BinaryOperator>(I)) {
588+
switch (BO->getOpcode()) {
589+
case Instruction::Sub:
590+
case Instruction::FSub:
591+
return true;
592+
default:
593+
break;
594+
}
595+
}
596+
return I->isCommutableOperand(Op);
597+
}
598+
578599
/// This is a helper function to check whether \p I is commutative.
579600
/// This is a convenience wrapper that calls the two-parameter version of
580601
/// isCommutative with the same instruction for both parameters. This is
@@ -5328,13 +5349,14 @@ class slpvectorizer::BoUpSLP {
53285349
if (ScheduleCopyableDataMap.empty())
53295350
return false;
53305351
SmallDenseMap<TreeEntry *, unsigned> PotentiallyReorderedEntriesCount;
5331-
SmallDenseMap<const TreeEntry *, unsigned> OrderedEntriesCount;
53325352
ArrayRef<TreeEntry *> Entries = SLP.getTreeEntries(User);
53335353
if (Entries.empty())
53345354
return false;
5355+
unsigned CurNumOps = 0;
53355356
for (const Use &U : User->operands()) {
53365357
if (U.get() != Op)
53375358
continue;
5359+
++CurNumOps;
53385360
// Check all tree entries, if they have operands replaced by copyable
53395361
// data.
53405362
for (TreeEntry *TE : Entries) {
@@ -5367,27 +5389,43 @@ class slpvectorizer::BoUpSLP {
53675389
// Same applies even for non-commutative cmps, because we can invert
53685390
// their predicate potentially and, thus, reorder the operands.
53695391
bool IsCommutativeUser =
5370-
::isCommutative(User) ||
5371-
::isCommutative(TE->getMatchingMainOpOrAltOp(User), User);
5372-
if (!IsCommutativeUser && !isa<CmpInst>(User)) {
5373-
unsigned &OpCnt =
5374-
OrderedEntriesCount.try_emplace(TE, 0).first->getSecond();
5392+
::isCommutative(User) &&
5393+
::isCommutableOperand(User, User, U.getOperandNo());
5394+
if (!IsCommutativeUser) {
5395+
Instruction *MainOp = TE->getMatchingMainOpOrAltOp(User);
5396+
IsCommutativeUser =
5397+
::isCommutative(MainOp, User) &&
5398+
::isCommutableOperand(MainOp, User, U.getOperandNo());
5399+
}
5400+
// The commutative user with the same operands can be safely
5401+
// considered as non-commutative, operands reordering does not change
5402+
// the semantics.
5403+
assert(
5404+
(!IsCommutativeUser ||
5405+
(((::isCommutative(User) &&
5406+
::isCommutableOperand(User, User, 0) &&
5407+
::isCommutableOperand(User, User, 1)) ||
5408+
(::isCommutative(TE->getMatchingMainOpOrAltOp(User), User) &&
5409+
::isCommutableOperand(TE->getMatchingMainOpOrAltOp(User),
5410+
User, 0) &&
5411+
::isCommutableOperand(TE->getMatchingMainOpOrAltOp(User),
5412+
User, 1))))) &&
5413+
"Expected commutative user with 2 first commutable operands");
5414+
bool IsCommutativeWithSameOps =
5415+
IsCommutativeUser && User->getOperand(0) == User->getOperand(1);
5416+
if ((!IsCommutativeUser || IsCommutativeWithSameOps) &&
5417+
!isa<CmpInst>(User)) {
53755418
EdgeInfo EI(TE, U.getOperandNo());
5376-
if (!getScheduleCopyableData(EI, Op))
5419+
if (CurNumOps != NumOps || getScheduleCopyableData(EI, Op))
53775420
continue;
5378-
// Found copyable operand - continue.
5379-
OpCnt += Inc;
5380-
continue;
5421+
return false;
53815422
}
53825423
PotentiallyReorderedEntriesCount.try_emplace(TE, 0)
53835424
.first->getSecond() += Inc;
53845425
}
53855426
}
53865427
if (PotentiallyReorderedEntriesCount.empty())
5387-
return all_of(OrderedEntriesCount,
5388-
[&](const std::pair<const TreeEntry *, unsigned> &P) {
5389-
return P.second == NumOps;
5390-
});
5428+
return true;
53915429
// Check the commutative/cmp entries.
53925430
for (auto &P : PotentiallyReorderedEntriesCount) {
53935431
SmallPtrSet<Value *, 4> ParentsUniqueUsers;
@@ -5433,10 +5471,6 @@ class slpvectorizer::BoUpSLP {
54335471
return all_of(PotentiallyReorderedEntriesCount,
54345472
[&](const std::pair<const TreeEntry *, unsigned> &P) {
54355473
return P.second == NumOps - 1;
5436-
}) &&
5437-
all_of(OrderedEntriesCount,
5438-
[&](const std::pair<const TreeEntry *, unsigned> &P) {
5439-
return P.second == NumOps;
54405474
});
54415475
}
54425476

@@ -5647,25 +5681,29 @@ class slpvectorizer::BoUpSLP {
56475681
auto It = OperandsUses.find(I);
56485682
assert(It != OperandsUses.end() && "Operand not found");
56495683
if (It->second > 0) {
5650-
--It->getSecond();
5651-
assert(TotalOpCount > 0 && "No more operands to decrement");
5652-
--TotalOpCount;
56535684
if (ScheduleData *OpSD = getScheduleData(I)) {
56545685
if (!Checked.insert(std::make_pair(OpSD, OpIdx)).second)
56555686
return;
5687+
--It->getSecond();
5688+
assert(TotalOpCount > 0 && "No more operands to decrement");
5689+
--TotalOpCount;
56565690
DecrUnsched(OpSD, /*IsControl=*/false);
5691+
} else {
5692+
--It->getSecond();
5693+
assert(TotalOpCount > 0 && "No more operands to decrement");
5694+
--TotalOpCount;
56575695
}
56585696
}
56595697
};
56605698

5699+
SmallDenseSet<std::pair<const ScheduleEntity *, unsigned>> Checked;
56615700
for (ScheduleBundle *Bundle : Bundles) {
56625701
if (ScheduleCopyableDataMap.empty() && TotalOpCount == 0)
56635702
break;
56645703
SmallPtrSet<Value *, 4> ParentsUniqueUsers;
56655704
// Need to search for the lane since the tree entry can be
56665705
// reordered.
56675706
auto *It = find(Bundle->getTreeEntry()->Scalars, In);
5668-
SmallDenseSet<std::pair<const ScheduleEntity *, unsigned>> Checked;
56695707
bool IsNonSchedulableWithParentPhiNode =
56705708
Bundle->getTreeEntry()->doesNotNeedToSchedule() &&
56715709
Bundle->getTreeEntry()->UserTreeIndex &&
@@ -10876,7 +10914,9 @@ class InstructionsCompatibilityAnalysis {
1087610914
Opcode == Instruction::LShr || Opcode == Instruction::Shl ||
1087710915
Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
1087810916
Opcode == Instruction::And || Opcode == Instruction::Or ||
10879-
Opcode == Instruction::Xor;
10917+
Opcode == Instruction::Xor || Opcode == Instruction::FAdd ||
10918+
Opcode == Instruction::FSub || Opcode == Instruction::FMul ||
10919+
Opcode == Instruction::FDiv;
1088010920
}
1088110921

1088210922
/// Identifies the best candidate value, which represents main opcode
@@ -11217,6 +11257,10 @@ class InstructionsCompatibilityAnalysis {
1121711257
case Instruction::And:
1121811258
case Instruction::Or:
1121911259
case Instruction::Xor:
11260+
case Instruction::FAdd:
11261+
case Instruction::FMul:
11262+
case Instruction::FSub:
11263+
case Instruction::FDiv:
1122011264
VectorCost = TTI.getArithmeticInstrCost(MainOpcode, VecTy, Kind);
1122111265
break;
1122211266
default:

llvm/test/Transforms/SLPVectorizer/AArch64/shuffle-vectors-mask-size.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@ define void @p(double %0) {
1111
; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], zeroinitializer
1212
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP3]], <2 x i32> <i32 1, i32 7>
1313
; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> zeroinitializer, [[TMP5]]
14-
; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP6]], zeroinitializer
14+
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
15+
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> <double 1.000000e+00, double 1.000000e+00, double poison, double poison>, <4 x double> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
16+
; CHECK-NEXT: [[TMP10:%.*]] = fmul <4 x double> zeroinitializer, [[TMP9]]
1517
; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x double> [[TMP4]], zeroinitializer
16-
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
17-
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x double> <double 0.000000e+00, double 0.000000e+00, double poison, double poison>, <4 x double> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1818
; CHECK-NEXT: [[TMP11:%.*]] = fadd <4 x double> [[TMP8]], [[TMP10]]
1919
; CHECK-NEXT: [[TMP12:%.*]] = fadd <4 x double> [[TMP11]], zeroinitializer
2020
; CHECK-NEXT: [[TMP13:%.*]] = fptosi <4 x double> [[TMP12]] to <4 x i32>

llvm/test/Transforms/SLPVectorizer/X86/bv-root-part-of-graph.ll

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,16 @@
44
define void @test() {
55
; CHECK-LABEL: define void @test() {
66
; CHECK-NEXT: [[BB:.*]]:
7-
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float 0.000000e+00>, <4 x float> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
8-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float 0.000000e+00>, <4 x float> <float poison, float 0.000000e+00, float poison, float poison>, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
97
; CHECK-NEXT: br label %[[BB1:.*]]
108
; CHECK: [[BB1]]:
119
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP9:%.*]], %[[BB1]] ]
12-
; CHECK-NEXT: [[FMUL:%.*]] = fmul float 0.000000e+00, 0.000000e+00
10+
; CHECK-NEXT: [[FMUL:%.*]] = sitofp i32 0 to float
11+
; CHECK-NEXT: [[SITOFP:%.*]] = sitofp i32 0 to float
12+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> <float poison, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[SITOFP]], i32 0
13+
; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, [[TMP0]]
14+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> <float poison, float poison, float poison, float 0.000000e+00>, <4 x i32> <i32 0, i32 0, i32 poison, i32 7>
1315
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[FMUL]], i32 2
14-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
15-
; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[TMP0]], [[TMP3]]
16+
; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[TMP2]], [[TMP3]]
1617
; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[TMP4]], zeroinitializer
1718
; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <4 x float> [[TMP5]], zeroinitializer
1819
; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> zeroinitializer, <4 x i32> zeroinitializer

llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,36 +7,30 @@
77
define void @main(i1 %arg) {
88
; CHECK-LABEL: @main(
99
; CHECK-NEXT: entry:
10-
; CHECK-NEXT: br i1 %arg, label [[COND_TRUE:%.*]], label [[COND_END:%.*]]
10+
; CHECK-NEXT: br i1 [[ARG:%.*]], label [[COND_TRUE:%.*]], label [[COND_END:%.*]]
1111
; CHECK: cond.true:
1212
; CHECK-NEXT: unreachable
1313
; CHECK: cond.end:
1414
; CHECK-NEXT: br label [[INVOKE_CONT:%.*]]
1515
; CHECK: invoke.cont:
16-
; CHECK-NEXT: br i1 %arg, label [[ARRAYCTOR_CONT:%.*]], label [[INVOKE_CONT]]
16+
; CHECK-NEXT: br i1 [[ARG]], label [[ARRAYCTOR_CONT:%.*]], label [[INVOKE_CONT]]
1717
; CHECK: arrayctor.cont:
1818
; CHECK-NEXT: [[AGG_TMP101211_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY:%.*]], ptr undef, i64 0, i32 1, i32 0
1919
; CHECK-NEXT: br label [[FOR_COND36_PREHEADER:%.*]]
2020
; CHECK: for.cond36.preheader:
21-
; CHECK-NEXT: br i1 %arg, label [[FOR_BODY42_LR_PH_US:%.*]], label [[_Z5CLAMPD_EXIT_1:%.*]]
21+
; CHECK-NEXT: br i1 [[ARG]], label [[FOR_BODY42_LR_PH_US:%.*]], label [[_Z5CLAMPD_EXIT_1:%.*]]
2222
; CHECK: cond.false51.us:
2323
; CHECK-NEXT: unreachable
2424
; CHECK: cond.true48.us:
25-
; CHECK-NEXT: br i1 %arg, label [[COND_TRUE63_US:%.*]], label [[COND_FALSE66_US:%.*]]
25+
; CHECK-NEXT: br i1 [[ARG]], label [[COND_TRUE63_US:%.*]], label [[COND_FALSE66_US:%.*]]
2626
; CHECK: cond.false66.us:
27-
; CHECK-NEXT: [[ADD_I276_US:%.*]] = fadd double 0.000000e+00, 0x3EB0C6F7A0B5ED8D
28-
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> <double poison, double 0xBFA5CC2D1960285F>, double [[ADD_I276_US]], i32 0
29-
; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> <double 0.000000e+00, double 1.000000e-01>, [[TMP0]]
30-
; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], splat (double 1.400000e+02)
31-
; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], <double 5.000000e+01, double 5.200000e+01>
32-
; CHECK-NEXT: store <2 x double> [[TMP3]], ptr undef, align 8
33-
; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> <double 2.000000e-01, double 3.000000e-01>, [[TMP1]]
34-
; CHECK-NEXT: store <2 x double> [[TMP4]], ptr [[AGG_TMP101211_SROA_0_0_IDX]], align 8
27+
; CHECK-NEXT: store <2 x double> <double 0x404900049667B5F2, double 0x404E0515D587DA7B>, ptr undef, align 8
28+
; CHECK-NEXT: store <2 x double> <double 2.000000e-07, double 0x3F91A436DC4B6CE6>, ptr [[AGG_TMP101211_SROA_0_0_IDX]], align 8
3529
; CHECK-NEXT: ret void
3630
; CHECK: cond.true63.us:
3731
; CHECK-NEXT: unreachable
3832
; CHECK: for.body42.lr.ph.us:
39-
; CHECK-NEXT: br i1 %arg, label [[COND_TRUE48_US:%.*]], label [[COND_FALSE51_US:%.*]]
33+
; CHECK-NEXT: br i1 [[ARG]], label [[COND_TRUE48_US:%.*]], label [[COND_FALSE51_US:%.*]]
4034
; CHECK: _Z5clampd.exit.1:
4135
; CHECK-NEXT: br label [[FOR_COND36_PREHEADER]]
4236
;
@@ -96,7 +90,7 @@ _Z5clampd.exit.1:
9690
define void @test(i1 %arg) {
9791
; CHECK-LABEL: @test(
9892
; CHECK-NEXT: entry:
99-
; CHECK-NEXT: br i1 %arg, label [[IF_THEN78:%.*]], label [[IF_THEN38:%.*]]
93+
; CHECK-NEXT: br i1 [[ARG:%.*]], label [[IF_THEN78:%.*]], label [[IF_THEN38:%.*]]
10094
; CHECK: if.then38:
10195
; CHECK-NEXT: [[AGG_TMP74663_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY:%.*]], ptr undef, i64 0, i32 1, i32 0
10296
; CHECK-NEXT: store <2 x double> <double 0x3FFA356C1D8A7F76, double 0x3FFDC4F38B38BEF4>, ptr [[AGG_TMP74663_SROA_0_0_IDX]], align 8

0 commit comments

Comments
 (0)