@@ -575,6 +575,27 @@ static bool isCommutative(Instruction *I, Value *ValWithUses,
575575 return I->isCommutative();
576576}
577577
578+ /// Checks whether operand \p Op of the commutative instruction \p I is commutable. In
579+ /// commutative operations not all operands might be commutable, e.g. for fmuladd only
580+ /// the first 2 operands are commutable. \p I must satisfy ::isCommutative (asserted).
581+ static bool isCommutableOperand(Instruction *I, Value *ValWithUses, unsigned Op,
582+ bool IsCopyable = false) {
583+ assert(::isCommutative(I, ValWithUses, IsCopyable) &&
584+ "The instruction is not commutative."); // ValWithUses/IsCopyable are used only by this check.
585+ if (isa<CmpInst>(I)) // Any operand of a compare is reported as commutable.
586+ return true;
587+ if (auto *BO = dyn_cast<BinaryOperator>(I)) {
588+ switch (BO->getOpcode()) {
589+ case Instruction::Sub:
590+ case Instruction::FSub:
591+ return true; // Sub/FSub that passed the ::isCommutative check: every operand is reported commutable.
592+ default:
593+ break;
594+ }
595+ }
596+ return I->isCommutableOperand(Op); // Otherwise defer to the instruction's own per-operand query.
597+ }
598+
578599/// This is a helper function to check whether \p I is commutative.
579600/// This is a convenience wrapper that calls the two-parameter version of
580601/// isCommutative with the same instruction for both parameters. This is
@@ -5328,13 +5349,14 @@ class slpvectorizer::BoUpSLP {
53285349 if (ScheduleCopyableDataMap.empty())
53295350 return false;
53305351 SmallDenseMap<TreeEntry *, unsigned> PotentiallyReorderedEntriesCount;
5331- SmallDenseMap<const TreeEntry *, unsigned> OrderedEntriesCount;
53325352 ArrayRef<TreeEntry *> Entries = SLP.getTreeEntries(User);
53335353 if (Entries.empty())
53345354 return false;
5355+ unsigned CurNumOps = 0;
53355356 for (const Use &U : User->operands()) {
53365357 if (U.get() != Op)
53375358 continue;
5359+ ++CurNumOps;
53385360 // Check all tree entries, if they have operands replaced by copyable
53395361 // data.
53405362 for (TreeEntry *TE : Entries) {
@@ -5367,27 +5389,43 @@ class slpvectorizer::BoUpSLP {
53675389 // Same applies even for non-commutative cmps, because we can invert
53685390 // their predicate potentially and, thus, reorder the operands.
53695391 bool IsCommutativeUser =
5370- ::isCommutative(User) ||
5371- ::isCommutative(TE->getMatchingMainOpOrAltOp(User), User);
5372- if (!IsCommutativeUser && !isa<CmpInst>(User)) {
5373- unsigned &OpCnt =
5374- OrderedEntriesCount.try_emplace(TE, 0).first->getSecond();
5392+ ::isCommutative(User) &&
5393+ ::isCommutableOperand(User, User, U.getOperandNo());
5394+ if (!IsCommutativeUser) {
5395+ Instruction *MainOp = TE->getMatchingMainOpOrAltOp(User);
5396+ IsCommutativeUser =
5397+ ::isCommutative(MainOp, User) &&
5398+ ::isCommutableOperand(MainOp, User, U.getOperandNo());
5399+ }
5400+ // The commutative user with the same operands can be safely
5401+ // considered as non-commutative, operands reordering does not change
5402+ // the semantics.
5403+ assert(
5404+ (!IsCommutativeUser ||
5405+ (((::isCommutative(User) &&
5406+ ::isCommutableOperand(User, User, 0) &&
5407+ ::isCommutableOperand(User, User, 1)) ||
5408+ (::isCommutative(TE->getMatchingMainOpOrAltOp(User), User) &&
5409+ ::isCommutableOperand(TE->getMatchingMainOpOrAltOp(User),
5410+ User, 0) &&
5411+ ::isCommutableOperand(TE->getMatchingMainOpOrAltOp(User),
5412+ User, 1))))) &&
5413+ "Expected commutative user with 2 first commutable operands");
5414+ bool IsCommutativeWithSameOps =
5415+ IsCommutativeUser && User->getOperand(0) == User->getOperand(1);
5416+ if ((!IsCommutativeUser || IsCommutativeWithSameOps) &&
5417+ !isa<CmpInst>(User)) {
53755418 EdgeInfo EI(TE, U.getOperandNo());
5376- if (! getScheduleCopyableData(EI, Op))
5419+ if (CurNumOps != NumOps || getScheduleCopyableData(EI, Op))
53775420 continue;
5378- // Found copyable operand - continue.
5379- OpCnt += Inc;
5380- continue;
5421+ return false;
53815422 }
53825423 PotentiallyReorderedEntriesCount.try_emplace(TE, 0)
53835424 .first->getSecond() += Inc;
53845425 }
53855426 }
53865427 if (PotentiallyReorderedEntriesCount.empty())
5387- return all_of(OrderedEntriesCount,
5388- [&](const std::pair<const TreeEntry *, unsigned> &P) {
5389- return P.second == NumOps;
5390- });
5428+ return true;
53915429 // Check the commutative/cmp entries.
53925430 for (auto &P : PotentiallyReorderedEntriesCount) {
53935431 SmallPtrSet<Value *, 4> ParentsUniqueUsers;
@@ -5433,10 +5471,6 @@ class slpvectorizer::BoUpSLP {
54335471 return all_of(PotentiallyReorderedEntriesCount,
54345472 [&](const std::pair<const TreeEntry *, unsigned> &P) {
54355473 return P.second == NumOps - 1;
5436- }) &&
5437- all_of(OrderedEntriesCount,
5438- [&](const std::pair<const TreeEntry *, unsigned> &P) {
5439- return P.second == NumOps;
54405474 });
54415475 }
54425476
@@ -5647,25 +5681,29 @@ class slpvectorizer::BoUpSLP {
56475681 auto It = OperandsUses.find(I);
56485682 assert(It != OperandsUses.end() && "Operand not found");
56495683 if (It->second > 0) {
5650- --It->getSecond();
5651- assert(TotalOpCount > 0 && "No more operands to decrement");
5652- --TotalOpCount;
56535684 if (ScheduleData *OpSD = getScheduleData(I)) {
56545685 if (!Checked.insert(std::make_pair(OpSD, OpIdx)).second)
56555686 return;
5687+ --It->getSecond();
5688+ assert(TotalOpCount > 0 && "No more operands to decrement");
5689+ --TotalOpCount;
56565690 DecrUnsched(OpSD, /*IsControl=*/false);
5691+ } else {
5692+ --It->getSecond();
5693+ assert(TotalOpCount > 0 && "No more operands to decrement");
5694+ --TotalOpCount;
56575695 }
56585696 }
56595697 };
56605698
5699+ SmallDenseSet<std::pair<const ScheduleEntity *, unsigned>> Checked;
56615700 for (ScheduleBundle *Bundle : Bundles) {
56625701 if (ScheduleCopyableDataMap.empty() && TotalOpCount == 0)
56635702 break;
56645703 SmallPtrSet<Value *, 4> ParentsUniqueUsers;
56655704 // Need to search for the lane since the tree entry can be
56665705 // reordered.
56675706 auto *It = find(Bundle->getTreeEntry()->Scalars, In);
5668- SmallDenseSet<std::pair<const ScheduleEntity *, unsigned>> Checked;
56695707 bool IsNonSchedulableWithParentPhiNode =
56705708 Bundle->getTreeEntry()->doesNotNeedToSchedule() &&
56715709 Bundle->getTreeEntry()->UserTreeIndex &&
@@ -10876,7 +10914,9 @@ class InstructionsCompatibilityAnalysis {
1087610914 Opcode == Instruction::LShr || Opcode == Instruction::Shl ||
1087710915 Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
1087810916 Opcode == Instruction::And || Opcode == Instruction::Or ||
10879- Opcode == Instruction::Xor;
10917+ Opcode == Instruction::Xor || Opcode == Instruction::FAdd ||
10918+ Opcode == Instruction::FSub || Opcode == Instruction::FMul ||
10919+ Opcode == Instruction::FDiv;
1088010920 }
1088110921
1088210922 /// Identifies the best candidate value, which represents main opcode
@@ -11217,6 +11257,10 @@ class InstructionsCompatibilityAnalysis {
1121711257 case Instruction::And:
1121811258 case Instruction::Or:
1121911259 case Instruction::Xor:
11260+ case Instruction::FAdd:
11261+ case Instruction::FMul:
11262+ case Instruction::FSub:
11263+ case Instruction::FDiv:
1122011264 VectorCost = TTI.getArithmeticInstrCost(MainOpcode, VecTy, Kind);
1122111265 break;
1122211266 default:
0 commit comments