Skip to content

Commit c9eb572

Browse files
[LoopVectorize] Support vectorization of frexp intrinsic (llvm#172957)
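This patch enables vectorization of the llvm.frexp intrinsic. Following the suggestion in llvm#112408, frexp is moved from isTriviallyScalarizable to isTriviallyVectorizable. Because frexp returns a struct with mixed element types ({ float, i32 }), the patch also removes the homogeneous-element-type restriction on struct-returning calls in LoopVectorizationLegality (canWidenCallReturnType is replaced by a direct canVectorizeTy check), and VPWidenIntrinsicRecipe::execute now builds the intrinsic's overload types from the contained types of the vectorized return type. Fixes llvm#112408.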
1 parent 60e5b86 commit c9eb572

File tree

5 files changed

+98
-35
lines changed

5 files changed

+98
-35
lines changed

llvm/lib/Analysis/VectorUtils.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
8181
case Intrinsic::exp:
8282
case Intrinsic::exp10:
8383
case Intrinsic::exp2:
84+
case Intrinsic::frexp:
8485
case Intrinsic::ldexp:
8586
case Intrinsic::log:
8687
case Intrinsic::log10:
@@ -129,10 +130,7 @@ bool llvm::isTriviallyScalarizable(Intrinsic::ID ID,
129130
if (TTI && Intrinsic::isTargetIntrinsic(ID))
130131
return TTI->isTargetIntrinsicTriviallyScalarizable(ID);
131132

132-
// TODO: Move frexp to isTriviallyVectorizable.
133-
// https://github.com/llvm/llvm-project/issues/112408
134133
switch (ID) {
135-
case Intrinsic::frexp:
136134
case Intrinsic::uadd_with_overflow:
137135
case Intrinsic::sadd_with_overflow:
138136
case Intrinsic::ssub_with_overflow:

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -800,18 +800,6 @@ static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
800800
return Scalarize;
801801
}
802802

803-
/// Returns true if the call return type `Ty` can be widened by the loop
804-
/// vectorizer.
805-
static bool canWidenCallReturnType(Type *Ty) {
806-
auto *StructTy = dyn_cast<StructType>(Ty);
807-
// TODO: Remove the homogeneous types restriction. This is just an initial
808-
// simplification. When we want to support things like the overflow intrinsics
809-
// we will have to lift this restriction.
810-
if (StructTy && !StructTy->containsHomogeneousTypes())
811-
return false;
812-
return canVectorizeTy(StructTy);
813-
}
814-
815803
bool LoopVectorizationLegality::canVectorizeInstrs() {
816804
bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
817805
bool Result = true;
@@ -1026,7 +1014,7 @@ bool LoopVectorizationLegality::canVectorizeInstr(Instruction &I) {
10261014
// For now, we only recognize struct values returned from calls where
10271015
// all users are extractvalue as vectorizable. All element types of the
10281016
// struct must be types that can be widened.
1029-
return isa<CallInst>(Inst) && canWidenCallReturnType(InstTy) &&
1017+
return isa<CallInst>(Inst) && canVectorizeTy(InstTy) &&
10301018
all_of(Inst.users(), IsaPred<ExtractValueInst>);
10311019
};
10321020

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1695,8 +1695,11 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
16951695

16961696
SmallVector<Type *, 2> TysForDecl;
16971697
// Add return type if intrinsic is overloaded on it.
1698-
if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1, State.TTI))
1699-
TysForDecl.push_back(VectorType::get(getResultType(), State.VF));
1698+
if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1,
1699+
State.TTI)) {
1700+
Type *RetTy = toVectorizedTy(getResultType(), State.VF);
1701+
append_range(TysForDecl, getContainedTypes(RetTy));
1702+
}
17001703
SmallVector<Value *, 4> Args;
17011704
for (const auto &I : enumerate(operands())) {
17021705
// Some intrinsics have a scalar argument - don't replace it with a

llvm/test/Transforms/LoopVectorize/multiple-result-intrinsics.ll

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "(:|sincos|modf|extract|store)" --version 5
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "(:|sincos|frexp|modf|extract|store)" --version 5
22
; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=2 < %s -S -o - | FileCheck %s
33

44
define void @sincos_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
@@ -277,3 +277,74 @@ exit:
277277
ret void
278278
}
279279

280+
define void @frexp_f32(ptr noalias %in, ptr noalias writeonly %out_mantissa, ptr noalias writeonly %out_exponent) {
281+
; CHECK-LABEL: define void @frexp_f32(
282+
; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_MANTISSA:%.*]], ptr noalias writeonly [[OUT_EXPONENT:%.*]]) {
283+
; CHECK: [[ENTRY:.*:]]
284+
; CHECK: [[FOR_BODY:.*:]]
285+
; CHECK: [[EXIT:.*:]]
286+
; CHECK: [[TMP1:%.*]] = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> [[WIDE_LOAD:%.*]])
287+
; CHECK: [[TMP2:%.*]] = extractvalue { <2 x float>, <2 x i32> } [[TMP1]], 0
288+
; CHECK: [[TMP3:%.*]] = extractvalue { <2 x float>, <2 x i32> } [[TMP1]], 1
289+
; CHECK: store <2 x float> [[TMP2]], ptr [[TMP4:%.*]], align 4
290+
; CHECK: store <2 x i32> [[TMP3]], ptr [[TMP5:%.*]], align 4
291+
; CHECK: [[MIDDLE_BLOCK:.*:]]
292+
; CHECK: [[EXIT1:.*:]]
293+
;
294+
entry:
295+
br label %for.body
296+
297+
for.body:
298+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
299+
%arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
300+
%in_val = load float, ptr %arrayidx, align 4
301+
%call = tail call { float, i32 } @llvm.frexp.f32.i32(float %in_val)
302+
%mantissa = extractvalue { float, i32 } %call, 0
303+
%exponent = extractvalue { float, i32 } %call, 1
304+
%arrayidx2 = getelementptr inbounds float, ptr %out_mantissa, i64 %iv
305+
store float %mantissa, ptr %arrayidx2, align 4
306+
%arrayidx4 = getelementptr inbounds i32, ptr %out_exponent, i64 %iv
307+
store i32 %exponent, ptr %arrayidx4, align 4
308+
%iv.next = add nuw nsw i64 %iv, 1
309+
%exitcond.not = icmp eq i64 %iv.next, 1024
310+
br i1 %exitcond.not, label %exit, label %for.body
311+
312+
exit:
313+
ret void
314+
}
315+
316+
define void @frexp_f64(ptr noalias %in, ptr noalias writeonly %out_mantissa, ptr noalias writeonly %out_exponent) {
317+
; CHECK-LABEL: define void @frexp_f64(
318+
; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_MANTISSA:%.*]], ptr noalias writeonly [[OUT_EXPONENT:%.*]]) {
319+
; CHECK: [[ENTRY:.*:]]
320+
; CHECK: [[FOR_BODY:.*:]]
321+
; CHECK: [[EXIT:.*:]]
322+
; CHECK: [[TMP1:%.*]] = call { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double> [[WIDE_LOAD:%.*]])
323+
; CHECK: [[TMP2:%.*]] = extractvalue { <2 x double>, <2 x i32> } [[TMP1]], 0
324+
; CHECK: [[TMP3:%.*]] = extractvalue { <2 x double>, <2 x i32> } [[TMP1]], 1
325+
; CHECK: store <2 x double> [[TMP2]], ptr [[TMP4:%.*]], align 8
326+
; CHECK: store <2 x i32> [[TMP3]], ptr [[TMP5:%.*]], align 4
327+
; CHECK: [[MIDDLE_BLOCK:.*:]]
328+
; CHECK: [[EXIT1:.*:]]
329+
;
330+
entry:
331+
br label %for.body
332+
333+
for.body:
334+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
335+
%arrayidx = getelementptr inbounds double, ptr %in, i64 %iv
336+
%in_val = load double, ptr %arrayidx, align 8
337+
%call = tail call { double, i32 } @llvm.frexp.f64.i32(double %in_val)
338+
%mantissa = extractvalue { double, i32 } %call, 0
339+
%exponent = extractvalue { double, i32 } %call, 1
340+
%arrayidx2 = getelementptr inbounds double, ptr %out_mantissa, i64 %iv
341+
store double %mantissa, ptr %arrayidx2, align 8
342+
%arrayidx4 = getelementptr inbounds i32, ptr %out_exponent, i64 %iv
343+
store i32 %exponent, ptr %arrayidx4, align 4
344+
%iv.next = add nuw nsw i64 %iv, 1
345+
%exitcond.not = icmp eq i64 %iv.next, 1024
346+
br i1 %exitcond.not, label %exit, label %for.body
347+
348+
exit:
349+
ret void
350+
}

llvm/test/Transforms/LoopVectorize/struct-return.ll

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -378,27 +378,30 @@ exit:
378378
ret void
379379
}
380380

381-
; Negative test. Widening structs with mixed element types is not supported.
382-
; CHECK-REMARKS-COUNT: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
383-
define void @negative_mixed_element_type_struct_return(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
384-
; CHECK-LABEL: define void @negative_mixed_element_type_struct_return(
381+
; CHECK-REMARKS: remark: {{.*}} vectorized loop (vectorization width: 2, interleaved count: 1)
382+
define void @mixed_element_type_struct_return(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
383+
; CHECK-LABEL: define void @mixed_element_type_struct_return(
385384
; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) {
386-
; CHECK-NEXT: [[ENTRY:.*]]:
387-
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
388-
; CHECK: [[FOR_BODY]]:
389-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
385+
; CHECK-NEXT: [[ENTRY:.*:]]
386+
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
387+
; CHECK: [[VECTOR_PH]]:
388+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
389+
; CHECK: [[VECTOR_BODY]]:
390+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[IV_NEXT:%.*]], %[[VECTOR_BODY]] ]
390391
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[IN]], i64 [[IV]]
391-
; CHECK-NEXT: [[IN_VAL:%.*]] = load float, ptr [[ARRAYIDX]], align 4
392-
; CHECK-NEXT: [[CALL:%.*]] = tail call { float, i32 } @baz(float [[IN_VAL]]) #[[ATTR3:[0-9]+]]
393-
; CHECK-NEXT: [[EXTRACT_A:%.*]] = extractvalue { float, i32 } [[CALL]], 0
394-
; CHECK-NEXT: [[EXTRACT_B:%.*]] = extractvalue { float, i32 } [[CALL]], 1
392+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 4
393+
; CHECK-NEXT: [[TMP1:%.*]] = call { <2 x float>, <2 x i32> } @fixed_vec_baz(<2 x float> [[WIDE_LOAD]])
394+
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <2 x float>, <2 x i32> } [[TMP1]], 0
395+
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <2 x float>, <2 x i32> } [[TMP1]], 1
395396
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[OUT_A]], i64 [[IV]]
396-
; CHECK-NEXT: store float [[EXTRACT_A]], ptr [[ARRAYIDX2]], align 4
397+
; CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[ARRAYIDX2]], align 4
397398
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[OUT_B]], i64 [[IV]]
398-
; CHECK-NEXT: store i32 [[EXTRACT_B]], ptr [[ARRAYIDX4]], align 4
399-
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
399+
; CHECK-NEXT: store <2 x i32> [[TMP3]], ptr [[ARRAYIDX4]], align 4
400+
; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 2
400401
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
401-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
402+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
403+
; CHECK: [[MIDDLE_BLOCK]]:
404+
; CHECK-NEXT: br label %[[EXIT:.*]]
402405
; CHECK: [[EXIT]]:
403406
; CHECK-NEXT: ret void
404407
;

0 commit comments

Comments
 (0)