[LoopVectorize] Support vectorization of frexp intrinsic (llvm#172957)

Michael-Chen-NJU · web-flow · commit c9eb572b1417 · 2025-12-26T21:57:57.000Z
This patch enables the vectorization of the llvm.frexp intrinsic. Following the suggestion in llvm#112408, frexp is moved from isTriviallyScalarizable to isTriviallyVectorizable. Fixes llvm#112408
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
@@ -81,6 +81,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
   case Intrinsic::exp:
   case Intrinsic::exp10:
   case Intrinsic::exp2:
+  case Intrinsic::frexp:
   case Intrinsic::ldexp:
   case Intrinsic::log:
   case Intrinsic::log10:
@@ -129,10 +130,7 @@ bool llvm::isTriviallyScalarizable(Intrinsic::ID ID,
   if (TTI && Intrinsic::isTargetIntrinsic(ID))
     return TTI->isTargetIntrinsicTriviallyScalarizable(ID);
 
-  // TODO: Move frexp to isTriviallyVectorizable.
-  // https://github.com/llvm/llvm-project/issues/112408
   switch (ID) {
-  case Intrinsic::frexp:
   case Intrinsic::uadd_with_overflow:
   case Intrinsic::sadd_with_overflow:
   case Intrinsic::ssub_with_overflow:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -800,18 +800,6 @@ static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
   return Scalarize;
 }
 
-/// Returns true if the call return type `Ty` can be widened by the loop
-/// vectorizer.
-static bool canWidenCallReturnType(Type *Ty) {
-  auto *StructTy = dyn_cast<StructType>(Ty);
-  // TODO: Remove the homogeneous types restriction. This is just an initial
-  // simplification. When we want to support things like the overflow intrinsics
-  // we will have to lift this restriction.
-  if (StructTy && !StructTy->containsHomogeneousTypes())
-    return false;
-  return canVectorizeTy(StructTy);
-}
-
 bool LoopVectorizationLegality::canVectorizeInstrs() {
   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
   bool Result = true;
@@ -1026,7 +1014,7 @@ bool LoopVectorizationLegality::canVectorizeInstr(Instruction &I) {
     // For now, we only recognize struct values returned from calls where
     // all users are extractvalue as vectorizable. All element types of the
     // struct must be types that can be widened.
-    return isa<CallInst>(Inst) && canWidenCallReturnType(InstTy) &&
+    return isa<CallInst>(Inst) && canVectorizeTy(InstTy) &&
            all_of(Inst.users(), IsaPred<ExtractValueInst>);
   };
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1695,8 +1695,11 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
 
   SmallVector<Type *, 2> TysForDecl;
   // Add return type if intrinsic is overloaded on it.
-  if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1, State.TTI))
-    TysForDecl.push_back(VectorType::get(getResultType(), State.VF));
+  if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1,
+                                             State.TTI)) {
+    Type *RetTy = toVectorizedTy(getResultType(), State.VF);
+    append_range(TysForDecl, getContainedTypes(RetTy));
+  }
   SmallVector<Value *, 4> Args;
   for (const auto &I : enumerate(operands())) {
     // Some intrinsics have a scalar argument - don't replace it with a
diff --git a/llvm/test/Transforms/LoopVectorize/multiple-result-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/multiple-result-intrinsics.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "(:|sincos|modf|extract|store)" --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "(:|sincos|frexp|modf|extract|store)" --version 5
 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=2 < %s -S -o - | FileCheck %s
 
 define void @sincos_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
@@ -277,3 +277,74 @@ exit:
   ret void
 }
 
+define void @frexp_f32(ptr noalias %in, ptr noalias writeonly %out_mantissa, ptr noalias writeonly %out_exponent) {
+; CHECK-LABEL: define void @frexp_f32(
+; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_MANTISSA:%.*]], ptr noalias writeonly [[OUT_EXPONENT:%.*]]) {
+; CHECK:  [[ENTRY:.*:]]
+; CHECK:  [[FOR_BODY:.*:]]
+; CHECK:  [[EXIT:.*:]]
+; CHECK:    [[TMP1:%.*]] = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> [[WIDE_LOAD:%.*]])
+; CHECK:    [[TMP2:%.*]] = extractvalue { <2 x float>, <2 x i32> } [[TMP1]], 0
+; CHECK:    [[TMP3:%.*]] = extractvalue { <2 x float>, <2 x i32> } [[TMP1]], 1
+; CHECK:    store <2 x float> [[TMP2]], ptr [[TMP4:%.*]], align 4
+; CHECK:    store <2 x i32> [[TMP3]], ptr [[TMP5:%.*]], align 4
+; CHECK:  [[MIDDLE_BLOCK:.*:]]
+; CHECK:  [[EXIT1:.*:]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
+  %in_val = load float, ptr %arrayidx, align 4
+  %call = tail call { float, i32 } @llvm.frexp.f32.i32(float %in_val)
+  %mantissa = extractvalue { float, i32 } %call, 0
+  %exponent = extractvalue { float, i32 } %call, 1
+  %arrayidx2 = getelementptr inbounds float, ptr %out_mantissa, i64 %iv
+  store float %mantissa, ptr %arrayidx2, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %out_exponent, i64 %iv
+  store i32 %exponent, ptr %arrayidx4, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, 1024
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
+define void @frexp_f64(ptr noalias %in, ptr noalias writeonly %out_mantissa, ptr noalias writeonly %out_exponent) {
+; CHECK-LABEL: define void @frexp_f64(
+; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_MANTISSA:%.*]], ptr noalias writeonly [[OUT_EXPONENT:%.*]]) {
+; CHECK:  [[ENTRY:.*:]]
+; CHECK:  [[FOR_BODY:.*:]]
+; CHECK:  [[EXIT:.*:]]
+; CHECK:    [[TMP1:%.*]] = call { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double> [[WIDE_LOAD:%.*]])
+; CHECK:    [[TMP2:%.*]] = extractvalue { <2 x double>, <2 x i32> } [[TMP1]], 0
+; CHECK:    [[TMP3:%.*]] = extractvalue { <2 x double>, <2 x i32> } [[TMP1]], 1
+; CHECK:    store <2 x double> [[TMP2]], ptr [[TMP4:%.*]], align 8
+; CHECK:    store <2 x i32> [[TMP3]], ptr [[TMP5:%.*]], align 4
+; CHECK:  [[MIDDLE_BLOCK:.*:]]
+; CHECK:  [[EXIT1:.*:]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds double, ptr %in, i64 %iv
+  %in_val = load double, ptr %arrayidx, align 8
+  %call = tail call { double, i32 } @llvm.frexp.f64.i32(double %in_val)
+  %mantissa = extractvalue { double, i32 } %call, 0
+  %exponent = extractvalue { double, i32 } %call, 1
+  %arrayidx2 = getelementptr inbounds double, ptr %out_mantissa, i64 %iv
+  store double %mantissa, ptr %arrayidx2, align 8
+  %arrayidx4 = getelementptr inbounds i32, ptr %out_exponent, i64 %iv
+  store i32 %exponent, ptr %arrayidx4, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, 1024
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/struct-return.ll b/llvm/test/Transforms/LoopVectorize/struct-return.ll
@@ -378,27 +378,30 @@ exit:
   ret void
 }
 
-; Negative test. Widening structs with mixed element types is not supported.
-; CHECK-REMARKS-COUNT: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
-define void @negative_mixed_element_type_struct_return(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
-; CHECK-LABEL: define void @negative_mixed_element_type_struct_return(
+; CHECK-REMARKS: remark: {{.*}} vectorized loop (vectorization width: 2, interleaved count: 1)
+define void @mixed_element_type_struct_return(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
+; CHECK-LABEL: define void @mixed_element_type_struct_return(
 ; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*]]:
-; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
-; CHECK:       [[FOR_BODY]]:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[IV_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[IN]], i64 [[IV]]
-; CHECK-NEXT:    [[IN_VAL:%.*]] = load float, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[CALL:%.*]] = tail call { float, i32 } @baz(float [[IN_VAL]]) #[[ATTR3:[0-9]+]]
-; CHECK-NEXT:    [[EXTRACT_A:%.*]] = extractvalue { float, i32 } [[CALL]], 0
-; CHECK-NEXT:    [[EXTRACT_B:%.*]] = extractvalue { float, i32 } [[CALL]], 1
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call { <2 x float>, <2 x i32> } @fixed_vec_baz(<2 x float> [[WIDE_LOAD]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <2 x float>, <2 x i32> } [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <2 x float>, <2 x i32> } [[TMP1]], 1
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[OUT_A]], i64 [[IV]]
-; CHECK-NEXT:    store float [[EXTRACT_A]], ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    store <2 x float> [[TMP2]], ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[OUT_B]], i64 [[IV]]
-; CHECK-NEXT:    store i32 [[EXTRACT_B]], ptr [[ARRAYIDX4]], align 4
-; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    store <2 x i32> [[TMP3]], ptr [[ARRAYIDX4]], align 4
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw i64 [[IV]], 2
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br label %[[EXIT:.*]]
 ; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret void
 ;