gongchensu
diff --git a/‎include/infiniop/ops/binary_ops_api.h‎
Lines changed: 21 additions & 0 deletions b/‎include/infiniop/ops/binary_ops_api.h‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎include/infiniop/ops/unary_ops_api.h‎
Lines changed: 11 additions & 0 deletions b/‎include/infiniop/ops/unary_ops_api.h‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎src/infiniop/elementwise/binary.h‎
Lines changed: 542 additions & 11 deletions b/‎src/infiniop/elementwise/binary.h‎
Lines changed: 542 additions & 11 deletions
diff --git a/‎src/infiniop/elementwise/cpu/elementwise_cpu_impl.h‎
Lines changed: 35 additions & 0 deletions b/‎src/infiniop/elementwise/cpu/elementwise_cpu_impl.h‎
Lines changed: 35 additions & 0 deletions
diff --git a/‎src/infiniop/elementwise/nvidia/elementwise_nvidia_impl.cuh‎
Lines changed: 34 additions & 0 deletions b/‎src/infiniop/elementwise/nvidia/elementwise_nvidia_impl.cuh‎
Lines changed: 34 additions & 0 deletions
@@ -15,9 +15,30 @@
 
 // Declare all binary operator APIs: one BINARY_OP_API_DECLARE(snake_case_name, CamelCaseName) invocation per operator
 BINARY_OP_API_DECLARE(div, Div)
+BINARY_OP_API_DECLARE(floor_divide, FloorDivide)
 BINARY_OP_API_DECLARE(pow, Pow)
+BINARY_OP_API_DECLARE(copysign, CopySign)
+BINARY_OP_API_DECLARE(hypot, Hypot)
+BINARY_OP_API_DECLARE(atan2, Atan2)
 BINARY_OP_API_DECLARE(mod, Mod)
+BINARY_OP_API_DECLARE(remainder, Remainder)
 BINARY_OP_API_DECLARE(max, Max)
 BINARY_OP_API_DECLARE(min, Min)
+BINARY_OP_API_DECLARE(fmax, Fmax)
+BINARY_OP_API_DECLARE(fmin, Fmin)
+BINARY_OP_API_DECLARE(gt, Gt)
+BINARY_OP_API_DECLARE(lt, Lt)
+BINARY_OP_API_DECLARE(ge, Ge)
+BINARY_OP_API_DECLARE(le, Le)
+BINARY_OP_API_DECLARE(eq, Eq)
+BINARY_OP_API_DECLARE(ne, Ne)
+BINARY_OP_API_DECLARE(logical_and, LogicalAnd)
+BINARY_OP_API_DECLARE(logical_or, LogicalOr)
+BINARY_OP_API_DECLARE(logical_xor, LogicalXor)
+BINARY_OP_API_DECLARE(bitwise_and, BitwiseAnd)
+BINARY_OP_API_DECLARE(bitwise_or, BitwiseOr)
+BINARY_OP_API_DECLARE(bitwise_xor, BitwiseXor)
+BINARY_OP_API_DECLARE(bitwise_left_shift, BitwiseLeftShift)
+BINARY_OP_API_DECLARE(bitwise_right_shift, BitwiseRightShift)
 
 #endif // __INFINIOP_BINARY_OPS_API_H__
@@ -17,7 +17,12 @@
 // Declare all unary operator APIs: one UNARY_OP_API_DECLARE(snake_case_name, CamelCaseName) invocation per operator
 UNARY_OP_API_DECLARE(abs, Abs)
 UNARY_OP_API_DECLARE(log, Log)
+UNARY_OP_API_DECLARE(log2, Log2)
+UNARY_OP_API_DECLARE(log10, Log10)
+UNARY_OP_API_DECLARE(log1p, Log1p)
 UNARY_OP_API_DECLARE(sqrt, Sqrt)
+UNARY_OP_API_DECLARE(square, Square)
+UNARY_OP_API_DECLARE(rsqrt, Rsqrt)
 UNARY_OP_API_DECLARE(reciprocal, Reciprocal)
 UNARY_OP_API_DECLARE(neg, Neg)
 UNARY_OP_API_DECLARE(round, Round)
@@ -36,6 +41,12 @@ UNARY_OP_API_DECLARE(atan, Atan)
 UNARY_OP_API_DECLARE(acos, Acos)
 UNARY_OP_API_DECLARE(ceil, Ceil)
 UNARY_OP_API_DECLARE(exp, Exp)
+UNARY_OP_API_DECLARE(exp2, Exp2)
 UNARY_OP_API_DECLARE(hardswish, Hardswish)
+UNARY_OP_API_DECLARE(isnan, IsNan)
+UNARY_OP_API_DECLARE(isinf, IsInf)
+UNARY_OP_API_DECLARE(isfinite, IsFinite)
+UNARY_OP_API_DECLARE(sinc, Sinc)
+UNARY_OP_API_DECLARE(sin, Sin)
 
 #endif // __INFINIOP_UNARY_OPS_API_H__
@@ -48,6 +48,18 @@
     case INFINI_DTYPE_BF16: \
         return _device_info->template calculate<Op, bf16_t>(_info, output, inputs, stream);
 
+/**
+ * @brief Integral switch cases: I32 -> int32_t, I64 -> int64_t, U8 -> uint8_t; each case returns _device_info->calculate<Op, T>(_info, output, inputs, stream).
+ * For operators that only support integral types (e.g. bitwise). The expansion site must provide Op, _device_info, _info, output, inputs and stream; no default case is defined here.
+ */
+#define _IMPL_CALC_CASES_INTEGRAL \
+    case INFINI_DTYPE_I32: \
+        return _device_info->template calculate<Op, int32_t>(_info, output, inputs, stream); \
+    case INFINI_DTYPE_I64: \
+        return _device_info->template calculate<Op, int64_t>(_info, output, inputs, stream); \
+    case INFINI_DTYPE_U8: \
+        return _device_info->template calculate<Op, uint8_t>(_info, output, inputs, stream);
+
 /**
  * @brief Generic Template for the Calculate method
  * @param CASES_MACRO The macro containing the switch cases to use
@@ -156,4 +168,27 @@
     ) \
     _IMPL_CALCULATE_METHOD(_IMPL_CALC_CASES_EXTENDED)
 
+/**
+ * @brief Implementation for Binary Operators with Integral Types (I32, I64, U8)
+ *
+ * Expands to _IMPL_CREATE_METHOD (reads inputs 0 and 1 from input_desc_vec, passes out_shape/a_shape/b_shape to CHECK_SAME_SHAPE,
+ * and forwards the dtype list I32, I64, U8) followed by _IMPL_CALCULATE_METHOD using _IMPL_CALC_CASES_INTEGRAL as its switch cases.
+ * NOTE(review): OP is not referenced in this replacement list; the destructor is presumably emitted by a helper macro - confirm.
+ * Usage:
+ *   namespace op::bitwise_and::cpu {
+ *       using Op = op::elementwise::binary::BinaryOp<BinaryMode::BitwiseAnd>;
+ *       ELEMENTWISE_CPU_IMPL_BINARY_INTEGRAL(bitwise_and)
+ *   }
+ */
+#define ELEMENTWISE_CPU_IMPL_BINARY_INTEGRAL(OP) \
+    _IMPL_CREATE_METHOD( \
+        const auto &a_desc = input_desc_vec.at(0); \
+        const auto &b_desc = input_desc_vec.at(1); \
+        const auto &a_shape = a_desc->shape(); \
+        const auto &b_shape = b_desc->shape(); \
+        CHECK_SAME_SHAPE(out_shape, a_shape, b_shape);, \
+        INFINI_DTYPE_I32, INFINI_DTYPE_I64, INFINI_DTYPE_U8 \
+    ) \
+    _IMPL_CALCULATE_METHOD(_IMPL_CALC_CASES_INTEGRAL)
+
 #endif // __INFINIOP_ELEMENTWISE_CPU_IMPL_H__
@@ -52,6 +52,18 @@
     case INFINI_DTYPE_F64: \
         return _device_info->calculate<256, cuda::Op, double>(_info, workspace, output, inputs, stream);
 
+/**
+ * @brief Integral switch cases: I32 -> int32_t, I64 -> int64_t, U8 -> uint8_t; each case returns _device_info->calculate<256, cuda::Op, T>(_info, workspace, output, inputs, stream).
+ * For operators that only support integral types (e.g. bitwise). The expansion site must provide cuda::Op, _device_info, _info, workspace, output, inputs and stream; no default case is defined here.
+ */
+#define _IMPL_CALC_CASES_INTEGRAL \
+    case INFINI_DTYPE_I32: \
+        return _device_info->calculate<256, cuda::Op, int32_t>(_info, workspace, output, inputs, stream); \
+    case INFINI_DTYPE_I64: \
+        return _device_info->calculate<256, cuda::Op, int64_t>(_info, workspace, output, inputs, stream); \
+    case INFINI_DTYPE_U8: \
+        return _device_info->calculate<256, cuda::Op, uint8_t>(_info, workspace, output, inputs, stream);
+
 /**
  * @brief Generic Template for the Calculate method
  * @param CASES_MACRO The macro containing the switch cases to use
@@ -160,4 +172,26 @@
     ) \
     _IMPL_CALCULATE_METHOD(_IMPL_CALC_CASES_EXTENDED)
 
+/**
+ * @brief Implementation for Binary Operators with Integral Types (I32, I64, U8)
+ *
+ * Expands to _IMPL_CREATE_METHOD (reads inputs 0 and 1 from input_desc_vec, passes out_shape/a_shape/b_shape to CHECK_SAME_SHAPE,
+ * and forwards the dtype list I32, I64, U8) followed by _IMPL_CALCULATE_METHOD using _IMPL_CALC_CASES_INTEGRAL as its switch cases.
+ * NOTE(review): OP is unused in this replacement list, and the cases reference cuda::Op, which must be visible at the expansion site - confirm.
+ * Usage:
+ *   namespace op::bitwise_and::nvidia {
+ *       ELEMENTWISE_NVIDIA_IMPL_BINARY_INTEGRAL(bitwise_and)
+ *   }
+ */
+#define ELEMENTWISE_NVIDIA_IMPL_BINARY_INTEGRAL(OP) \
+    _IMPL_CREATE_METHOD( \
+        const auto &a_desc = input_desc_vec.at(0); \
+        const auto &b_desc = input_desc_vec.at(1); \
+        const auto &a_shape = a_desc->shape(); \
+        const auto &b_shape = b_desc->shape(); \
+        CHECK_SAME_SHAPE(out_shape, a_shape, b_shape);, \
+        INFINI_DTYPE_I32, INFINI_DTYPE_I64, INFINI_DTYPE_U8 \
+    ) \
+    _IMPL_CALCULATE_METHOD(_IMPL_CALC_CASES_INTEGRAL)
+
 #endif // __INFINIOP_ELEMENTWISE_NVIDIA_IMPL_CUH__