
Commit 5b2df2b

[Feature][Connector-V2][Kafka] Add support for Kafka message header

1 parent 81d8733 commit 5b2df2b

File tree

8 files changed: +608 −7 lines changed


docs/en/connectors/sink/Kafka.md

Lines changed: 62 additions & 0 deletions
@@ -39,6 +39,7 @@ They can be downloaded via install-plugin.sh or from the Maven central repositor
 | kafka.config | Map | No | - | In addition to the above parameters that must be specified by the `Kafka producer` client, the user can also specify multiple non-mandatory parameters for the `producer` client, covering [all the producer parameters specified in the official Kafka document](https://kafka.apache.org/documentation.html#producerconfigs). |
 | semantics | String | No | NON | Semantics that can be chosen EXACTLY_ONCE/AT_LEAST_ONCE/NON, default NON. |
 | partition_key_fields | Array | No | - | Configure which fields are used as the key of the kafka message. |
+| kafka_headers_fields | Array | No | - | Configure which fields are used as the headers of the kafka message. The field value will be converted to a string and used as the header value. |
 | partition | Int | No | - | We can specify the partition, all messages will be sent to this partition. |
 | assign_partitions | Array | No | - | We can decide which partition to send based on the content of the message. The function of this parameter is to distribute information. |
 | transaction_prefix | String | No | - | If semantic is specified as EXACTLY_ONCE, the producer will write all messages in a Kafka transaction,kafka distinguishes different transactions by different transactionId. This parameter is prefix of kafka transactionId, make sure different job use different prefix. |
@@ -90,6 +91,23 @@ If not set partition key fields, the null message key will be sent to.
 The format of the message key is json, If name is set as the key, for example '{"name":"Jack"}'.
 The selected field must be an existing field in the upstream.
 
+### Kafka Headers Fields
+
+For example, if you want to use the values of fields from the upstream data as kafka message headers, you can assign the field names to this property.
+
+Upstream data is the following:
+
+| name | age | data          | source | traceId   |
+|------|-----|---------------|--------|-----------|
+| Jack | 16  | data-example1 | web    | trace-123 |
+| Mary | 23  | data-example2 | mobile | trace-456 |
+
+If source and traceId are set as the kafka headers fields, then these field values will be added as headers to the kafka message.
+For example, the first row will have the headers `source=web` and `traceId=trace-123`.
+The field values will be converted to strings and used as the header values.
+If a field value is null, the string `null` is written as the header value (consistent with the partition_key_fields behavior).
+The selected fields must be existing fields in the upstream.
+
 ### Assign Partitions
 
 For example, there are five partitions in total, and the assign_partitions field in config is as follows:
@@ -140,6 +158,50 @@ sink {
 }
 ```
 
+### Using Kafka Headers
+
+This example shows how to use kafka_headers_fields to set Kafka message headers:
+
+```hocon
+env {
+  parallelism = 1
+  job.mode = "BATCH"
+}
+
+source {
+  FakeSource {
+    parallelism = 1
+    plugin_output = "fake"
+    row.num = 16
+    schema = {
+      fields {
+        name = "string"
+        age = "int"
+        source = "string"
+        traceId = "string"
+      }
+    }
+  }
+}
+
+sink {
+  kafka {
+    topic = "test_topic"
+    bootstrap.servers = "localhost:9092"
+    format = json
+    partition_key_fields = ["name"]
+    kafka_headers_fields = ["source", "traceId"]
+    kafka.request.timeout.ms = 60000
+    semantics = EXACTLY_ONCE
+    kafka.config = {
+      acks = "all"
+      request.timeout.ms = 60000
+      buffer.memory = 33554432
+    }
+  }
+}
+```
+
 ### AWS MSK SASL/SCRAM
 
 Replace the following `${username}` and `${password}` with the configuration values in AWS MSK.
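
A consumer-side sketch (not part of this commit) may help show what the kafka_headers_fields example above produces on the wire. It uses only the standard Kafka consumer API; the topic name comes from the example config, while the group id, deserializers, and offset reset policy are assumptions.

```java
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.Collections;
import java.util.Properties;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.header.Header;

public class HeaderReadExample {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092");
        props.put("group.id", "header-read-example");
        props.put("auto.offset.reset", "earliest");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
            consumer.subscribe(Collections.singletonList("test_topic"));
            ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(5));
            for (ConsumerRecord<String, String> record : records) {
                // Each header key is a configured upstream field name; the value is the
                // UTF-8 string form of that field, e.g. source=web, traceId=trace-123.
                for (Header header : record.headers()) {
                    System.out.printf(
                            "%s=%s%n",
                            header.key(), new String(header.value(), StandardCharsets.UTF_8));
                }
            }
        }
    }
}
```

For each row produced by the FakeSource example, this prints one line per configured header field, e.g. `source=...` and `traceId=...`.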

docs/zh/connectors/sink/Kafka.md

Lines changed: 18 additions & 0 deletions
@@ -39,6 +39,7 @@ import ChangeLog from '../changelog/connector-kafka.md';
 | kafka.config | Map | No | - | In addition to the above parameters that must be specified by the Kafka Producer client, the user can also specify multiple non-mandatory parameters for the Producer client, covering [all the producer parameters specified in the official Kafka document](https://kafka.apache.org/documentation.html#producerconfigs) |
 | semantics | String | No | NON | Semantics that can be chosen: EXACTLY_ONCE/AT_LEAST_ONCE/NON, default NON |
 | partition_key_fields | Array | No | - | Configure which fields are used as the key of the kafka message |
+| kafka_headers_fields | Array | No | - | Configure which fields are used as the headers of the kafka message. The field value will be converted to a string and used as the header value |
 | partition | Int | No | - | The partition can be specified; all messages will be sent to this partition |
 | assign_partitions | Array | No | - | Which partition to send to can be decided based on the content of the message. The function of this parameter is to distribute information |
 | transaction_prefix | String | No | - | If the semantic is specified as EXACTLY_ONCE, the producer will write all messages in one Kafka transaction; kafka distinguishes different transactions by different transactionIds. This parameter is the prefix of the kafka transactionId; make sure different jobs use different prefixes |
@@ -89,6 +90,23 @@ NON does not provide any guarantee: if there is a problem with the Kafka broker, messages may be lost
 The format of the message key is json. If name is set as the key, for example `{"name":"Jack"}`
 The selected fields must be existing fields in the upstream data.
 
+### Kafka Headers Fields
+
+For example, if you want to use the values of fields from the upstream data as the headers of the kafka message, you can assign these field names to this property.
+
+The upstream data is as follows:
+
+| name | age | data          | source | traceId   |
+|------|-----|---------------|--------|-----------|
+| Jack | 16  | data-example1 | web    | trace-123 |
+| Mary | 23  | data-example2 | mobile | trace-456 |
+
+If source and traceId are set as the kafka headers fields, these field values will be added to the kafka message as headers.
+For example, the first row will have the headers `source=web` and `traceId=trace-123`.
+The field values will be converted to strings and used as the header values.
+If a field value is null, the string `null` is written as the header value.
+The selected fields must be existing fields in the upstream data.
+
 ### Assign Partitions
 
 Suppose there are five partitions in total, and the assign_partitions field in the config is set as follows:

seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/config/KafkaSinkOptions.java

Lines changed: 8 additions & 0 deletions
@@ -46,6 +46,14 @@ public class KafkaSinkOptions extends KafkaBaseOptions {
                 .withDescription(
                         "Configure which fields are used as the key of the kafka message.");
 
+    public static final Option<List<String>> KAFKA_HEADERS_FIELDS =
+            Options.key("kafka_headers_fields")
+                    .listType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Configure which fields are used as the headers of the kafka message. "
+                                    + "The field value will be converted to a string and used as the header value.");
+
     public static final Option<KafkaSemantics> SEMANTICS =
             Options.key("semantics")
                     .enumType(KafkaSemantics.class)
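
A minimal sketch (not part of this commit) of how the new option resolves from a user configuration. It assumes SeaTunnel's `ReadonlyConfig.fromMap` / `get(Option)` API; the class and config map here are purely illustrative.

```java
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.seatunnel.api.configuration.ReadonlyConfig;
import org.apache.seatunnel.connectors.seatunnel.kafka.config.KafkaSinkOptions;

public class HeadersOptionSketch {
    public static void main(String[] args) {
        // Mimics the sink block of the HOCON example: kafka_headers_fields = ["source", "traceId"]
        Map<String, Object> userConfig = new HashMap<>();
        userConfig.put("kafka_headers_fields", Arrays.asList("source", "traceId"));

        ReadonlyConfig config = ReadonlyConfig.fromMap(userConfig);
        // Expected to return ["source", "traceId"]; since the option has no default value,
        // it would resolve to null when the user does not configure it.
        List<String> headerFields = config.get(KafkaSinkOptions.KAFKA_HEADERS_FIELDS);
        System.out.println(headerFields);
    }
}
```

The serializer changes in the next file accept this list through the new `create(...)` overloads and fall back to the existing header-less behaviour when it is null or empty.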

seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/serialize/DefaultSeaTunnelRowSerializer.java

Lines changed: 106 additions & 5 deletions
@@ -119,12 +119,23 @@ public static DefaultSeaTunnelRowSerializer create(
                 timestampExtractor(),
                 keyExtractor(null, rowType, format, delimiter, pluginConfig),
                 valueExtractor(rowType, format, delimiter, pluginConfig),
-                headersExtractor());
+                headersExtractor(null, rowType));
+    }
+
+    public static DefaultSeaTunnelRowSerializer create(
+            String topic,
+            Integer partition,
+            SeaTunnelRowType rowType,
+            MessageFormat format,
+            String delimiter,
+            ReadonlyConfig pluginConfig) {
+        return create(topic, partition, null, rowType, format, delimiter, pluginConfig);
     }
 
     public static DefaultSeaTunnelRowSerializer create(
             String topic,
             Integer partition,
+            List<String> headerFields,
             SeaTunnelRowType rowType,
             MessageFormat format,
             String delimiter,
@@ -134,13 +145,24 @@ public static DefaultSeaTunnelRowSerializer create(
                 partitionExtractor(partition),
                 timestampExtractor(),
                 keyExtractor(null, rowType, format, delimiter, pluginConfig),
-                valueExtractor(rowType, format, delimiter, pluginConfig),
-                headersExtractor());
+                valueExtractor(headerFields, rowType, format, delimiter, pluginConfig),
+                headersExtractor(headerFields, rowType));
+    }
+
+    public static DefaultSeaTunnelRowSerializer create(
+            String topic,
+            List<String> keyFields,
+            SeaTunnelRowType rowType,
+            MessageFormat format,
+            String delimiter,
+            ReadonlyConfig pluginConfig) {
+        return create(topic, keyFields, null, rowType, format, delimiter, pluginConfig);
     }
 
     public static DefaultSeaTunnelRowSerializer create(
             String topic,
             List<String> keyFields,
+            List<String> headerFields,
             SeaTunnelRowType rowType,
             MessageFormat format,
             String delimiter,
@@ -150,8 +172,8 @@ public static DefaultSeaTunnelRowSerializer create(
                 partitionExtractor(null),
                 timestampExtractor(),
                 keyExtractor(keyFields, rowType, format, delimiter, pluginConfig),
-                valueExtractor(rowType, format, delimiter, pluginConfig),
-                headersExtractor());
+                valueExtractor(headerFields, rowType, format, delimiter, pluginConfig),
+                headersExtractor(headerFields, rowType));
     }
 
     private static Function<SeaTunnelRow, Integer> partitionNativeExtractor(
@@ -182,6 +204,32 @@ private static Function<SeaTunnelRow, Iterable<Header>> headersExtractor(
                 convertToKafkaHeaders((Map<String, String>) row.getField(rowType.indexOf(HEADERS)));
     }
 
+    private static Function<SeaTunnelRow, Iterable<Header>> headersExtractor(
+            List<String> headerFields, SeaTunnelRowType rowType) {
+        if (headerFields == null || headerFields.isEmpty()) {
+            return row -> null;
+        }
+
+        int[] headerFieldIndexes = new int[headerFields.size()];
+        for (int i = 0; i < headerFields.size(); i++) {
+            headerFieldIndexes[i] = rowType.indexOf(headerFields.get(i));
+        }
+
+        return row -> {
+            RecordHeaders kafkaHeaders = new RecordHeaders();
+            for (int i = 0; i < headerFields.size(); i++) {
+                String headerName = headerFields.get(i);
+                Object headerValue = row.getField(headerFieldIndexes[i]);
+                // Write "null" string for null values to keep fields in headers
+                // (consistent with partition_key_fields behavior)
+                String valueStr = headerValue != null ? headerValue.toString() : "null";
+                kafkaHeaders.add(
+                        new RecordHeader(headerName, valueStr.getBytes(StandardCharsets.UTF_8)));
+            }
+            return kafkaHeaders.toArray().length > 0 ? kafkaHeaders : null;
+        };
+    }
+
     private static Function<SeaTunnelRow, String> topicExtractor(
             String topic, SeaTunnelRowType rowType, MessageFormat format) {
         if ((MessageFormat.COMPATIBLE_DEBEZIUM_JSON.equals(format)
@@ -256,6 +304,25 @@ private static Function<SeaTunnelRow, byte[]> valueExtractor(
         return row -> serializationSchema.serialize(row);
     }
 
+    private static Function<SeaTunnelRow, byte[]> valueExtractor(
+            List<String> headerFields,
+            SeaTunnelRowType rowType,
+            MessageFormat format,
+            String delimiter,
+            ReadonlyConfig pluginConfig) {
+        if (headerFields == null || headerFields.isEmpty()) {
+            return valueExtractor(rowType, format, delimiter, pluginConfig);
+        }
+
+        // Create a new row type excluding header fields
+        SeaTunnelRowType valueRowType = createValueRowType(headerFields, rowType);
+        Function<SeaTunnelRow, SeaTunnelRow> valueRowExtractor =
+                createValueRowExtractor(valueRowType, headerFields, rowType);
+        SerializationSchema serializationSchema =
+                createSerializationSchema(valueRowType, format, delimiter, false, pluginConfig);
+        return row -> serializationSchema.serialize(valueRowExtractor.apply(row));
+    }
+
     private static Function<SeaTunnelRow, byte[]> valueExtractor(SeaTunnelRowType rowType) {
         return row -> (byte[]) row.getField(rowType.indexOf(VALUE));
     }
@@ -273,6 +340,25 @@ private static SeaTunnelRowType createKeyType(
         return new SeaTunnelRowType(keyFieldNames.toArray(new String[0]), keyFieldDataTypeArr);
     }
 
+    private static SeaTunnelRowType createValueRowType(
+            List<String> headerFieldNames, SeaTunnelRowType rowType) {
+        // Create a row type excluding header fields
+        List<String> valueFieldNames = new java.util.ArrayList<>();
+        List<SeaTunnelDataType> valueFieldTypes = new java.util.ArrayList<>();
+
+        for (int i = 0; i < rowType.getTotalFields(); i++) {
+            String fieldName = rowType.getFieldName(i);
+            if (!headerFieldNames.contains(fieldName)) {
+                valueFieldNames.add(fieldName);
+                valueFieldTypes.add(rowType.getFieldType(i));
+            }
+        }
+
+        return new SeaTunnelRowType(
+                valueFieldNames.toArray(new String[0]),
+                valueFieldTypes.toArray(new SeaTunnelDataType[0]));
+    }
+
     private static Function<SeaTunnelRow, SeaTunnelRow> createKeyRowExtractor(
             SeaTunnelRowType keyType, SeaTunnelRowType rowType) {
         int[] keyIndex = new int[keyType.getTotalFields()];
@@ -288,6 +374,21 @@ private static Function<SeaTunnelRow, SeaTunnelRow> createKeyRowExtractor(
         };
     }
 
+    private static Function<SeaTunnelRow, SeaTunnelRow> createValueRowExtractor(
+            SeaTunnelRowType valueType, List<String> headerFieldNames, SeaTunnelRowType rowType) {
+        int[] valueIndex = new int[valueType.getTotalFields()];
+        for (int i = 0; i < valueType.getTotalFields(); i++) {
+            valueIndex[i] = rowType.indexOf(valueType.getFieldName(i));
+        }
+        return row -> {
+            Object[] fields = new Object[valueType.getTotalFields()];
+            for (int i = 0; i < valueIndex.length; i++) {
+                fields[i] = row.getField(valueIndex[i]);
+            }
+            return new SeaTunnelRow(fields);
+        };
+    }
+
     private static SerializationSchema createSerializationSchema(
             SeaTunnelRowType rowType,
             MessageFormat format,
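
Taken together, the changes above mean that when header fields are configured, headersExtractor writes each configured field as a UTF-8 header (a null field value becomes the literal string "null"), while the new valueExtractor overload serializes only the remaining fields, so the header fields are excluded from the message value. The standalone sketch below mirrors that behaviour outside the connector; the kafka-clients dependency is assumed on the classpath, and a plain Map stands in for SeaTunnelRow.

```java
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.apache.kafka.common.header.Header;
import org.apache.kafka.common.header.internals.RecordHeader;
import org.apache.kafka.common.header.internals.RecordHeaders;

public class HeadersExtractorSketch {
    public static void main(String[] args) {
        // Stand-in for an upstream row; traceId is null to show the "null" string behaviour.
        Map<String, Object> row = new LinkedHashMap<>();
        row.put("name", "Jack");
        row.put("age", 16);
        row.put("source", "web");
        row.put("traceId", null);

        List<String> headerFields = Arrays.asList("source", "traceId");

        // Mirrors headersExtractor: one UTF-8 header per configured field.
        RecordHeaders headers = new RecordHeaders();
        for (String field : headerFields) {
            Object value = row.get(field);
            String valueStr = value != null ? value.toString() : "null";
            headers.add(new RecordHeader(field, valueStr.getBytes(StandardCharsets.UTF_8)));
        }

        // Mirrors createValueRowType/createValueRowExtractor: header fields are dropped
        // from the row that gets serialized as the message value.
        Map<String, Object> valueRow = new LinkedHashMap<>(row);
        valueRow.keySet().removeAll(headerFields);

        for (Header h : headers) {
            System.out.println(h.key() + "=" + new String(h.value(), StandardCharsets.UTF_8));
        }
        System.out.println("value row: " + valueRow);
    }
}
```

Running it prints `source=web`, `traceId=null`, and `value row: {name=Jack, age=16}`.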
