apache
diff --git a/‎hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java‎
Lines changed: 1 addition & 65 deletions b/‎hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java‎
Lines changed: 1 addition & 65 deletions
diff --git a/‎hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkParquetReader.java‎
Lines changed: 1 addition & 1 deletion b/‎hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkParquetReader.java‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowParquetWriteSupport.java‎
Lines changed: 1 addition & 14 deletions b/‎hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowParquetWriteSupport.java‎
Lines changed: 1 addition & 14 deletions
diff --git a/‎hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/SparkAdapter.scala‎
Lines changed: 5 additions & 8 deletions b/‎hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/SparkAdapter.scala‎
Lines changed: 5 additions & 8 deletions
diff --git a/‎hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaCache.java‎
Lines changed: 47 additions & 0 deletions b/‎hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaCache.java‎
Lines changed: 47 additions & 0 deletions
diff --git a/‎hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java‎
Lines changed: 76 additions & 1 deletion b/‎hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java‎
Lines changed: 76 additions & 1 deletion
diff --git a/‎hudi-common/src/main/java/org/apache/hudi/common/table/view/AbstractTableFileSystemView.java‎
Lines changed: 13 additions & 0 deletions b/‎hudi-common/src/main/java/org/apache/hudi/common/table/view/AbstractTableFileSystemView.java‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java‎
Lines changed: 0 additions & 1 deletion b/‎hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java‎
Lines changed: 2 additions & 0 deletions b/‎hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java‎
Lines changed: 2 additions & 0 deletions
@@ -46,6 +46,7 @@
 import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy;
 import org.apache.hudi.common.model.HoodieFileFormat;
 import org.apache.hudi.common.model.HoodieKey;
+import org.apache.hudi.common.model.HoodieRecordLocation;
 import org.apache.hudi.common.model.HoodieWriteStat;
 import org.apache.hudi.common.table.HoodieTableConfig;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
@@ -108,7 +109,6 @@
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
-import static org.apache.hudi.avro.AvroSchemaUtils.getNonNullTypeFromUnion;
 import static org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy.EAGER;
 import static org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy.LAZY;
 import static org.apache.hudi.common.table.HoodieTableConfig.TABLE_METADATA_PARTITIONS;
@@ -891,70 +891,6 @@ private void validateSchema() throws HoodieUpsertException, HoodieInsertExceptio
     }
   }
 
-  /**
-   * Validates that columns with secondary indexes are not evolved in an incompatible way.
-   *
-   * @param tableSchema the current table schema
-   * @param writerSchema the new writer schema
-   * @param indexMetadata the index metadata containing all index definitions
-   * @throws SchemaCompatibilityException if a secondary index column has incompatible evolution
-   */
-  static void validateSecondaryIndexSchemaEvolution(
-      Schema tableSchema,
-      Schema writerSchema,
-      HoodieIndexMetadata indexMetadata) throws SchemaCompatibilityException {
-    
-    // Filter for secondary index definitions
-    List<HoodieIndexDefinition> secondaryIndexDefs = indexMetadata.getIndexDefinitions().values().stream()
-        .filter(indexDef -> MetadataPartitionType.fromPartitionPath(indexDef.getIndexName()).equals(MetadataPartitionType.SECONDARY_INDEX))
-        .collect(Collectors.toList());
-    
-    if (secondaryIndexDefs.isEmpty()) {
-      return;
-    }
-    
-    // Create a map from source field to index name for efficient lookup
-    Map<String, String> columnToIndexName = new HashMap<>();
-    for (HoodieIndexDefinition indexDef : secondaryIndexDefs) {
-      String indexName = indexDef.getIndexName();
-      for (String sourceField : indexDef.getSourceFields()) {
-        // Note: If a column is part of multiple indexes, this will use the last one
-        // This is fine since we just need any index name for error reporting
-        columnToIndexName.put(sourceField, indexName);
-      }
-    }
-    
-    // Check each indexed column for schema evolution
-    for (Map.Entry<String, String> entry : columnToIndexName.entrySet()) {
-      String columnName = entry.getKey();
-      String indexName = entry.getValue();
-      
-      Schema.Field tableField = tableSchema.getField(columnName);
-      
-      if (tableField == null) {
-        // This shouldn't happen as indexed columns should exist in table schema
-        LOG.warn("Secondary index '{}' references non-existent column: {}", indexName, columnName);
-        continue;
-      }
-      
-      // Use AvroSchemaCompatibility's field lookup logic to handle aliases
-      Schema.Field writerField = AvroSchemaCompatibility.lookupWriterField(writerSchema, tableField);
-      
-      if (writerField != null && !tableField.schema().equals(writerField.schema())) {
-        // Check if this is just making the field nullable/non-nullable, which is safe from SI perspective
-        if (getNonNullTypeFromUnion(tableField.schema()).equals(getNonNullTypeFromUnion(writerField.schema()))) {
-          continue;
-        }
-        
-        String errorMessage = String.format(
-            "Column '%s' has secondary index '%s' and cannot evolve from schema '%s' to '%s'. "
-            + "Please drop the secondary index before changing the column type.",
-            columnName, indexName, tableField.schema(), writerField.schema());
-        throw new SchemaCompatibilityException(errorMessage);
-      }
-    }
-  }
-
   public void validateUpsertSchema() throws HoodieUpsertException {
     if (isMetadataTable) {
       return;
 
@@ -177,7 +177,7 @@ private StructType convertToStruct(MessageType messageType) {
 
   @Override
   public void close() {
-    readerIterators.forEach(ParquetReaderIterator::close);
+    readerIterators.forEach(it -> it.close());
   }
 
   @Override
 
@@ -18,32 +18,19 @@
 
 package org.apache.hudi.io.storage.row;
 
-import org.apache.avro.LogicalTypes;
-import org.apache.avro.Schema;
-import org.apache.hadoop.conf.Configuration;
-
-import org.apache.hudi.SparkAdapterSupport$;
 import org.apache.hudi.avro.HoodieBloomFilterWriteSupport;
 import org.apache.hudi.common.bloom.BloomFilter;
 import org.apache.hudi.common.config.HoodieConfig;
 import org.apache.hudi.common.config.HoodieStorageConfig;
 import org.apache.hudi.common.util.Option;
 import org.apache.hudi.common.util.ReflectionUtils;
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.parquet.hadoop.api.WriteSupport;
-import org.apache.parquet.schema.GroupType;
-import org.apache.parquet.schema.LogicalTypeAnnotation;
-import org.apache.parquet.schema.Type;
-import org.apache.parquet.schema.Types;
-import org.apache.spark.sql.execution.datasources.parquet.ParquetUtils;
 import org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport;
-import org.apache.spark.sql.types.DataTypes;
-import org.apache.spark.sql.types.Decimal;
-import org.apache.spark.sql.types.Metadata;
 import org.apache.spark.sql.types.StructType;
 import org.apache.spark.unsafe.types.UTF8String;
 
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.Map;
 
 
@@ -19,19 +19,16 @@
 package org.apache.spark.sql.hudi
 
 import org.apache.avro.Schema
-import org.apache.hadoop.fs.{FileStatus, Path}
 import org.apache.hudi.client.utils.SparkRowSerDe
 import org.apache.hudi.common.table.HoodieTableMetaClient
 import org.apache.hudi.storage.StoragePath
 
-import org.apache.avro.Schema
-import org.apache.hadoop.conf.Configuration
 import org.apache.spark.sql._
 import org.apache.spark.sql.avro.{HoodieAvroDeserializer, HoodieAvroSchemaConverters, HoodieAvroSerializer}
+import org.apache.spark.sql.HoodieUnsafeUtils
 import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases
 import org.apache.spark.sql.catalyst.catalog.CatalogTable
-import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
-import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, InterpretedPredicate}
+import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, InterpretedPredicate}
 import org.apache.spark.sql.catalyst.parser.ParserInterface
 import org.apache.spark.sql.catalyst.planning.PhysicalOperation
 import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan}
@@ -53,7 +50,7 @@ import java.util.{Locale, TimeZone}
 trait SparkAdapter extends Serializable {
 
   /**
-   * Checks whether provided instance of [[InternalRow]] is actually an instance of [[ColumnarBatchRow]]
+   * Checks whether provided instance of [[InternalRow]] is actually an instance of [[org.apache.spark.sql.vectorized.ColumnarBatchRow]]
    */
   def isColumnarBatchRow(r: InternalRow): Boolean
 
@@ -72,7 +69,7 @@ trait SparkAdapter extends Serializable {
 
   /**
    * Returns an instance of [[HoodieCatalogUtils]] providing for common utils operating on Spark's
-   * [[TableCatalog]]s
+   * [[org.apache.spark.sql.connector.catalog.TableCatalog]]s
    */
   def getCatalogUtils: HoodieCatalogUtils
 
@@ -207,7 +204,7 @@ trait SparkAdapter extends Serializable {
                               metadataColumns: Seq[AttributeReference] = Seq.empty): FileScanRDD
 
   /**
-   * Extract condition in [[DeleteFromTable]]
+   * Extract condition in [[org.apache.spark.sql.catalyst.plans.logical.DeleteFromTable]]
    * SPARK-38626 condition is no longer Option in Spark 3.3
    */
   def extractDeleteCondition(deleteFromTable: Command): Expression
 
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.avro;
+
+import com.github.benmanes.caffeine.cache.Caffeine;
+import com.github.benmanes.caffeine.cache.LoadingCache;
+import org.apache.avro.Schema;
+
+/**
+ * An Avro schema cache for reusing Avro schema instances within a JVM/process scope.
+ * This is a global (static) cache that is shared for the lifetime of the JVM.
+ * A bounded collection of schema instances is maintained.
+ *
+ * <p>NOTE: Schemas that are used frequently should be interned through this cache.
+ */
+public class AvroSchemaCache {
+
+
+  // Maps a schema to its shared instance (the loader is the identity function); weak values, at most 1024 entries
+  private static final LoadingCache<Schema, Schema> SCHEMA_CACHE = Caffeine.newBuilder().weakValues().maximumSize(1024).build(k -> k);
+
+  /**
+   * Looks up the given schema in the global cache; if it is absent, the given schema itself is cached and returned.
+   * @param schema schema to look up (used as the cache key)
+   * @return the already-cached schema instance if one exists, otherwise the argument itself.
+   */
+  public static Schema intern(Schema schema) {
+    return SCHEMA_CACHE.get(schema);
+  }
+
+}
@@ -54,6 +54,7 @@
 import org.apache.avro.LogicalTypes.Decimal;
 import org.apache.avro.Schema;
 import org.apache.avro.Schema.Field;
+import org.apache.avro.Schema.Field.Order;
 import org.apache.avro.generic.GenericData;
 import org.apache.avro.generic.GenericData.Record;
 import org.apache.avro.generic.GenericDatumReader;
@@ -78,6 +79,7 @@
 import java.math.BigInteger;
 import java.math.RoundingMode;
 import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
 import java.sql.Date;
 import java.sql.Timestamp;
 import java.time.Instant;
@@ -105,7 +107,6 @@
 import static org.apache.avro.Schema.Type.UNION;
 import static org.apache.hudi.avro.AvroSchemaUtils.createNullableSchema;
 import static org.apache.hudi.avro.AvroSchemaUtils.isNullable;
-import static org.apache.hudi.avro.AvroSchemaUtils.resolveNullableSchema;
 import static org.apache.hudi.avro.AvroSchemaUtils.resolveUnionSchema;
 import static org.apache.hudi.common.util.DateTimeUtils.instantToMicros;
 import static org.apache.hudi.common.util.DateTimeUtils.microsToInstant;
@@ -739,6 +740,7 @@ public static Schema getNestedFieldSchemaFromRecord(GenericRecord record, String
    */
   public static Schema getNestedFieldSchemaFromWriteSchema(Schema writeSchema, String fieldName) {
     String[] parts = fieldName.split("\\.");
+    Schema currentSchema = writeSchema;
     int i = 0;
     for (; i < parts.length; i++) {
       String part = parts[i];
@@ -1375,6 +1377,10 @@ public static boolean gteqAvro1_10() {
     return StringUtils.compareVersions(AVRO_VERSION, "1.10") >= 0;
   }
 
+  static boolean gteqAvro1_12() {
+    return StringUtils.compareVersions(AVRO_VERSION, "1.12") >= 0;
+  }
+
   /**
    * Wraps a value into Avro type wrapper.
    *
@@ -1502,4 +1508,73 @@ private static boolean isLocalTimestampMicros(LogicalType logicalType) {
     }
   }
 
+  private static Object convertDefaultValueForAvroCompatibility(Object defaultValue) {
+    if (gteqAvro1_12() && defaultValue instanceof byte[]) {
+      // For Avro 1.12.0 compatibility, a byte-array default value is converted to a
+      // String so that the correct JsonNode is used for the default value during validation,
+      // instead of relying directly on Avro's JacksonUtils.toJsonNode, which is called
+      // by the `Schema.Field` constructor.
+      // The String conversion logic is copied from JacksonUtils.toJsonNode in Avro 1.11.4.
+      return new String((byte[]) defaultValue, StandardCharsets.ISO_8859_1);
+    }
+    return defaultValue;
+  }
+
+  /**
+   * Creates a new Avro Schema.Field from an existing field, with special handling for
+   * default values to ensure compatibility with Avro 1.12.0 and later versions.
+   *
+   * @param field the original Schema.Field to create a new field from
+   * @return a new Schema.Field built from the original's name, schema, doc and (properly formatted) default value only
+   */
+  public static Schema.Field createNewSchemaField(Schema.Field field) {
+    return createNewSchemaField(field.name(), field.schema(), field.doc(), field.defaultVal());
+  }
+
+  /**
+   * Creates a new Avro Schema.Field with special handling for default values to ensure
+   * compatibility with Avro 1.12.0 and later versions.
+   *
+   * <p>In Avro 1.12.0+, the validation of default values for bytes fields is stricter.
+   * When the default value is a byte array, it needs to be converted to a String using
+   * ISO-8859-1 encoding so that the correct JsonNode type (TextNode) is used for validation,
+   * rather than BinaryNode which would fail validation. Changes in Avro 1.12.0 that
+   * lead to this behavior: [AVRO-3876] https://github.com/apache/avro/pull/2529
+   *
+   * <p>This conversion ensures that schemas with bytes fields having default values
+   * can be properly constructed without AvroTypeException in Avro 1.12.0+.
+   *
+   * @param name         the name of the field
+   * @param schema       the schema of the field
+   * @param doc          the documentation for the field (can be null)
+   * @param defaultValue the default value for the field (can be null); passed through unchanged unless it is a byte[] on Avro 1.12+
+   * @return a new Schema.Field with properly formatted default value for Avro 1.12.0+ compatibility
+   */
+  public static Schema.Field createNewSchemaField(String name, Schema schema, String doc, Object defaultValue) {
+    return new Schema.Field(name, schema, doc, convertDefaultValueForAvroCompatibility(defaultValue));
+  }
+
+  /**
+   * Creates a new Avro Schema.Field with special handling for default values to ensure
+   * compatibility with Avro 1.12.0 and later versions.
+   *
+   * <p>In Avro 1.12.0+, the validation of default values for bytes fields is stricter.
+   * When the default value is a byte array, it needs to be converted to a String using
+   * ISO-8859-1 encoding so that the correct JsonNode type (TextNode) is used for validation,
+   * rather than BinaryNode which would fail validation. Changes in Avro 1.12.0 that
+   * lead to this behavior: [AVRO-3876] https://github.com/apache/avro/pull/2529
+   *
+   * <p>This conversion ensures that schemas with bytes fields having default values
+   * can be properly constructed without AvroTypeException in Avro 1.12.0+.
+   *
+   * @param name         the name of the field
+   * @param schema       the schema of the field
+   * @param doc          the documentation for the field (can be null)
+   * @param defaultValue the default value for the field (can be null); passed through unchanged unless it is a byte[] on Avro 1.12+
+   * @param order        the sort order for this field (previously documented as nullable, defaulting to ascending - TODO confirm Avro's Schema.Field constructor accepts null)
+   * @return a new Schema.Field with properly formatted default value for Avro 1.12.0+ compatibility
+   */
+  public static Schema.Field createNewSchemaField(String name, Schema schema, String doc, Object defaultValue, Order order) {
+    return new Schema.Field(name, schema, doc, convertDefaultValueForAvroCompatibility(defaultValue), order);
+  }
 }
@@ -637,6 +637,19 @@ public final List<StoragePath> getPartitionPaths() {
     }
   }
 
+  public final List<String> getPartitionNames() { // lists the partition path of every stored, non-replaced file group
+    try {
+      readLock.lock();
+      return fetchAllStoredFileGroups()
+          .filter(fg -> !isFileGroupReplaced(fg)) // drop file groups reported as replaced
+          .map(HoodieFileGroup::getPartitionPath)
+          .distinct() // report each partition path only once
+          .collect(Collectors.toList());
+    } finally {
+      readLock.unlock(); // always release the read lock, even if the pipeline above throws
+    }
+  }
+
   @Override
   public final Stream<Pair<String, CompactionOperation>> getPendingLogCompactionOperations() {
     try {
 
@@ -18,7 +18,6 @@
 
 package org.apache.hudi.metadata;
 
-import org.apache.hudi.avro.AvroSchemaUtils;
 import org.apache.hudi.avro.ConvertingGenericData;
 import org.apache.hudi.avro.HoodieAvroUtils;
 import org.apache.hudi.avro.model.HoodieCleanMetadata;
 
@@ -89,6 +89,7 @@
 import java.util.stream.IntStream;
 import java.util.stream.Stream;
 
+import static org.apache.hudi.avro.HoodieAvroUtils.createNewSchemaField;
 import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes;
 import static org.apache.hudi.common.util.ValidationUtils.checkState;
 
@@ -216,6 +217,7 @@ public class HoodieTestDataGenerator implements AutoCloseable {
   private final String[] partitionPaths;
   //maintains the count of existing keys schema wise
   private Map<String, Integer> numKeysBySchema;
+  private Option<Schema> extendedSchema = Option.empty();
 
   public HoodieTestDataGenerator(long seed) {
     this(seed, DEFAULT_PARTITION_PATHS, new HashMap<>());
Original file line number	Diff line number	Diff line change
`@@ -177,7 +177,7 @@ private StructType convertToStruct(MessageType messageType) {`
`177`	`177`
`178`	`178`	`@Override`
`179`	`179`	`public void close() {`
`180`		`- readerIterators.forEach(ParquetReaderIterator::close);`
	`180`	`+ readerIterators.forEach(it -> it.close());`
`181`	`181`	`}`
`182`	`182`
`183`	`183`	`@Override`