diff --git a/pinot-plugins/pinot-input-format/pinot-parquet/src/main/java/org/apache/pinot/plugin/inputformat/parquet/ParquetAvroRecordExtractor.java b/pinot-plugins/pinot-input-format/pinot-parquet/src/main/java/org/apache/pinot/plugin/inputformat/parquet/ParquetAvroRecordExtractor.java
index 8f8ba25a1cd3..8d8f282eef18 100644
--- a/pinot-plugins/pinot-input-format/pinot-parquet/src/main/java/org/apache/pinot/plugin/inputformat/parquet/ParquetAvroRecordExtractor.java
+++ b/pinot-plugins/pinot-input-format/pinot-parquet/src/main/java/org/apache/pinot/plugin/inputformat/parquet/ParquetAvroRecordExtractor.java
@@ -21,7 +21,6 @@
import java.util.Set;
import javax.annotation.Nullable;
import org.apache.avro.Schema;
-import org.apache.parquet.schema.PrimitiveType;
import org.apache.pinot.plugin.inputformat.avro.AvroRecordExtractor;
import org.apache.pinot.spi.data.readers.RecordExtractorConfig;
@@ -40,10 +39,10 @@ protected Object transformValue(Object value, Schema.Field field) {
Object handleDeprecatedTypes(Object value, Schema.Field field) {
Schema.Type avroColumnType = field.schema().getType();
- if (avroColumnType == org.apache.avro.Schema.Type.UNION) {
- org.apache.avro.Schema nonNullSchema = null;
- for (org.apache.avro.Schema childFieldSchema : field.schema().getTypes()) {
- if (childFieldSchema.getType() != org.apache.avro.Schema.Type.NULL) {
+ if (avroColumnType == Schema.Type.UNION) {
+ Schema nonNullSchema = null;
+ for (Schema childFieldSchema : field.schema().getTypes()) {
+ if (childFieldSchema.getType() != Schema.Type.NULL) {
if (nonNullSchema == null) {
nonNullSchema = childFieldSchema;
} else {
@@ -51,10 +50,15 @@ Object handleDeprecatedTypes(Object value, Schema.Field field) {
}
}
}
+ assert nonNullSchema != null;
- //INT96 is deprecated. We convert to long as we do in the native parquet extractor.
- if (nonNullSchema.getName().equals(PrimitiveType.PrimitiveTypeName.INT96.name())) {
- return ParquetNativeRecordExtractor.convertInt96ToLong((byte[]) value);
+ // NOTE:
+ // INT96 is deprecated. We convert to long as we do in the native parquet extractor.
+ // See org.apache.parquet.avro.AvroSchemaConverter about how INT96 is converted into Avro schema.
+ // We have to rely on the doc to determine whether a field is INT96.
+ if (nonNullSchema.getType() == Schema.Type.FIXED && nonNullSchema.getFixedSize() == 12
+ && "INT96 represented as byte[12]".equals(nonNullSchema.getDoc())) {
+ return ParquetNativeRecordExtractor.convertInt96ToLong((byte[]) value);
}
}
return value;
diff --git a/pom.xml b/pom.xml
index c144e970cb1b..f194ad393ce8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -140,7 +140,7 @@
18.3.0
1.12.1
- 1.16.0
+ 1.17.0
1.9.8
2.8.1
1.3.2