[ https://issues.apache.org/jira/browse/PARQUET-2347?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17768415#comment-17768415 ]
ASF GitHub Bot commented on PARQUET-2347: ----------------------------------------- amousavigourabi commented on code in PR #1141: URL: https://github.com/apache/parquet-mr/pull/1141#discussion_r1335213650 ########## parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftReadSupport.java: ########## @@ -254,29 +273,64 @@ public RecordMaterializer<T> prepareForRead(Configuration configuration, configuration); } - @SuppressWarnings("unchecked") + @Override + public RecordMaterializer<T> prepareForRead(ParquetConfiguration configuration, + Map<String, String> keyValueMetaData, MessageType fileSchema, + org.apache.parquet.hadoop.api.ReadSupport.ReadContext readContext) { + ThriftMetaData thriftMetaData = ThriftMetaData.fromExtraMetaData(keyValueMetaData); + try { + initThriftClass(thriftMetaData, configuration); + } catch (ClassNotFoundException e) { + throw new RuntimeException("Cannot find Thrift object class for metadata: " + thriftMetaData, e); + } + + // if there was not metadata in the file, get it from requested class + if (thriftMetaData == null) { + thriftMetaData = ThriftMetaData.fromThriftClass(thriftClass); + } + + String converterClassName = configuration.get(RECORD_CONVERTER_CLASS_KEY, RECORD_CONVERTER_DEFAULT); + return getRecordConverterInstance(converterClassName, thriftClass, + readContext.getRequestedSchema(), thriftMetaData.getDescriptor(), + configuration); + } + private static <T> ThriftRecordConverter<T> getRecordConverterInstance( String converterClassName, Class<T> thriftClass, MessageType requestedSchema, StructType descriptor, Configuration conf) { - Class<ThriftRecordConverter<T>> converterClass; + return getRecordConverterInstance(converterClassName, thriftClass, requestedSchema, descriptor, conf, Configuration.class); + } + + private static <T> ThriftRecordConverter<T> getRecordConverterInstance( + String converterClassName, Class<T> thriftClass, + MessageType requestedSchema, StructType descriptor, ParquetConfiguration conf) 
{ + return getRecordConverterInstance(converterClassName, thriftClass, requestedSchema, descriptor, conf, ParquetConfiguration.class); + } + + @SuppressWarnings("unchecked") + private static <T1, T2> ThriftRecordConverter<T1> getRecordConverterInstance( Review Comment: Done > Add interface layer between Parquet and Hadoop Configuration > ------------------------------------------------------------ > > Key: PARQUET-2347 > URL: https://issues.apache.org/jira/browse/PARQUET-2347 > Project: Parquet > Issue Type: Improvement > Components: parquet-mr > Reporter: Atour Mousavi Gourabi > Priority: Minor > > Parquet relies heavily on a few Hadoop classes, such as its Configuration > class, which is used throughout Parquet's reading and writing logic. If we > include our own interface for this, this could potentially allow users to use > Parquet's readers and writers without the Hadoop dependency later on. > In order to preserve backward compatibility and avoid breaking downstream > projects, the constructors and methods using Hadoop's Configuration should be > preserved for the time being, though I would favour deprecation in the near > future. > This is part of an effort that has been [discussed on the dev mailing > list|https://lists.apache.org/thread/4wl0l3d9dkpx4w69jx3rwnjk034dtqr8]. -- This message was sent by Atlassian Jira (v8.20.10#820010)