luocooong commented on code in PR #2515:
URL: https://github.com/apache/drill/pull/2515#discussion_r851624724
##########
contrib/format-hdf5/src/main/java/org/apache/drill/exec/store/hdf5/HDF5BatchReader.java:
##########
@@ -171,107 +168,109 @@ public HDF5ReaderConfig(HDF5FormatPlugin plugin, HDF5FormatConfig formatConfig)
}
}
- public HDF5BatchReader(HDF5ReaderConfig readerConfig, int maxRecords) {
- this.readerConfig = readerConfig;
- this.maxRecords = maxRecords;
+  public HDF5BatchReader(HDF5ReaderConfig config, EasySubScan scan,
+      FileSchemaNegotiator negotiator) {
+ errorContext = negotiator.parentErrorContext();
+ file = negotiator.file();
+ readerConfig = config;
dataWriters = new ArrayList<>();
- this.showMetadataPreview = readerConfig.formatConfig.showPreview();
- }
+ showMetadataPreview = readerConfig.formatConfig.showPreview();
- @Override
- public boolean open(FileSchemaNegotiator negotiator) {
- split = negotiator.split();
- errorContext = negotiator.parentErrorContext();
    // Since the HDF file reader uses a stream to actually read the file, the file name from the
    // module is incorrect.
- fileName = split.getPath().getName();
- try {
- openFile(negotiator);
- } catch (IOException e) {
- throw UserException
- .dataReadError(e)
- .addContext("Failed to close input file: %s", split.getPath())
- .addContext(errorContext)
- .build(logger);
- }
+ fileName = file.split().getPath().getName();
- ResultSetLoader loader;
- if (readerConfig.defaultPath == null) {
- // Get file metadata
-    List<HDF5DrillMetadata> metadata = getFileMetadata(hdfFile, new ArrayList<>());
- metadataIterator = metadata.iterator();
-
- // Schema for Metadata query
- SchemaBuilder builder = new SchemaBuilder()
- .addNullable(PATH_COLUMN_NAME, MinorType.VARCHAR)
- .addNullable(DATA_TYPE_COLUMN_NAME, MinorType.VARCHAR)
- .addNullable(FILE_NAME_COLUMN_NAME, MinorType.VARCHAR)
- .addNullable(DATA_SIZE_COLUMN_NAME, MinorType.BIGINT)
- .addNullable(IS_LINK_COLUMN_NAME, MinorType.BIT)
- .addNullable(ELEMENT_COUNT_NAME, MinorType.BIGINT)
- .addNullable(DATASET_DATA_TYPE_NAME, MinorType.VARCHAR)
- .addNullable(DIMENSIONS_FIELD_NAME, MinorType.VARCHAR);
-
- negotiator.tableSchema(builder.buildSchema(), false);
-
- loader = negotiator.build();
- dimensions = new int[0];
- rowWriter = loader.writer();
-
- } else {
-      // This is the case when the default path is specified. Since the user is explicitly asking for a dataset
-      // Drill can obtain the schema by getting the datatypes below and ultimately mapping that schema to columns
- Dataset dataSet = hdfFile.getDatasetByPath(readerConfig.defaultPath);
- dimensions = dataSet.getDimensions();
-
- loader = negotiator.build();
- rowWriter = loader.writer();
-      writerSpec = new WriterSpec(rowWriter, negotiator.providedSchema(),
-        negotiator.parentErrorContext());
- if (dimensions.length <= 1) {
- buildSchemaFor1DimensionalDataset(dataSet);
- } else if (dimensions.length == 2) {
- buildSchemaFor2DimensionalDataset(dataSet);
- } else {
- // Case for datasets of greater than 2D
- // These are automatically flattened
- buildSchemaFor2DimensionalDataset(dataSet);
+ { // Opens an HDF5 file
Review Comment:
Interesting thing: when we use EVF2, the batch reader only needs a constructor
to perform all of its initialization. But that means a lot of code goes into
that one function: initializing the final variables, opening files, defining
the schema, and so on. So I used code blocks to group these actions and make
them easier to read.
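
For context, here is a minimal, self-contained sketch of that grouping
pattern, using hypothetical class and field names rather than the actual
HDF5BatchReader code. Java allows bare { ... } blocks as statements inside a
constructor body; each block limits the scope of its locals and marks one
initialization step:

    import java.util.ArrayList;
    import java.util.List;

    public class BlockGroupedReader {

      private final String fileName;
      private final List<String> columnNames;

      public BlockGroupedReader(String path) {
        // Assign the final fields first, as in the constructor above.
        fileName = path;
        columnNames = new ArrayList<>();

        { // Opens the file (simulated here with a print).
          System.out.println("Opening " + fileName);
        }

        { // Defines the schema (simulated here with two column names).
          columnNames.add("path");
          columnNames.add("data_type");
        }
      }

      public static void main(String[] args) {
        new BlockGroupedReader("example.h5");
      }
    }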