This is an automated email from the ASF dual-hosted git repository. jenniferdai pushed a commit to branch remotefiles in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
commit c0ca7de32cdb6d42e192bb9f7fe013cc73a0013c Author: Jennifer Dai <[email protected]> AuthorDate: Mon Mar 11 14:34:02 2019 -0700 Adding ability to set input file path to remote path --- .../core/indexsegment/generator/SegmentGeneratorConfig.java | 4 +++- .../maker/MetadataAndDictionaryAggregationPlanMakerTest.java | 4 ++-- .../index/creator/SegmentGenerationWithBytesTypeTest.java | 2 +- .../pinot/core/startree/hll/SegmentWithHllIndexCreateHelper.java | 2 +- .../java/org/apache/pinot/queries/BaseMultiValueQueriesTest.java | 2 +- .../org/apache/pinot/queries/BaseSingleValueQueriesTest.java | 2 +- .../test/java/org/apache/pinot/queries/FastHllQueriesTest.java | 2 +- .../org/apache/pinot/segments/v1/creator/SegmentTestUtils.java | 6 +++--- .../main/java/org/apache/pinot/hadoop/io/PinotRecordWriter.java | 2 +- .../java/org/apache/pinot/hadoop/job/JobConfigConstants.java | 3 +++ .../apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java | 9 ++++++++- .../test/java/org/apache/pinot/server/util/SegmentTestUtils.java | 2 +- .../apache/pinot/tools/admin/command/CreateSegmentCommand.java | 2 +- .../tools/segment/converter/ColumnarToStarTreeConverter.java | 2 +- 14 files changed, 28 insertions(+), 16 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/indexsegment/generator/SegmentGeneratorConfig.java b/pinot-core/src/main/java/org/apache/pinot/core/indexsegment/generator/SegmentGeneratorConfig.java index bc176bd..a43691c 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/indexsegment/generator/SegmentGeneratorConfig.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/indexsegment/generator/SegmentGeneratorConfig.java @@ -310,10 +310,12 @@ public class SegmentGeneratorConfig { return _inputFilePath; } - public void setInputFilePath(String inputFilePath) { + public void setInputFilePath(String inputFilePath, boolean isLocalPath) { Preconditions.checkNotNull(inputFilePath); File inputFile = new File(inputFilePath); + if (isLocalPath) { 
Preconditions.checkState(inputFile.exists(), "Input path {} does not exist.", inputFilePath); + } _inputFilePath = inputFile.getAbsolutePath(); } diff --git a/pinot-core/src/test/java/org/apache/pinot/core/plan/maker/MetadataAndDictionaryAggregationPlanMakerTest.java b/pinot-core/src/test/java/org/apache/pinot/core/plan/maker/MetadataAndDictionaryAggregationPlanMakerTest.java index 19c207b..e1a75d5 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/plan/maker/MetadataAndDictionaryAggregationPlanMakerTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/plan/maker/MetadataAndDictionaryAggregationPlanMakerTest.java @@ -90,7 +90,7 @@ public class MetadataAndDictionaryAggregationPlanMakerTest { // Create the segment generator config. SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(schema); - segmentGeneratorConfig.setInputFilePath(filePath); + segmentGeneratorConfig.setInputFilePath(filePath, true); segmentGeneratorConfig.setTableName("testTable"); segmentGeneratorConfig.setSegmentName(SEGMENT_NAME); segmentGeneratorConfig.setOutDir(INDEX_DIR.getAbsolutePath()); @@ -116,7 +116,7 @@ public class MetadataAndDictionaryAggregationPlanMakerTest { // Create the segment generator config. 
segmentGeneratorConfig = new SegmentGeneratorConfig(schema); - segmentGeneratorConfig.setInputFilePath(filePath); + segmentGeneratorConfig.setInputFilePath(filePath, true); segmentGeneratorConfig.setTableName("testTableStarTree"); segmentGeneratorConfig.setSegmentName(SEGMENT_NAME_STARTREE); segmentGeneratorConfig.setOutDir(INDEX_DIR_STARTREE.getAbsolutePath()); diff --git a/pinot-core/src/test/java/org/apache/pinot/core/segment/index/creator/SegmentGenerationWithBytesTypeTest.java b/pinot-core/src/test/java/org/apache/pinot/core/segment/index/creator/SegmentGenerationWithBytesTypeTest.java index 6d6d810..bc4096e 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/segment/index/creator/SegmentGenerationWithBytesTypeTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/segment/index/creator/SegmentGenerationWithBytesTypeTest.java @@ -320,7 +320,7 @@ public class SegmentGenerationWithBytesTypeTest { private IndexSegment buildSegmentFromAvro(Schema schema, String dirName, String avroName, String segmentName) throws Exception { SegmentGeneratorConfig config = new SegmentGeneratorConfig(); - config.setInputFilePath(dirName + File.separator + avroName); + config.setInputFilePath(dirName + File.separator + avroName, true); config.setOutDir(dirName); config.setSegmentName(segmentName); config.setSchema(schema); diff --git a/pinot-core/src/test/java/org/apache/pinot/core/startree/hll/SegmentWithHllIndexCreateHelper.java b/pinot-core/src/test/java/org/apache/pinot/core/startree/hll/SegmentWithHllIndexCreateHelper.java index ca390cb..67cabe8 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/startree/hll/SegmentWithHllIndexCreateHelper.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/startree/hll/SegmentWithHllIndexCreateHelper.java @@ -124,7 +124,7 @@ public class SegmentWithHllIndexCreateHelper { new SegmentGeneratorConfig(SegmentTestUtils.extractSchemaFromAvroWithoutTime(inputAvro)); // set other fields in segmentGenConfig - 
segmentGenConfig.setInputFilePath(inputAvro.getAbsolutePath()); + segmentGenConfig.setInputFilePath(inputAvro.getAbsolutePath(), true); segmentGenConfig.setTimeColumnName(timeColumnName); segmentGenConfig.setSegmentTimeUnit(timeUnit); segmentGenConfig.setFormat(FileFormat.AVRO); diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/BaseMultiValueQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/BaseMultiValueQueriesTest.java index d642210..791ee01 100644 --- a/pinot-core/src/test/java/org/apache/pinot/queries/BaseMultiValueQueriesTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/queries/BaseMultiValueQueriesTest.java @@ -96,7 +96,7 @@ public abstract class BaseMultiValueQueriesTest extends BaseQueriesTest { // Create the segment generator config. SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(schema); - segmentGeneratorConfig.setInputFilePath(filePath); + segmentGeneratorConfig.setInputFilePath(filePath, true); segmentGeneratorConfig.setTableName("testTable"); segmentGeneratorConfig.setOutDir(INDEX_DIR.getAbsolutePath()); segmentGeneratorConfig.setInvertedIndexCreationColumns(Arrays.asList("column3", "column7", "column8", "column9")); diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/BaseSingleValueQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/BaseSingleValueQueriesTest.java index b5b2176..b1465b1 100644 --- a/pinot-core/src/test/java/org/apache/pinot/queries/BaseSingleValueQueriesTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/queries/BaseSingleValueQueriesTest.java @@ -98,7 +98,7 @@ public abstract class BaseSingleValueQueriesTest extends BaseQueriesTest { // Create the segment generator config. 
SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(schema); - segmentGeneratorConfig.setInputFilePath(filePath); + segmentGeneratorConfig.setInputFilePath(filePath, true); segmentGeneratorConfig.setTableName("testTable"); segmentGeneratorConfig.setOutDir(INDEX_DIR.getAbsolutePath()); segmentGeneratorConfig diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/FastHllQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/FastHllQueriesTest.java index 57f0171..4ac4171 100644 --- a/pinot-core/src/test/java/org/apache/pinot/queries/FastHllQueriesTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/queries/FastHllQueriesTest.java @@ -232,7 +232,7 @@ public class FastHllQueriesTest extends BaseQueriesTest { // Create the segment generator config SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(schemaBuilder.build()); - segmentGeneratorConfig.setInputFilePath(filePath); + segmentGeneratorConfig.setInputFilePath(filePath, true); segmentGeneratorConfig.setTableName("testTable"); segmentGeneratorConfig.setOutDir(INDEX_DIR.getAbsolutePath()); segmentGeneratorConfig diff --git a/pinot-core/src/test/java/org/apache/pinot/segments/v1/creator/SegmentTestUtils.java b/pinot-core/src/test/java/org/apache/pinot/segments/v1/creator/SegmentTestUtils.java index 2dcaa4e..3c30b0b 100644 --- a/pinot-core/src/test/java/org/apache/pinot/segments/v1/creator/SegmentTestUtils.java +++ b/pinot-core/src/test/java/org/apache/pinot/segments/v1/creator/SegmentTestUtils.java @@ -62,7 +62,7 @@ public class SegmentTestUtils { throws IOException { SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(extractSchemaFromAvroWithoutTime(avroFile)); - segmentGeneratorConfig.setInputFilePath(avroFile.getAbsolutePath()); + segmentGeneratorConfig.setInputFilePath(avroFile.getAbsolutePath(), true); segmentGeneratorConfig.setOutDir(outputDir.getAbsolutePath()); segmentGeneratorConfig.setTableName(tableName); return 
segmentGeneratorConfig; @@ -73,7 +73,7 @@ public class SegmentTestUtils { throws IOException { final SegmentGeneratorConfig segmentGenSpec = new SegmentGeneratorConfig(extractSchemaFromAvroWithoutTime(inputAvro)); - segmentGenSpec.setInputFilePath(inputAvro.getAbsolutePath()); + segmentGenSpec.setInputFilePath(inputAvro.getAbsolutePath(), true); segmentGenSpec.setTimeColumnName(timeColumn); segmentGenSpec.setSegmentTimeUnit(timeUnit); segmentGenSpec.setFormat(FileFormat.AVRO); @@ -87,7 +87,7 @@ public class SegmentTestUtils { public static SegmentGeneratorConfig getSegmentGeneratorConfigWithSchema(File inputAvro, File outputDir, String tableName, Schema schema) { SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(schema); - segmentGeneratorConfig.setInputFilePath(inputAvro.getAbsolutePath()); + segmentGeneratorConfig.setInputFilePath(inputAvro.getAbsolutePath(), true); segmentGeneratorConfig.setOutDir(outputDir.getAbsolutePath()); segmentGeneratorConfig.setFormat(FileFormat.AVRO); segmentGeneratorConfig.setSegmentVersion(SegmentVersion.v1); diff --git a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/io/PinotRecordWriter.java b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/io/PinotRecordWriter.java index 6585295..b8200db 100644 --- a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/io/PinotRecordWriter.java +++ b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/io/PinotRecordWriter.java @@ -100,7 +100,7 @@ public class PinotRecordWriter<K, V> extends RecordWriter<K, V> { private void createSegment(String inputFile, SegmentIndexCreationDriver driver) { try { - _segmentConfig.setInputFilePath(inputFile); + _segmentConfig.setInputFilePath(inputFile, true); driver.init(_segmentConfig); driver.build(); } catch (Exception e) { diff --git a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/JobConfigConstants.java b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/JobConfigConstants.java index 35557b5..1b67306 100644 --- 
a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/JobConfigConstants.java +++ b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/JobConfigConstants.java @@ -52,4 +52,7 @@ public class JobConfigConstants { // The path to the record reader to be configured public static final String RECORD_READER_PATH = "record.reader.path"; + + // Path to segments, local v. on hdfs + public static final String USE_HDFS_PATH_FOR_SEGMENT_GEN = "use.hdfs.path.for.segment.gen"; } diff --git a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java index 21f3f16..469ac2e 100644 --- a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java +++ b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java @@ -83,6 +83,8 @@ public class SegmentCreationMapper extends Mapper<LongWritable, Text, LongWritab protected FileSystem _fileSystem; + protected boolean _useHDFSPathForSegmentGen; + @Override public void setup(Context context) throws IOException, InterruptedException { @@ -103,6 +105,7 @@ public class SegmentCreationMapper extends Mapper<LongWritable, Text, LongWritab } _recordReaderPath = _jobConf.get(JobConfigConstants.RECORD_READER_PATH, null); + _useHDFSPathForSegmentGen = _jobConf.getBoolean(JobConfigConstants.USE_HDFS_PATH_FOR_SEGMENT_GEN, false); // Set up segment name generator String segmentNameGeneratorType = @@ -203,7 +206,11 @@ public class SegmentCreationMapper extends Mapper<LongWritable, Text, LongWritab SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(_tableConfig, _schema); segmentGeneratorConfig.setTableName(_rawTableName); - segmentGeneratorConfig.setInputFilePath(localInputFile.getPath()); + if (_useHDFSPathForSegmentGen) { + segmentGeneratorConfig.setInputFilePath(hdfsInputFile.toString(), false); + } else { + 
segmentGeneratorConfig.setInputFilePath(localInputFile.getPath(), true); + } segmentGeneratorConfig.setOutDir(_localSegmentDir.getPath()); segmentGeneratorConfig.setSegmentNameGenerator(_segmentNameGenerator); segmentGeneratorConfig.setSequenceId(sequenceId); diff --git a/pinot-server/src/test/java/org/apache/pinot/server/util/SegmentTestUtils.java b/pinot-server/src/test/java/org/apache/pinot/server/util/SegmentTestUtils.java index be30064..27f613d 100644 --- a/pinot-server/src/test/java/org/apache/pinot/server/util/SegmentTestUtils.java +++ b/pinot-server/src/test/java/org/apache/pinot/server/util/SegmentTestUtils.java @@ -45,7 +45,7 @@ public class SegmentTestUtils { segmentGeneratorConfig = new SegmentGeneratorConfig(pinotSchema); } - segmentGeneratorConfig.setInputFilePath(inputAvro.getAbsolutePath()); + segmentGeneratorConfig.setInputFilePath(inputAvro.getAbsolutePath(), true); segmentGeneratorConfig.setSegmentTimeUnit(timeUnit); if (inputAvro.getName().endsWith("gz")) { segmentGeneratorConfig.setFormat(FileFormat.GZIPPED_AVRO); diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/CreateSegmentCommand.java b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/CreateSegmentCommand.java index 95c1371..058e28e 100644 --- a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/CreateSegmentCommand.java +++ b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/CreateSegmentCommand.java @@ -370,7 +370,7 @@ public class CreateSegmentCommand extends AbstractBaseAdminCommand implements Co public void run() { try { SegmentGeneratorConfig config = new SegmentGeneratorConfig(segmentGeneratorConfig); - config.setInputFilePath(file.getAbsolutePath()); + config.setInputFilePath(file.getAbsolutePath(), true); config.setSegmentName(_segmentName + "_" + segCnt); config.loadConfigFiles(); diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/segment/converter/ColumnarToStarTreeConverter.java 
b/pinot-tools/src/main/java/org/apache/pinot/tools/segment/converter/ColumnarToStarTreeConverter.java index 34094b9..42d2db7 100644 --- a/pinot-tools/src/main/java/org/apache/pinot/tools/segment/converter/ColumnarToStarTreeConverter.java +++ b/pinot-tools/src/main/java/org/apache/pinot/tools/segment/converter/ColumnarToStarTreeConverter.java @@ -111,7 +111,7 @@ public class ColumnarToStarTreeConverter { SegmentMetadata segmentMetadata = new SegmentMetadataImpl(columnarSegment); SegmentGeneratorConfig config = new SegmentGeneratorConfig(segmentMetadata.getSchema()); config.setDataDir(_inputDirName); - config.setInputFilePath(columnarSegment.getAbsolutePath()); + config.setInputFilePath(columnarSegment.getAbsolutePath(), true); config.setFormat(FileFormat.PINOT); config.setOutDir(_outputDirName); config.setOverwrite(_overwrite); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
