Repository: carbondata Updated Branches: refs/heads/master 52f8d7111 -> f910cfa98
[CARBONDATA-2276][SDK] Support API to read schema in data file and schema file This closes #2099 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/f910cfa9 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/f910cfa9 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/f910cfa9 Branch: refs/heads/master Commit: f910cfa98d1d558779736e13e534bfff6c981172 Parents: 52f8d71 Author: Jacky Li <[email protected]> Authored: Sat Mar 24 22:38:20 2018 +0800 Committer: ravipesala <[email protected]> Committed: Mon Apr 2 14:28:58 2018 +0530 ---------------------------------------------------------------------- .../core/metadata/schema/SchemaReader.java | 2 +- .../core/reader/CarbonHeaderReader.java | 19 +++ .../apache/carbondata/core/util/CarbonUtil.java | 100 ++++++---------- .../carbondata/sdk/file/CarbonReader.java | 43 +++++++ .../sdk/file/CSVCarbonWriterTest.java | 110 +---------------- .../carbondata/sdk/file/CarbonReaderTest.java | 118 +++++++++++++++++++ .../apache/carbondata/sdk/file/TestUtil.java | 98 +++++++++++++++ 7 files changed, 322 insertions(+), 168 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/f910cfa9/core/src/main/java/org/apache/carbondata/core/metadata/schema/SchemaReader.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/schema/SchemaReader.java b/core/src/main/java/org/apache/carbondata/core/metadata/schema/SchemaReader.java index 787a9b9..54814cd 100644 --- a/core/src/main/java/org/apache/carbondata/core/metadata/schema/SchemaReader.java +++ b/core/src/main/java/org/apache/carbondata/core/metadata/schema/SchemaReader.java @@ -87,7 +87,7 @@ public class SchemaReader { // Convert the ColumnSchema -> TableSchema -> TableInfo. // Return the TableInfo. 
org.apache.carbondata.format.TableInfo tableInfo = - CarbonUtil.inferSchema(identifier.getTablePath(), identifier, false); + CarbonUtil.inferSchema(identifier.getTablePath(), identifier.getTableName()); SchemaConverter schemaConverter = new ThriftWrapperSchemaConverterImpl(); TableInfo wrapperTableInfo = schemaConverter.fromExternalToWrapperTableInfo( tableInfo, identifier.getDatabaseName(), identifier.getTableName(), http://git-wip-us.apache.org/repos/asf/carbondata/blob/f910cfa9/core/src/main/java/org/apache/carbondata/core/reader/CarbonHeaderReader.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/reader/CarbonHeaderReader.java b/core/src/main/java/org/apache/carbondata/core/reader/CarbonHeaderReader.java index ab557cc..9bbdca9 100644 --- a/core/src/main/java/org/apache/carbondata/core/reader/CarbonHeaderReader.java +++ b/core/src/main/java/org/apache/carbondata/core/reader/CarbonHeaderReader.java @@ -17,9 +17,14 @@ package org.apache.carbondata.core.reader; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema; import org.apache.carbondata.format.FileHeader; +import static org.apache.carbondata.core.util.CarbonUtil.thriftColumnSchmeaToWrapperColumnSchema; + import org.apache.thrift.TBase; /** @@ -65,4 +70,18 @@ public class CarbonHeaderReader { }); } + /** + * Read and return the schema in the header + */ + public List<ColumnSchema> readSchema() throws IOException { + FileHeader fileHeader = readHeader(); + List<ColumnSchema> columnSchemaList = new ArrayList<>(); + List<org.apache.carbondata.format.ColumnSchema> table_columns = fileHeader.getColumn_schema(); + for (org.apache.carbondata.format.ColumnSchema table_column : table_columns) { + ColumnSchema col = thriftColumnSchmeaToWrapperColumnSchema(table_column); + col.setColumnReferenceId(col.getColumnUniqueId()); + 
columnSchemaList.add(col); + } + return columnSchemaList; + } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/f910cfa9/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java index 3c347db..8e21d46 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java @@ -86,8 +86,6 @@ import org.apache.carbondata.core.util.path.CarbonTablePath; import org.apache.carbondata.format.BlockletHeader; import org.apache.carbondata.format.DataChunk2; import org.apache.carbondata.format.DataChunk3; -import org.apache.carbondata.format.FileHeader; - import com.google.gson.Gson; import com.google.gson.GsonBuilder; @@ -2336,69 +2334,45 @@ public final class CarbonUtil { /** * This method will read the schema file from a given path * - * @param schemaFilePath - * @return + * @return table info containing the schema */ public static org.apache.carbondata.format.TableInfo inferSchema( - String carbonDataFilePath, AbsoluteTableIdentifier absoluteTableIdentifier, - boolean schemaExists) throws IOException { - TBaseCreator createTBase = new ThriftReader.TBaseCreator() { - public org.apache.thrift.TBase<org.apache.carbondata.format.TableInfo, - org.apache.carbondata.format.TableInfo._Fields> create() { - return new org.apache.carbondata.format.TableInfo(); - } - }; - if (schemaExists == false) { - List<String> filePaths = - getFilePathExternalFilePath(carbonDataFilePath + "/Fact/Part0/Segment_null"); - String fistFilePath = null; - try { - fistFilePath = filePaths.get(0); - } catch (Exception e) { - LOGGER.error("CarbonData file is not present in the table location"); - } - CarbonHeaderReader carbonHeaderReader = new CarbonHeaderReader(fistFilePath); - FileHeader 
fileHeader = carbonHeaderReader.readHeader(); - List<ColumnSchema> columnSchemaList = new ArrayList<ColumnSchema>(); - List<org.apache.carbondata.format.ColumnSchema> table_columns = fileHeader.getColumn_schema(); - for (int i = 0; i < table_columns.size(); i++) { - ColumnSchema col = thriftColumnSchmeaToWrapperColumnSchema(table_columns.get(i)); - col.setColumnReferenceId(col.getColumnUniqueId()); - columnSchemaList.add(col); - } - TableSchema tableSchema = new TableSchema(); - tableSchema.setTableName(absoluteTableIdentifier.getTableName()); - tableSchema.setBucketingInfo(null); - tableSchema.setSchemaEvalution(null); - tableSchema.setTableId(UUID.randomUUID().toString()); - tableSchema.setListOfColumns(columnSchemaList); - - ThriftWrapperSchemaConverterImpl thriftWrapperSchemaConverter = - new ThriftWrapperSchemaConverterImpl(); - SchemaEvolutionEntry schemaEvolutionEntry = new SchemaEvolutionEntry(); - schemaEvolutionEntry.setTimeStamp(System.currentTimeMillis()); - SchemaEvolution schemaEvol = new SchemaEvolution(); - List<SchemaEvolutionEntry> schEntryList = new ArrayList<>(); - schEntryList.add(schemaEvolutionEntry); - schemaEvol.setSchemaEvolutionEntryList(schEntryList); - tableSchema.setSchemaEvalution(schemaEvol); - - org.apache.carbondata.format.TableSchema thriftFactTable = - thriftWrapperSchemaConverter.fromWrapperToExternalTableSchema(tableSchema); - org.apache.carbondata.format.TableInfo tableInfo = - new org.apache.carbondata.format.TableInfo(thriftFactTable, - new ArrayList<org.apache.carbondata.format.TableSchema>()); - - tableInfo.setDataMapSchemas(null); - return tableInfo; - } else { - ThriftReader thriftReader = new ThriftReader(carbonDataFilePath, createTBase); - thriftReader.open(); - org.apache.carbondata.format.TableInfo tableInfo = - (org.apache.carbondata.format.TableInfo) thriftReader.read(); - thriftReader.close(); - return tableInfo; - } + String carbonDataFilePath, String tableName) throws IOException { + List<String> filePaths = + 
getFilePathExternalFilePath(carbonDataFilePath + "/Fact/Part0/Segment_null"); + String fistFilePath = null; + try { + fistFilePath = filePaths.get(0); + } catch (Exception e) { + LOGGER.error("CarbonData file is not present in the table location"); + } + CarbonHeaderReader carbonHeaderReader = new CarbonHeaderReader(fistFilePath); + List<ColumnSchema> columnSchemaList = carbonHeaderReader.readSchema(); + TableSchema tableSchema = new TableSchema(); + tableSchema.setTableName(tableName); + tableSchema.setBucketingInfo(null); + tableSchema.setSchemaEvalution(null); + tableSchema.setTableId(UUID.randomUUID().toString()); + tableSchema.setListOfColumns(columnSchemaList); + + ThriftWrapperSchemaConverterImpl thriftWrapperSchemaConverter = + new ThriftWrapperSchemaConverterImpl(); + SchemaEvolutionEntry schemaEvolutionEntry = new SchemaEvolutionEntry(); + schemaEvolutionEntry.setTimeStamp(System.currentTimeMillis()); + SchemaEvolution schemaEvol = new SchemaEvolution(); + List<SchemaEvolutionEntry> schEntryList = new ArrayList<>(); + schEntryList.add(schemaEvolutionEntry); + schemaEvol.setSchemaEvolutionEntryList(schEntryList); + tableSchema.setSchemaEvalution(schemaEvol); + + org.apache.carbondata.format.TableSchema thriftFactTable = + thriftWrapperSchemaConverter.fromWrapperToExternalTableSchema(tableSchema); + org.apache.carbondata.format.TableInfo tableInfo = + new org.apache.carbondata.format.TableInfo(thriftFactTable, + new ArrayList<org.apache.carbondata.format.TableSchema>()); + + tableInfo.setDataMapSchemas(null); + return tableInfo; } http://git-wip-us.apache.org/repos/asf/carbondata/blob/f910cfa9/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReader.java ---------------------------------------------------------------------- diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReader.java b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReader.java index 8cb8b2c..210d516 100644 --- 
a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReader.java +++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReader.java @@ -20,8 +20,22 @@ package org.apache.carbondata.sdk.file; import java.io.IOException; import java.util.List; +import org.apache.carbondata.common.annotations.InterfaceAudience; +import org.apache.carbondata.common.annotations.InterfaceStability; +import org.apache.carbondata.core.metadata.converter.SchemaConverter; +import org.apache.carbondata.core.metadata.converter.ThriftWrapperSchemaConverterImpl; +import org.apache.carbondata.core.metadata.schema.table.TableInfo; +import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema; +import org.apache.carbondata.core.reader.CarbonHeaderReader; +import org.apache.carbondata.core.util.CarbonUtil; + import org.apache.hadoop.mapreduce.RecordReader; +/** + * Reader for carbondata file + */ +@InterfaceAudience.User +@InterfaceStability.Evolving public class CarbonReader<T> { private List<RecordReader<Void, T>> readers; @@ -30,6 +44,9 @@ public class CarbonReader<T> { private int index; + /** + * Call {@link #builder(String)} to construct an instance + */ CarbonReader(List<RecordReader<Void, T>> readers) { if (readers.size() == 0) { throw new IllegalArgumentException("no reader"); @@ -39,6 +56,9 @@ public class CarbonReader<T> { this.currentReader = readers.get(0); } + /** + * Return true if has next row + */ public boolean hasNext() throws IOException, InterruptedException { if (currentReader.nextKeyValue()) { return true; @@ -54,11 +74,34 @@ public class CarbonReader<T> { } } + /** + * Read and return next row object + */ public T readNextRow() throws IOException, InterruptedException { return currentReader.getCurrentValue(); } + /** + * Return a new {@link CarbonReaderBuilder} instance + */ public static CarbonReaderBuilder builder(String tablePath) { return new CarbonReaderBuilder(tablePath); } + + /** + * Read carbondata file and return the schema + */ + public static
List<ColumnSchema> readSchemaInDataFile(String dataFilePath) throws IOException { + CarbonHeaderReader reader = new CarbonHeaderReader(dataFilePath); + return reader.readSchema(); + } + + /** + * Read schmea file and return table info object + */ + public static TableInfo readSchemaFile(String schemaFilePath) throws IOException { + org.apache.carbondata.format.TableInfo tableInfo = CarbonUtil.readSchemaFile(schemaFilePath); + SchemaConverter schemaConverter = new ThriftWrapperSchemaConverterImpl(); + return schemaConverter.fromExternalToWrapperTableInfo(tableInfo, "", "", ""); + } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/f910cfa9/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CSVCarbonWriterTest.java ---------------------------------------------------------------------- diff --git a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CSVCarbonWriterTest.java b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CSVCarbonWriterTest.java index 68663ec..eecbf5f 100644 --- a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CSVCarbonWriterTest.java +++ b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CSVCarbonWriterTest.java @@ -47,7 +47,7 @@ public class CSVCarbonWriterTest { fields[0] = new Field("name", DataTypes.STRING); fields[1] = new Field("age", DataTypes.INT); - writeFilesAndVerify(new Schema(fields), path); + TestUtil.writeFilesAndVerify(new Schema(fields), path); FileUtils.deleteDirectory(new File(path)); } @@ -65,105 +65,7 @@ public class CSVCarbonWriterTest { .append("]") .toString(); - writeFilesAndVerify(Schema.parseJson(schema), path); - - FileUtils.deleteDirectory(new File(path)); - } - - private void writeFilesAndVerify(Schema schema, String path) { - writeFilesAndVerify(schema, path, null); - } - - private void writeFilesAndVerify(Schema schema, String path, String[] sortColumns) { - writeFilesAndVerify(100, schema, path, sortColumns, false, -1, -1); - } - - private void writeFilesAndVerify(Schema schema, 
String path, boolean persistSchema) { - writeFilesAndVerify(100, schema, path, null, persistSchema, -1, -1); - } - - /** - * Invoke CarbonWriter API to write carbon files and assert the file is rewritten - * @param rows number of rows to write - * @param schema schema of the file - * @param path local write path - * @param sortColumns sort columns - * @param persistSchema true if want to persist schema file - * @param blockletSize blockletSize in the file, -1 for default size - * @param blockSize blockSize in the file, -1 for default size - */ - private void writeFilesAndVerify(int rows, Schema schema, String path, String[] sortColumns, - boolean persistSchema, int blockletSize, int blockSize) { - try { - CarbonWriterBuilder builder = CarbonWriter.builder() - .withSchema(schema) - .outputPath(path); - if (sortColumns != null) { - builder = builder.sortBy(sortColumns); - } - if (persistSchema) { - builder = builder.persistSchemaFile(true); - } - if (blockletSize != -1) { - builder = builder.withBlockletSize(blockletSize); - } - if (blockSize != -1) { - builder = builder.withBlockSize(blockSize); - } - - CarbonWriter writer = builder.buildWriterForCSVInput(); - - for (int i = 0; i < rows; i++) { - writer.write(new String[]{"robot" + (i % 10), String.valueOf(i), String.valueOf((double) i / 2)}); - } - writer.close(); - } catch (IOException e) { - e.printStackTrace(); - Assert.fail(e.getMessage()); - } catch (InvalidLoadOptionException l) { - l.printStackTrace(); - Assert.fail(l.getMessage()); - } - - File segmentFolder = new File(CarbonTablePath.getSegmentPath(path, "null")); - Assert.assertTrue(segmentFolder.exists()); - - File[] dataFiles = segmentFolder.listFiles(new FileFilter() { - @Override public boolean accept(File pathname) { - return pathname.getName().endsWith(CarbonCommonConstants.FACT_FILE_EXT); - } - }); - Assert.assertNotNull(dataFiles); - Assert.assertTrue(dataFiles.length > 0); - } - - @Test - public void testWriteAndReadFiles() throws IOException, 
InterruptedException { - String path = "./testWriteFiles"; - FileUtils.deleteDirectory(new File(path)); - - Field[] fields = new Field[2]; - fields[0] = new Field("name", DataTypes.STRING); - fields[1] = new Field("age", DataTypes.INT); - - writeFilesAndVerify(new Schema(fields), path, true); - - File[] files = new File(path + "/Fact/Part0/Segment_null/").listFiles(new FilenameFilter() { - @Override public boolean accept(File dir, String name) { - return name.endsWith("carbondata"); - } - }); - - CarbonReader reader = CarbonReader.builder(path) - .projection(new String[]{"name", "age"}).build(); - - int i = 0; - while (reader.hasNext()) { - Object[] row = (Object[])reader.readNextRow(); - Assert.assertEquals("robot" + (i % 10), row[0]); - Assert.assertEquals(i, row[1]); - i++; - } + TestUtil.writeFilesAndVerify(Schema.parseJson(schema), path); FileUtils.deleteDirectory(new File(path)); } @@ -234,7 +136,7 @@ public class CSVCarbonWriterTest { fields[0] = new Field("name", DataTypes.STRING); fields[1] = new Field("age", DataTypes.INT); - writeFilesAndVerify(1000 * 1000, new Schema(fields), path, null, false, 1, 100); + TestUtil.writeFilesAndVerify(1000 * 1000, new Schema(fields), path, null, false, 1, 100); // TODO: implement reader to verify the number of blocklet in the file @@ -250,7 +152,7 @@ public class CSVCarbonWriterTest { fields[0] = new Field("name", DataTypes.STRING); fields[1] = new Field("age", DataTypes.INT); - writeFilesAndVerify(1000 * 1000, new Schema(fields), path, null, false, 2, 2); + TestUtil.writeFilesAndVerify(1000 * 1000, new Schema(fields), path, null, false, 2, 2); File segmentFolder = new File(CarbonTablePath.getSegmentPath(path, "null")); File[] dataFiles = segmentFolder.listFiles(new FileFilter() { @@ -273,7 +175,7 @@ public class CSVCarbonWriterTest { fields[0] = new Field("name", DataTypes.STRING); fields[1] = new Field("age", DataTypes.INT); - writeFilesAndVerify(new Schema(fields), path, new String[]{"name"}); + 
TestUtil.writeFilesAndVerify(new Schema(fields), path, new String[]{"name"}); // TODO: implement reader and verify the data is sorted @@ -294,7 +196,7 @@ public class CSVCarbonWriterTest { fields[0] = new Field("name", DataTypes.STRING); fields[1] = new Field("age", DataTypes.INT); - writeFilesAndVerify(new Schema(fields), path, true); + TestUtil.writeFilesAndVerify(new Schema(fields), path, true); String schemaFile = CarbonTablePath.getSchemaFilePath(path); Assert.assertTrue(new File(schemaFile).exists()); http://git-wip-us.apache.org/repos/asf/carbondata/blob/f910cfa9/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java ---------------------------------------------------------------------- diff --git a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java new file mode 100644 index 0000000..14802e6 --- /dev/null +++ b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.sdk.file; + +import java.io.File; +import java.io.FilenameFilter; +import java.io.IOException; +import java.util.List; + +import org.apache.carbondata.core.metadata.datatype.DataTypes; +import org.apache.carbondata.core.metadata.schema.table.TableInfo; +import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema; + +import org.apache.commons.io.FileUtils; +import org.junit.Assert; +import org.junit.Test; + +public class CarbonReaderTest { + + @Test + public void testWriteAndReadFiles() throws IOException, InterruptedException { + String path = "./testWriteFiles"; + FileUtils.deleteDirectory(new File(path)); + + Field[] fields = new Field[2]; + fields[0] = new Field("name", DataTypes.STRING); + fields[1] = new Field("age", DataTypes.INT); + + TestUtil.writeFilesAndVerify(new Schema(fields), path, true); + + CarbonReader reader = CarbonReader.builder(path) + .projection(new String[]{"name", "age"}).build(); + + int i = 0; + while (reader.hasNext()) { + Object[] row = (Object[])reader.readNextRow(); + Assert.assertEquals("robot" + (i % 10), row[0]); + Assert.assertEquals(i, row[1]); + i++; + } + + FileUtils.deleteDirectory(new File(path)); + } + + @Test + public void testReadSchemaFromDataFile() throws IOException { + String path = "./testWriteFiles"; + FileUtils.deleteDirectory(new File(path)); + + Field[] fields = new Field[2]; + fields[0] = new Field("name", DataTypes.STRING); + fields[1] = new Field("age", DataTypes.INT); + + TestUtil.writeFilesAndVerify(new Schema(fields), path, true); + + File[] dataFiles = new File(path + "/Fact/Part0/Segment_null/").listFiles(new FilenameFilter() { + @Override public boolean accept(File dir, String name) { + return name.endsWith("carbondata"); + } + }); + Assert.assertTrue(dataFiles != null); + Assert.assertTrue(dataFiles.length > 0); + List<ColumnSchema> columns = CarbonReader.readSchemaInDataFile(dataFiles[0].getAbsolutePath()); + Assert.assertTrue(columns.size() == 2); + 
Assert.assertEquals("name", columns.get(0).getColumnName()); + Assert.assertEquals("age", columns.get(1).getColumnName()); + Assert.assertEquals(DataTypes.STRING, columns.get(0).getDataType()); + Assert.assertEquals(DataTypes.INT, columns.get(1).getDataType()); + + FileUtils.deleteDirectory(new File(path)); + } + + @Test + public void testReadSchemaFromSchemaFile() throws IOException { + String path = "./testWriteFiles"; + FileUtils.deleteDirectory(new File(path)); + + Field[] fields = new Field[2]; + fields[0] = new Field("name", DataTypes.STRING); + fields[1] = new Field("age", DataTypes.INT); + + TestUtil.writeFilesAndVerify(new Schema(fields), path, true); + + File[] dataFiles = new File(path + "/Metadata").listFiles(new FilenameFilter() { + @Override public boolean accept(File dir, String name) { + return name.endsWith("schema"); + } + }); + Assert.assertTrue(dataFiles != null); + Assert.assertTrue(dataFiles.length > 0); + TableInfo tableInfo = CarbonReader.readSchemaFile(dataFiles[0].getAbsolutePath()); + Assert.assertEquals(2, tableInfo.getFactTable().getListOfColumns().size()); + + List<ColumnSchema> columns = tableInfo.getFactTable().getListOfColumns(); + Assert.assertEquals(2, columns.size()); + Assert.assertEquals("name", columns.get(0).getColumnName()); + Assert.assertEquals("age", columns.get(1).getColumnName()); + Assert.assertEquals(DataTypes.STRING, columns.get(0).getDataType()); + Assert.assertEquals(DataTypes.INT, columns.get(1).getDataType()); + + FileUtils.deleteDirectory(new File(path)); + } +} http://git-wip-us.apache.org/repos/asf/carbondata/blob/f910cfa9/store/sdk/src/test/java/org/apache/carbondata/sdk/file/TestUtil.java ---------------------------------------------------------------------- diff --git a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/TestUtil.java b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/TestUtil.java new file mode 100644 index 0000000..dcedf10 --- /dev/null +++ 
b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/TestUtil.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.sdk.file; + +import java.io.File; +import java.io.FileFilter; +import java.io.IOException; + +import org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException; +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.util.path.CarbonTablePath; + +import org.junit.Assert; + +class TestUtil { + + static void writeFilesAndVerify(Schema schema, String path) { + writeFilesAndVerify(schema, path, null); + } + + static void writeFilesAndVerify(Schema schema, String path, String[] sortColumns) { + writeFilesAndVerify(100, schema, path, sortColumns, false, -1, -1); + } + + static void writeFilesAndVerify(Schema schema, String path, boolean persistSchema) { + writeFilesAndVerify(100, schema, path, null, persistSchema, -1, -1); + } + + /** + * Invoke CarbonWriter API to write carbon files and assert the file is rewritten + * @param rows number of rows to write + * @param schema schema of the file + * @param path local write path + * @param sortColumns sort columns + * @param persistSchema true if want 
to persist schema file + * @param blockletSize blockletSize in the file, -1 for default size + * @param blockSize blockSize in the file, -1 for default size + */ + static void writeFilesAndVerify(int rows, Schema schema, String path, String[] sortColumns, + boolean persistSchema, int blockletSize, int blockSize) { + try { + CarbonWriterBuilder builder = CarbonWriter.builder() + .withSchema(schema) + .outputPath(path); + if (sortColumns != null) { + builder = builder.sortBy(sortColumns); + } + if (persistSchema) { + builder = builder.persistSchemaFile(true); + } + if (blockletSize != -1) { + builder = builder.withBlockletSize(blockletSize); + } + if (blockSize != -1) { + builder = builder.withBlockSize(blockSize); + } + + CarbonWriter writer = builder.buildWriterForCSVInput(); + + for (int i = 0; i < rows; i++) { + writer.write(new String[]{"robot" + (i % 10), String.valueOf(i), String.valueOf((double) i / 2)}); + } + writer.close(); + } catch (IOException e) { + e.printStackTrace(); + Assert.fail(e.getMessage()); + } catch (InvalidLoadOptionException l) { + l.printStackTrace(); + Assert.fail(l.getMessage()); + } + + File segmentFolder = new File(CarbonTablePath.getSegmentPath(path, "null")); + Assert.assertTrue(segmentFolder.exists()); + + File[] dataFiles = segmentFolder.listFiles(new FileFilter() { + @Override public boolean accept(File pathname) { + return pathname.getName().endsWith(CarbonCommonConstants.FACT_FILE_EXT); + } + }); + Assert.assertNotNull(dataFiles); + Assert.assertTrue(dataFiles.length > 0); + } +}
