GitHub user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2341#discussion_r191317495
--- Diff:
store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java ---
@@ -381,7 +384,343 @@ public boolean accept(File pathname) {
, "boolField"
, "dateField"
, "timeField"
- , "decimalField"}).build();
+ , "decimalField"})
+ .build();
+
+ int i = 0;
+ while (reader.hasNext()) {
+ Object[] row = (Object[]) reader.readNextRow();
+ int id = (int) row[2];
+ Assert.assertEquals("robot" + (id % 10), row[0]);
+ Assert.assertEquals(Short.parseShort(String.valueOf(id)), row[1]);
+ Assert.assertEquals(Long.MAX_VALUE - id, row[3]);
+ Assert.assertEquals((double) id / 2, row[4]);
+ Assert.assertEquals(true, (boolean) row[5]);
+ long day = 24L * 3600 * 1000;
+ Assert.assertEquals("2019-03-02", new Date((day * ((int)
row[6]))).toString());
+ Assert.assertEquals("2019-02-12 03:03:34.0", new Timestamp((long)
row[7] / 1000).toString());
+ i++;
+ }
+ Assert.assertEquals(i, 100);
+
+ reader.close();
+ FileUtils.deleteDirectory(new File(path));
+
carbonProperties.addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
+ timestampFormat);
+
carbonProperties.addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION,
+ badRecordAction);
+
carbonProperties.addProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC,
+ badRecordLoc);
+ }
+
+ @Test
+ public void testReadSchemaFileAndSort() throws IOException,
InterruptedException {
+ String timestampFormat =
carbonProperties.getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
+ CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT);
+ String badRecordAction =
carbonProperties.getProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION,
+ CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION_DEFAULT);
+ String badRecordLoc =
carbonProperties.getProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC,
+ CarbonCommonConstants.CARBON_BADRECORDS_LOC_DEFAULT_VAL);
+ String rootPath = new File(this.getClass().getResource("/").getPath()
+ + "../../").getCanonicalPath();
+ String storeLocation = rootPath + "/target/";
+ carbonProperties
+ .addProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC,
storeLocation)
+ .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
"yyyy-MM-dd hh:mm:ss")
+ .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION,
"REDIRECT");
+ String path = "./testWriteFiles";
+ FileUtils.deleteDirectory(new File(path));
+
+ Field[] fields = new Field[9];
+ fields[0] = new Field("stringField", DataTypes.STRING);
+ fields[1] = new Field("shortField", DataTypes.SHORT);
+ fields[2] = new Field("intField", DataTypes.INT);
+ fields[3] = new Field("longField", DataTypes.LONG);
+ fields[4] = new Field("doubleField", DataTypes.DOUBLE);
+ fields[5] = new Field("boolField", DataTypes.BOOLEAN);
+ fields[6] = new Field("dateField", DataTypes.DATE);
+ fields[7] = new Field("timeField", DataTypes.TIMESTAMP);
+ fields[8] = new Field("decimalField", DataTypes.createDecimalType(8,
2));
+
+ try {
+ CarbonWriterBuilder builder = CarbonWriter.builder()
+ .isTransactionalTable(true)
+ .persistSchemaFile(true)
+ .outputPath(path);
+
+ CarbonWriter writer = builder.buildWriterForCSVInput(new
Schema(fields));
+
+ for (int i = 0; i < 100; i++) {
+ String[] row2 = new String[]{
+ "robot" + (i % 10),
+ String.valueOf(i),
+ String.valueOf(i),
+ String.valueOf(Long.MAX_VALUE - i),
+ String.valueOf((double) i / 2),
+ String.valueOf(true),
+ "2019-03-02",
+ "2019-02-12 03:03:34",
+ "12.345"
+ };
+ writer.write(row2);
+ }
+ writer.close();
+ } catch (Exception e) {
+ e.printStackTrace();
+ Assert.fail(e.getMessage());
+ }
+
+ File[] dataFiles = new File(path + "/Metadata").listFiles(new
FilenameFilter() {
+ @Override public boolean accept(File dir, String name) {
+ return name.endsWith("schema");
+ }
+ });
+ TableInfo tableInfo =
CarbonReader.readSchemaFile(dataFiles[0].getAbsolutePath());
+
+ List<ColumnSchema> columns =
tableInfo.getFactTable().getListOfColumns();
+
+ // sort the schema
+ Collections.sort(tableInfo.getFactTable().getListOfColumns(), new
Comparator<ColumnSchema>() {
+ @Override
+ public int compare(ColumnSchema o1, ColumnSchema o2) {
+ return Integer.compare(o1.getSchemaOrdinal(),
o2.getSchemaOrdinal());
+ }
+ });
+
+ // Transform the schema
+ String[] strings= new String[columns.size()];
+ for (int i = 0; i < columns.size(); i++) {
+ strings[i]= columns.get(i).getColumnName();
+ }
+
+ File segmentFolder = new File(CarbonTablePath.getSegmentPath(path,
"null"));
+ Assert.assertTrue(segmentFolder.exists());
+
+ Assert.assertNotNull(dataFiles);
+ Assert.assertTrue(dataFiles.length > 0);
+
+ CarbonReader reader = CarbonReader
+ .builder(path, "_temp")
+ .projection(strings)
+ .build();
+
+ int i = 0;
+ while (reader.hasNext()) {
+ Object[] row = (Object[]) reader.readNextRow();
+ int id = (int) row[2];
+ Assert.assertEquals("robot" + (id % 10), row[0]);
+ Assert.assertEquals(Short.parseShort(String.valueOf(id)), row[1]);
+ Assert.assertEquals(Long.MAX_VALUE - id, row[3]);
+ Assert.assertEquals((double) id / 2, row[4]);
+ Assert.assertEquals(true, (boolean) row[5]);
+ long day = 24L * 3600 * 1000;
+ Assert.assertEquals("2019-03-02", new Date((day * ((int)
row[6]))).toString());
+ Assert.assertEquals("2019-02-12 03:03:34.0", new Timestamp((long)
row[7] / 1000).toString());
+ i++;
+ }
+ Assert.assertEquals(i, 100);
+
+ reader.close();
+ FileUtils.deleteDirectory(new File(path));
+
carbonProperties.addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
+ timestampFormat);
+
carbonProperties.addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION,
+ badRecordAction);
+
carbonProperties.addProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC,
+ badRecordLoc);
+ }
+
+ @Test
+ public void testReadSchemaInDataFileAndSort() throws IOException,
InterruptedException {
+ String timestampFormat =
carbonProperties.getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
+ CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT);
+ String badRecordAction =
carbonProperties.getProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION,
+ CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION_DEFAULT);
+ String badRecordLoc =
carbonProperties.getProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC,
+ CarbonCommonConstants.CARBON_BADRECORDS_LOC_DEFAULT_VAL);
+ String rootPath = new File(this.getClass().getResource("/").getPath()
+ + "../../").getCanonicalPath();
+ String storeLocation = rootPath + "/target/";
+ carbonProperties
+ .addProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC,
storeLocation)
+ .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
"yyyy-MM-dd hh:mm:ss")
+ .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION,
"REDIRECT");
+ String path = "./testWriteFiles";
+ FileUtils.deleteDirectory(new File(path));
+
+ Field[] fields = new Field[9];
+ fields[0] = new Field("stringField", DataTypes.STRING);
+ fields[1] = new Field("shortField", DataTypes.SHORT);
+ fields[2] = new Field("intField", DataTypes.INT);
+ fields[3] = new Field("longField", DataTypes.LONG);
+ fields[4] = new Field("doubleField", DataTypes.DOUBLE);
+ fields[5] = new Field("boolField", DataTypes.BOOLEAN);
+ fields[6] = new Field("dateField", DataTypes.DATE);
+ fields[7] = new Field("timeField", DataTypes.TIMESTAMP);
+ fields[8] = new Field("decimalField", DataTypes.createDecimalType(8,
2));
+
+ try {
+ CarbonWriterBuilder builder = CarbonWriter.builder()
+ .isTransactionalTable(true)
+ .persistSchemaFile(true)
+ .outputPath(path);
+
+ CarbonWriter writer = builder.buildWriterForCSVInput(new
Schema(fields));
+
+ for (int i = 0; i < 100; i++) {
+ String[] row2 = new String[]{
+ "robot" + (i % 10),
+ String.valueOf(i),
+ String.valueOf(i),
+ String.valueOf(Long.MAX_VALUE - i),
+ String.valueOf((double) i / 2),
+ String.valueOf(true),
+ "2019-03-02",
+ "2019-02-12 03:03:34",
+ "12.345"
+ };
+ writer.write(row2);
+ }
+ writer.close();
+ } catch (Exception e) {
+ e.printStackTrace();
+ Assert.fail(e.getMessage());
+ }
+
+ File[] dataFiles2 = new File(path +
"/Fact/Part0/Segment_null/").listFiles(new FilenameFilter() {
+ @Override public boolean accept(File dir, String name) {
+ return name.endsWith("carbondata");
+ }
+ });
+
+ List<ColumnSchema> columns =
CarbonReader.readSchemaInDataFile(dataFiles2[0].getAbsolutePath());
+
+ // sort the schema
+ Collections.sort(columns, new Comparator<ColumnSchema>() {
+ @Override
+ public int compare(ColumnSchema o1, ColumnSchema o2) {
+ return Integer.compare(o1.getSchemaOrdinal(),
o2.getSchemaOrdinal());
+ }
+ });
+
+ // Transform the schema
+ String[] strings= new String[columns.size()];
+ for (int i = 0; i < columns.size(); i++) {
+ strings[i]= columns.get(i).getColumnName();
+ }
+
+ File segmentFolder = new File(CarbonTablePath.getSegmentPath(path,
"null"));
+ Assert.assertTrue(segmentFolder.exists());
+
+ CarbonReader reader = CarbonReader
+ .builder(path, "_temp")
+ .projection(strings)
+ .build();
+
+ int i = 0;
+ while (reader.hasNext()) {
+ Object[] row = (Object[]) reader.readNextRow();
+ int id = (int) row[2];
+ Assert.assertEquals("robot" + (id % 10), row[0]);
+ Assert.assertEquals(Short.parseShort(String.valueOf(id)), row[1]);
+ Assert.assertEquals(Long.MAX_VALUE - id, row[3]);
+ Assert.assertEquals((double) id / 2, row[4]);
+ Assert.assertEquals(true, (boolean) row[5]);
+ long day = 24L * 3600 * 1000;
+ Assert.assertEquals("2019-03-02", new Date((day * ((int)
row[6]))).toString());
+ Assert.assertEquals("2019-02-12 03:03:34.0", new Timestamp((long)
row[7] / 1000).toString());
+ i++;
+ }
+ Assert.assertEquals(i, 100);
+
+ reader.close();
+ FileUtils.deleteDirectory(new File(path));
+
carbonProperties.addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
+ timestampFormat);
+
carbonProperties.addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION,
+ badRecordAction);
+
carbonProperties.addProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC,
+ badRecordLoc);
+ }
+
+ @Test
+ public void testReadUserSchema() throws IOException,
InterruptedException {
+ String timestampFormat =
carbonProperties.getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
+ CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT);
+ String badRecordAction =
carbonProperties.getProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION,
+ CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION_DEFAULT);
+ String badRecordLoc =
carbonProperties.getProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC,
+ CarbonCommonConstants.CARBON_BADRECORDS_LOC_DEFAULT_VAL);
+ String rootPath = new File(this.getClass().getResource("/").getPath()
+ + "../../").getCanonicalPath();
+ String storeLocation = rootPath + "/target/";
+ carbonProperties
+ .addProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC,
storeLocation)
+ .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
"yyyy-MM-dd hh:mm:ss")
+ .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION,
"REDIRECT");
+ String path = "./testWriteFiles";
+ FileUtils.deleteDirectory(new File(path));
+
+ Field[] fields = new Field[9];
+ fields[0] = new Field("stringField", DataTypes.STRING);
+ fields[1] = new Field("shortField", DataTypes.SHORT);
--- End diff --
@xubo245 Please file a JIRA issue to track an alternative to the schema
ordinal for complex data types, especially for their child columns.
---