Add comment option (COMMENTCHAR) for data loading; fix code style;
fixed testcase renew some files Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/868ada55 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/868ada55 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/868ada55 Branch: refs/heads/branch-0.1 Commit: 868ada5597275e079bbbd92076de9fa2c9485d65 Parents: 9b415d7 Author: lion-x <xlion....@gmail.com> Authored: Thu Sep 1 19:55:43 2016 +0800 Committer: ravipesala <ravi.pes...@gmail.com> Committed: Thu Sep 22 10:08:36 2016 +0530 ---------------------------------------------------------------------- .../hadoop/test/util/StoreCreator.java | 4 +++ .../carbondata/spark/load/CarbonLoadModel.java | 30 ++++++++++++++++++++ .../carbondata/spark/load/CarbonLoaderUtil.java | 4 +++ .../spark/util/GlobalDictionaryUtil.scala | 9 ++++++ .../org/apache/spark/sql/CarbonSqlParser.scala | 2 +- .../execution/command/carbonTableSchema.scala | 3 ++ .../spark/src/test/resources/comment.csv | 5 ++++ .../dataload/TestLoadDataWithHiveSyntax.scala | 20 +++++++++++-- .../util/ExternalColumnDictionaryTestCase.scala | 1 + ...GlobalDictionaryUtilConcurrentTestCase.scala | 1 + .../util/GlobalDictionaryUtilTestCase.scala | 1 + .../api/dataloader/DataLoadModel.java | 13 +++++++++ .../processing/csvreaderstep/CsvInput.java | 2 ++ .../processing/csvreaderstep/CsvInputMeta.java | 30 ++++++++++++++++++++ .../csvreaderstep/UnivocityCsvParser.java | 3 ++ .../csvreaderstep/UnivocityCsvParserVo.java | 21 ++++++++++++++ .../dataprocessor/DataProcessTaskStatus.java | 14 +++++++++ .../dataprocessor/IDataProcessStatus.java | 4 +++ .../graphgenerator/GraphGenerator.java | 6 ++++ 19 files changed, 170 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- 
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/868ada55/hadoop/src/test/java/org/apache/carbondata/hadoop/test/util/StoreCreator.java ---------------------------------------------------------------------- diff --git a/hadoop/src/test/java/org/apache/carbondata/hadoop/test/util/StoreCreator.java b/hadoop/src/test/java/org/apache/carbondata/hadoop/test/util/StoreCreator.java index a48e6ad..5867160 100644 --- a/hadoop/src/test/java/org/apache/carbondata/hadoop/test/util/StoreCreator.java +++ b/hadoop/src/test/java/org/apache/carbondata/hadoop/test/util/StoreCreator.java @@ -353,6 +353,8 @@ public class StoreCreator { GraphGenerator.blockInfo.put("qwqwq", new BlockDetails[] { blockDetails }); schmaModel.setBlocksID("qwqwq"); schmaModel.setEscapeCharacter("\\"); + schmaModel.setQuoteCharacter("\""); + schmaModel.setCommentCharacter("#"); info.setDatabaseName(databaseName); info.setTableName(tableName); @@ -460,6 +462,8 @@ public class StoreCreator { model.setBlocksID(schmaModel.getBlocksID()); model.setFactTimeStamp(readCurrentTime()); model.setEscapeCharacter(schmaModel.getEscapeCharacter()); + model.setQuoteCharacter(schmaModel.getQuoteCharacter()); + model.setCommentCharacter(schmaModel.getCommentCharacter()); if (null != loadMetadataDetails && !loadMetadataDetails.isEmpty()) { model.setLoadNames( CarbonDataProcessorUtil.getLoadNameFromLoadMetaDataDetails(loadMetadataDetails)); http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/868ada55/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoadModel.java ---------------------------------------------------------------------- diff --git a/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoadModel.java b/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoadModel.java index c1a073d..68f3929 100644 --- a/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoadModel.java +++ 
b/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoadModel.java @@ -97,6 +97,16 @@ public class CarbonLoadModel implements Serializable { private String escapeChar; /** + * quote Char + */ + private String quoteChar; + + /** + * comment Char + */ + private String commentChar; + + /** * defines the string that should be treated as null while loadind data */ private String serializationNullFormat; @@ -322,6 +332,8 @@ public class CarbonLoadModel implements Serializable { copy.segmentId = segmentId; copy.serializationNullFormat = serializationNullFormat; copy.escapeChar = escapeChar; + copy.quoteChar = quoteChar; + copy.commentChar = commentChar; copy.maxColumns = maxColumns; return copy; } @@ -361,6 +373,8 @@ public class CarbonLoadModel implements Serializable { copyObj.segmentId = segmentId; copyObj.serializationNullFormat = serializationNullFormat; copyObj.escapeChar = escapeChar; + copyObj.quoteChar = quoteChar; + copyObj.commentChar = commentChar; copyObj.maxColumns = maxColumns; return copyObj; } @@ -531,6 +545,22 @@ public class CarbonLoadModel implements Serializable { this.serializationNullFormat = serializationNullFormat; } + public String getQuoteChar() { + return quoteChar; + } + + public void setQuoteChar(String quoteChar) { + this.quoteChar = quoteChar; + } + + public String getCommentChar() { + return commentChar; + } + + public void setCommentChar(String commentChar) { + this.commentChar = commentChar; + } + /** * @return */ http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/868ada55/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoaderUtil.java ---------------------------------------------------------------------- diff --git a/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoaderUtil.java b/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoaderUtil.java index 246597c..a8302c0 100644 --- 
a/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoaderUtil.java +++ b/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoaderUtil.java @@ -126,6 +126,8 @@ public final class CarbonLoaderUtil { } model.setBlocksID(schmaModel.getBlocksID()); model.setEscapeCharacter(schmaModel.getEscapeCharacter()); + model.setQuoteCharacter(schmaModel.getQuoteCharacter()); + model.setCommentCharacter(schmaModel.getCommentCharacter()); model.setTaskNo(loadModel.getTaskNo()); model.setFactTimeStamp(loadModel.getFactTimeStamp()); model.setMaxColumns(loadModel.getMaxColumns()); @@ -180,6 +182,8 @@ public final class CarbonLoaderUtil { schmaModel.setBlocksID(loadModel.getBlocksID()); schmaModel.setEscapeCharacter(loadModel.getEscapeChar()); + schmaModel.setQuoteCharacter(loadModel.getQuoteChar()); + schmaModel.setCommentCharacter(loadModel.getCommentChar()); SchemaInfo info = new SchemaInfo(); info.setDatabaseName(databaseName); http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/868ada55/integration/spark/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala b/integration/spark/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala index b96a826..7e60320 100644 --- a/integration/spark/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala +++ b/integration/spark/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala @@ -365,6 +365,15 @@ object GlobalDictionaryUtil extends Logging { .option("ignoreLeadingWhiteSpace", "false") .option("ignoreTrailingWhiteSpace", "false") .option("codec", "gzip") + .option("quote", { + if (StringUtils.isEmpty(carbonLoadModel.getQuoteChar)) { + "" + CSVWriter. 
DEFAULT_QUOTE_CHARACTER + } + else { + carbonLoadModel.getQuoteChar + } + }) + .option("comment", carbonLoadModel.getCommentChar) .load(carbonLoadModel.getFactFilePath) df } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/868ada55/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala index 69d921f..5675603 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala @@ -997,7 +997,7 @@ class CarbonSqlParser() val supportedOptions = Seq("DELIMITER", "QUOTECHAR", "FILEHEADER", "ESCAPECHAR", "MULTILINE", "COMPLEX_DELIMITER_LEVEL_1", "COMPLEX_DELIMITER_LEVEL_2", "COLUMNDICT", "SERIALIZATION_NULL_FORMAT", - "ALL_DICTIONARY_PATH", "MAXCOLUMNS" + "ALL_DICTIONARY_PATH", "MAXCOLUMNS", "COMMENTCHAR" ) var isSupported = true val invalidOptions = StringBuilder.newBuilder http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/868ada55/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala index 1e06165..2047872 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala @@ -1091,6 +1091,7 @@ private[sql] case class LoadTable( val quoteChar = partionValues.getOrElse("quotechar", "\"") val fileHeader = 
partionValues.getOrElse("fileheader", "") val escapeChar = partionValues.getOrElse("escapechar", "\\") + val commentchar = partionValues.getOrElse("commentchar", "#") val columnDict = partionValues.getOrElse("columndict", null) val serializationNullFormat = partionValues.getOrElse("serialization_null_format", "\\N") val allDictionaryPath = partionValues.getOrElse("all_dictionary_path", "") @@ -1107,6 +1108,8 @@ private[sql] case class LoadTable( val maxColumns = partionValues.getOrElse("maxcolumns", null) carbonLoadModel.setMaxColumns(maxColumns) carbonLoadModel.setEscapeChar(escapeChar) + carbonLoadModel.setQuoteChar(quoteChar) + carbonLoadModel.setCommentChar(commentchar) carbonLoadModel.setSerializationNullFormat("serialization_null_format" + "," + serializationNullFormat) if (delimiter.equalsIgnoreCase(complex_delimiter_level_1) || http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/868ada55/integration/spark/src/test/resources/comment.csv ---------------------------------------------------------------------- diff --git a/integration/spark/src/test/resources/comment.csv b/integration/spark/src/test/resources/comment.csv new file mode 100644 index 0000000..34ccb12 --- /dev/null +++ b/integration/spark/src/test/resources/comment.csv @@ -0,0 +1,5 @@ +.~carbon,.,~carbon,~carbon,~carbon,~carbon,~carbon,~carbon,~carbon +,carbon,,carbon,,carbon,,carbon,,carbon,,carbon,,carbon,,carbon +#?carbon,#carbon,#carbon,#carbon,#carbon,#carbon,#carbon,#carbon +?carbon,#carbon,#carbon,#carbon,#carbon,#carbon,#carbon,#carbon +".carbon,"carbon,"carbon,"carbon,"carbon,"carbon,"carbon,"carbon \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/868ada55/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala ---------------------------------------------------------------------- diff --git 
a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala index e017a62..071c40c 100644 --- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala +++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala @@ -385,7 +385,7 @@ class TestLoadDataWithHiveSyntax extends QueryTest with BeforeAndAfterAll { sql( """ LOAD DATA LOCAL INPATH './src/test/resources/datawithescapecharacter.csv' into table t3 - options ('DELIMITER'=',', 'QUOTECHAR'='\"','ESCAPECHAR'='\') + options ('DELIMITER'=',', 'QUOTECHAR'='"','ESCAPECHAR'='\') """ ) checkAnswer(sql("select count(*) from t3"), Seq(Row(21))) @@ -406,7 +406,7 @@ class TestLoadDataWithHiveSyntax extends QueryTest with BeforeAndAfterAll { sql( """ LOAD DATA LOCAL INPATH './src/test/resources/datawithescapecharacter.csv' into table t3 - options ('DELIMITER'=',', 'QUOTECHAR'='\"','ESCAPECHAR'='@') + options ('DELIMITER'=',', 'QUOTECHAR'='"','ESCAPECHAR'='@') """ ) checkAnswer(sql("select count(*) from t3"), Seq(Row(21))) @@ -590,6 +590,21 @@ class TestLoadDataWithHiveSyntax extends QueryTest with BeforeAndAfterAll { checkAnswer(sql("select * from carbontable1"), sql("select * from hivetable1")) } + test("test data loading with comment option") { + sql("drop table if exists comment_test") + sql( + "create table comment_test(imei string, age int, task bigint, num double, level decimal(10," + + "3), productdate timestamp, mark int, name string) STORED BY 'org.apache.carbondata.format'" + ) + sql( + "LOAD DATA local inpath './src/test/resources/comment.csv' INTO TABLE comment_test " + + "options('DELIMITER' = ',', 'QUOTECHAR' = '.', 'COMMENTCHAR' = '?','FILEHEADER'='imei,age,task,num,level,productdate,mark,name')" + ) + checkAnswer(sql("select imei 
from comment_test"),Seq(Row("\".carbon"),Row("#?carbon"), Row(""), + Row("~carbon,"))) + } + + override def afterAll { sql("drop table carbontable") sql("drop table hivetable") @@ -597,5 +612,6 @@ class TestLoadDataWithHiveSyntax extends QueryTest with BeforeAndAfterAll { sql("drop table if exists mixed_header_test") sql("drop table carbontable1") sql("drop table hivetable1") + sql("drop table if exists comment_test") } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/868ada55/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala index 459b99c..4e013bb 100644 --- a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala +++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala @@ -139,6 +139,7 @@ class ExternalColumnDictionaryTestCase extends QueryTest with BeforeAndAfterAll carbonLoadModel.setComplexDelimiterLevel1("\\$") carbonLoadModel.setComplexDelimiterLevel2("\\:") carbonLoadModel.setColDictFilePath(extColFilePath) + carbonLoadModel.setQuoteChar("\""); carbonLoadModel } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/868ada55/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala index 
8468090..4108abe 100644 --- a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala +++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala @@ -70,6 +70,7 @@ class GlobalDictionaryUtilConcurrentTestCase extends QueryTest with BeforeAndAft carbonLoadModel.setComplexDelimiterLevel1("\\$") carbonLoadModel.setComplexDelimiterLevel2("\\:") carbonLoadModel.setStorePath(relation.tableMeta.storePath) + carbonLoadModel.setQuoteChar("\"") carbonLoadModel } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/868ada55/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilTestCase.scala index 05c2715..32beeee 100644 --- a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilTestCase.scala +++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilTestCase.scala @@ -69,6 +69,7 @@ class GlobalDictionaryUtilTestCase extends QueryTest with BeforeAndAfterAll { carbonLoadModel.setComplexDelimiterLevel1("\\$") carbonLoadModel.setComplexDelimiterLevel2("\\:") carbonLoadModel.setStorePath(relation.tableMeta.storePath) + carbonLoadModel.setQuoteChar("\"") carbonLoadModel } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/868ada55/processing/src/main/java/org/apache/carbondata/processing/api/dataloader/DataLoadModel.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/api/dataloader/DataLoadModel.java 
b/processing/src/main/java/org/apache/carbondata/processing/api/dataloader/DataLoadModel.java index 239457b..42a8382 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/api/dataloader/DataLoadModel.java +++ b/processing/src/main/java/org/apache/carbondata/processing/api/dataloader/DataLoadModel.java @@ -61,6 +61,10 @@ public class DataLoadModel { private String escapeCharacter; + private String quoteCharacter; + + private String commentCharacter; + private String maxColumns; /** * @return Returns the schemaInfo. @@ -200,6 +204,15 @@ public class DataLoadModel { this.escapeCharacter = escapeCharacter; } + public String getQuoteCharacter() { return quoteCharacter; } + + public void setQuoteCharacter(String quoteCharacter) { this.quoteCharacter = quoteCharacter; } + + public String getCommentCharacter() { return commentCharacter; } + + public void setCommentCharacter(String commentCharacter) { + this.commentCharacter = commentCharacter; + } /** * @return */ http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/868ada55/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInput.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInput.java b/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInput.java index ab3dbd8..8db8ed4 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInput.java +++ b/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInput.java @@ -455,6 +455,8 @@ public class CsvInput extends BaseStep implements StepInterface { csvParserVo.setNumberOfColumns(meta.getInputFields().length); csvParserVo.setEscapeCharacter(meta.getEscapeCharacter()); csvParserVo.setHeaderPresent(meta.isHeaderPresent()); + csvParserVo.setQuoteCharacter(meta.getQuoteCharacter()); + 
csvParserVo.setCommentCharacter(meta.getCommentCharacter()); String maxColumns = meta.getMaxColumns(); if(null != maxColumns) { csvParserVo.setMaxColumns(Integer.parseInt(maxColumns)); http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/868ada55/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInputMeta.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInputMeta.java b/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInputMeta.java index 2e0dece..c5b801a 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInputMeta.java +++ b/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInputMeta.java @@ -99,6 +99,10 @@ public class CsvInputMeta extends BaseStepMeta private String escapeCharacter; + private String quoteCharacter; + + private String commentCharacter; + private String maxColumns; public CsvInputMeta() { @@ -121,6 +125,8 @@ public class CsvInputMeta extends BaseStepMeta blocksID = ""; partitionID = ""; escapeCharacter ="\\"; + quoteCharacter = "\""; + commentCharacter = "#"; } private void readData(Node stepnode) throws KettleXMLException { @@ -157,6 +163,8 @@ public class CsvInputMeta extends BaseStepMeta blocksID = XMLHandler.getTagValue(stepnode, "blocksID"); partitionID = XMLHandler.getTagValue(stepnode, "partitionID"); escapeCharacter = XMLHandler.getTagValue(stepnode, "escapeCharacter"); + quoteCharacter = XMLHandler.getTagValue(stepnode, "quoteCharacter"); + commentCharacter = XMLHandler.getTagValue(stepnode, "commentCharacter"); maxColumns = XMLHandler.getTagValue(stepnode, "maxColumns"); Node fields = XMLHandler.getSubNode(stepnode, getXmlCode("FIELDS")); int nrfields = XMLHandler.countNodes(fields, getXmlCode("FIELD")); @@ -220,6 +228,8 @@ public class CsvInputMeta extends BaseStepMeta retval.append(" 
").append(XMLHandler.addTagValue("blocksID", blocksID)); retval.append(" ").append(XMLHandler.addTagValue("partitionID", partitionID)); retval.append(" ").append(XMLHandler.addTagValue("escapeCharacter", escapeCharacter)); + retval.append(" ").append(XMLHandler.addTagValue("quoteCharacter", quoteCharacter)); + retval.append(" ").append(XMLHandler.addTagValue("commentCharacter", commentCharacter)); retval.append(" ").append(XMLHandler.addTagValue("maxColumns", maxColumns)); retval.append(" ").append(XMLHandler.openTag(getXmlCode("FIELDS"))).append(Const.CR); for (int i = 0; i < inputFields.length; i++) { @@ -273,6 +283,8 @@ public class CsvInputMeta extends BaseStepMeta blocksID = rep.getStepAttributeString(idStep, getRepCode("blocksID")); partitionID = rep.getStepAttributeString(idStep, getRepCode("partitionID")); escapeCharacter = rep.getStepAttributeString(idStep, getRepCode("escapeCharacter")); + quoteCharacter = rep.getStepAttributeString(idStep, getRepCode("quoteCharacter")); + commentCharacter = rep.getStepAttributeString(idStep, getRepCode("commentCharacter")); maxColumns = rep.getStepAttributeString(idStep, getRepCode("maxColumns")); int nrfields = rep.countNrStepAttributes(idStep, getRepCode("FIELD_NAME")); @@ -329,6 +341,10 @@ public class CsvInputMeta extends BaseStepMeta rep.saveStepAttribute(idTransformation, idStep, getRepCode("partitionID"), partitionID); rep.saveStepAttribute(idTransformation, idStep, getRepCode("escapeCharacter"), escapeCharacter); + rep.saveStepAttribute(idTransformation, idStep, getRepCode("quoteCharacter"), + quoteCharacter); + rep.saveStepAttribute(idTransformation, idStep, getRepCode("commentCharacter"), + commentCharacter); rep.saveStepAttribute(idTransformation, idStep, getRepCode("maxColumns"), maxColumns); for (int i = 0; i < inputFields.length; i++) { @@ -623,6 +639,16 @@ public class CsvInputMeta extends BaseStepMeta this.escapeCharacter = escapeCharacter; } + public String getQuoteCharacter() { return quoteCharacter; } 
+ + public void setQuoteCharacter(String quoteCharacter) { this.quoteCharacter = quoteCharacter; } + + public String getCommentCharacter() { return commentCharacter; } + + public void setCommentCharacter(String commentCharacter) { + this.commentCharacter = commentCharacter; + } + public String getFileType() { return "CSV"; } @@ -828,6 +854,10 @@ public class CsvInputMeta extends BaseStepMeta partitionID = (String) entry.getValue(); } else if ("escapeCharacter".equals(attributeKey)) { escapeCharacter = (String) entry.getValue(); + } else if ("quoteCharacter".equals(attributeKey)) { + quoteCharacter = (String) entry.getValue(); + } else if ("commentCharacter".equals(attributeKey)) { + commentCharacter = (String) entry.getValue(); } else { throw new RuntimeException( "Unhandled metadata injection of attribute: " + attr.toString() + " - " + attr http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/868ada55/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParser.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParser.java b/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParser.java index 51dc1bc..f72dd5b 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParser.java +++ b/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParser.java @@ -97,6 +97,7 @@ public class UnivocityCsvParser { public void initialize() throws IOException { CsvParserSettings parserSettings = new CsvParserSettings(); parserSettings.getFormat().setDelimiter(csvParserVo.getDelimiter().charAt(0)); + parserSettings.getFormat().setComment(csvParserVo.getCommentCharacter().charAt(0)); parserSettings.setLineSeparatorDetectionEnabled(true); parserSettings.setMaxColumns( 
getMaxColumnsForParsing(csvParserVo.getNumberOfColumns(), csvParserVo.getMaxColumns())); @@ -104,6 +105,8 @@ public class UnivocityCsvParser { parserSettings.setIgnoreLeadingWhitespaces(false); parserSettings.setIgnoreTrailingWhitespaces(false); parserSettings.setSkipEmptyLines(false); + parserSettings.getFormat().setQuote(null == csvParserVo.getQuoteCharacter() ? + '\"':csvParserVo.getQuoteCharacter().charAt(0)); parserSettings.getFormat().setQuoteEscape(null == csvParserVo.getEscapeCharacter() ? '\\' : csvParserVo.getEscapeCharacter().charAt(0)); http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/868ada55/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParserVo.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParserVo.java b/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParserVo.java index 623cac3..5383309 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParserVo.java +++ b/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParserVo.java @@ -71,6 +71,16 @@ public class UnivocityCsvParserVo { private String escapeCharacter; /** + * quote character; + */ + private String quoteCharacter; + + /** + * comment character; + */ + private String commentCharacter; + + /** * max number of columns configured by user to be parsed in a row */ private int maxColumns; @@ -187,6 +197,17 @@ public class UnivocityCsvParserVo { this.escapeCharacter = escapeCharacter; } + public String getQuoteCharacter() { return quoteCharacter; } + + public void setQuoteCharacter(String quoteCharacter) { this.quoteCharacter = quoteCharacter; } + + public String getCommentCharacter() { return commentCharacter; } + + public void setCommentCharacter(String commentCharacter) { + this.commentCharacter = 
commentCharacter; + } + + /** * @return */ http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/868ada55/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/DataProcessTaskStatus.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/DataProcessTaskStatus.java b/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/DataProcessTaskStatus.java index bc09f89..a0fb157 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/DataProcessTaskStatus.java +++ b/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/DataProcessTaskStatus.java @@ -87,6 +87,10 @@ public class DataProcessTaskStatus implements IDataProcessStatus, Serializable { private String escapeCharacter; + private String quoteCharacter; + + private String commentCharacter; + public DataProcessTaskStatus(String databaseName, String tableName) { this.databaseName = databaseName; this.tableName = tableName; @@ -283,4 +287,14 @@ public class DataProcessTaskStatus implements IDataProcessStatus, Serializable { public void setEscapeCharacter(String escapeCharacter) { this.escapeCharacter = escapeCharacter; } + + public String getQuoteCharacter() { return quoteCharacter; } + + public void setQuoteCharacter(String quoteCharacter) { this.quoteCharacter = quoteCharacter; } + + public String getCommentCharacter() { return commentCharacter; } + + public void setCommentCharacter(String commentCharacter) { + this.commentCharacter = commentCharacter; + } } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/868ada55/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/IDataProcessStatus.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/IDataProcessStatus.java 
b/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/IDataProcessStatus.java index 22cada5..56dfe00 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/IDataProcessStatus.java +++ b/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/IDataProcessStatus.java @@ -189,4 +189,8 @@ public interface IDataProcessStatus { String getBlocksID(); String getEscapeCharacter(); + + String getQuoteCharacter(); + + String getCommentCharacter(); } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/868ada55/processing/src/main/java/org/apache/carbondata/processing/graphgenerator/GraphGenerator.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/graphgenerator/GraphGenerator.java b/processing/src/main/java/org/apache/carbondata/processing/graphgenerator/GraphGenerator.java index f55c247..b10841f 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/graphgenerator/GraphGenerator.java +++ b/processing/src/main/java/org/apache/carbondata/processing/graphgenerator/GraphGenerator.java @@ -185,6 +185,8 @@ public class GraphGenerator { private String factStoreLocation; private String blocksID; private String escapeCharacter; + private String quoteCharacter; + private String commentCharacter; /** * task id, each spark task has a unique id */ @@ -219,6 +221,8 @@ public class GraphGenerator { this.isColumnar = Boolean.parseBoolean(CarbonCommonConstants.IS_COLUMNAR_STORAGE_DEFAULTVALUE); this.blocksID = dataLoadModel.getBlocksID(); this.taskNo = dataLoadModel.getTaskNo(); + this.quoteCharacter = dataLoadModel.getQuoteCharacter(); + this.commentCharacter = dataLoadModel.getCommentCharacter(); this.factTimeStamp = dataLoadModel.getFactTimeStamp(); this.segmentId = segmentId; this.escapeCharacter = dataLoadModel.getEscapeCharacter(); @@ -450,6 +454,8 @@ public class GraphGenerator { 
csvInputMeta.setBlocksID(this.blocksID); csvInputMeta.setPartitionID(this.partitionID); csvInputMeta.setEscapeCharacter(this.escapeCharacter); + csvInputMeta.setQuoteCharacter(this.quoteCharacter); + csvInputMeta.setCommentCharacter(this.commentCharacter); csvDataStep.setDraw(true); csvDataStep.setDescription("Read raw data from " + GraphGeneratorConstants.CSV_INPUT);