QiangCai commented on a change in pull request #3538: [CARBONDATA-3637]
Optimize insert into flow
URL: https://github.com/apache/carbondata/pull/3538#discussion_r378690024
##########
File path:
integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/CarbonScalaUtil.scala
##########
@@ -140,6 +141,89 @@ object CarbonScalaUtil {
}
}
+ /**
+ * Converts the incoming static partition value to an object of the given
data type.
+ *
+ * @param value Input value to convert
+ * @param dataType Datatype to convert and then convert to String
+ * @param timeStampFormat Timestamp format to convert in case of timestamp
datatypes
+ * @param dateFormat DateFormat to convert in case of DateType datatype
+ * @return converted value as an object (AnyRef)
+ */
+ def convertStaticPartitionToValues(
+ value: String,
+ dataType: DataType,
+ timeStampFormat: SimpleDateFormat,
+ dateFormat: SimpleDateFormat): AnyRef = {
+ val defaultValue = value != null &&
value.equalsIgnoreCase(hiveDefaultPartition)
+ try {
+ dataType match {
+ case TimestampType if timeStampFormat != null =>
+ val formattedString =
+ if (defaultValue) {
+ timeStampFormat.format(new Date())
+ } else {
+ timeStampFormat.format(DateTimeUtils.stringToTime(value))
+ }
+ val convertedValue =
+ DataTypeUtil
+ .getDataBasedOnDataType(formattedString,
+
CarbonSparkDataSourceUtil.convertSparkToCarbonDataType(TimestampType))
+ convertedValue
+ case DateType if dateFormat != null =>
+ val formattedString =
+ if (defaultValue) {
+ dateFormat.format(new Date())
+ } else {
+ dateFormat.format(DateTimeUtils.stringToTime(value))
+ }
+ val convertedValue =
+ DataTypeUtil
+ .getDataBasedOnDataType(formattedString,
+
CarbonSparkDataSourceUtil.convertSparkToCarbonDataType(TimestampType))
+ val date = generateDictionaryKey(convertedValue.asInstanceOf[Long])
+ date.asInstanceOf[AnyRef]
+ case BinaryType =>
+ // TODO: decode required ? currently it is working
+ ByteUtil.toBytes(value)
+ case _ =>
+ val convertedValue =
+ DataTypeUtil
+ .getDataBasedOnDataType(value,
+
CarbonSparkDataSourceUtil.convertSparkToCarbonDataType(dataType))
+ if (convertedValue == null) {
+ if (defaultValue) {
+ dataType match {
+ case BooleanType =>
+ return false.asInstanceOf[AnyRef]
+ case _ =>
+ return 0.asInstanceOf[AnyRef]
+ }
+ }
+ throw new MalformedCarbonCommandException(
+ s"Value $value with datatype $dataType on static partition is
not correct")
+ }
+ convertedValue
+ }
+ } catch {
+ case e: Exception =>
+ throw new MalformedCarbonCommandException(
+ s"Value $value with datatype $dataType on static partition is not
correct")
+ }
+ }
+
+ def generateDictionaryKey(timeValue: Long): Int = {
+ if (timeValue < DateDirectDictionaryGenerator.MIN_VALUE ||
+ timeValue > DateDirectDictionaryGenerator.MAX_VALUE) {
+ if (LOGGER.isDebugEnabled) {
+ LOGGER.debug("Value for date type column is not in valid range. Value
considered as null.")
+ }
+ return CarbonCommonConstants.DIRECT_DICT_VALUE_NULL
+ }
+ Math.floor(timeValue.toDouble /
DateDirectDictionaryGenerator.MILLIS_PER_DAY).toInt +
+ DateDirectDictionaryGenerator.cutOffDate
Review comment:
Please fix the code style.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services