Hi Community, I have encountered an issue: the LONG_STRING_COLUMNS config for big strings does not work. My env is Spark 2.3.2 + CarbonData 1.5.0
1. DDL Sql carbon.sql( s""" |CREATE TABLE IF NOT EXISTS product( |market_code STRING, |product_id LONG, |country_code STRING, |category_id LONG, |company_id LONG, |name STRING, |company STRING, |release_date STRING, |price DOUBLE, |version STRING, |description STRING, |ss_urls STRING, |size DOUBLE, |web_urls STRING, |created STRING, |content_rating STRING, |privacy_policy_url STRING, |last_updated STRING, |has_iap BOOLEAN, |status LONG, |current_release_date STRING, |original_price DOUBLE, |sensitive_status LONG, |artwork_url STRING, |slug STRING, |scrape_reviews BOOLEAN, |date_scraped STRING, |scrape_failed LONG, |dead BOOLEAN, |sku STRING, |req_version STRING, |req_device LONG, |has_game_center BOOLEAN, |is_mac BOOLEAN, |languages STRING, |support_url STRING, |license_url STRING, |link_apps STRING, |scrape_review_delay LONG, |requirements STRING, |app_store_notes STRING, |bundle_id STRING, |product_type LONG, |bundle_product_count LONG, |family_sharing BOOLEAN, |purchased_separately_price DOUBLE, |seller STRING, |required_devices STRING, |has_imsg BOOLEAN, |is_hidden_from_springboard BOOLEAN, |subtitle STRING, |promotional_text STRING, |editorial_badge_type STRING, |editorial_badge_name STRING, |only_32_bit BOOLEAN, |class STRING, |installs STRING, |require_os STRING, |downloads_chart_url STRING, |video_url STRING, |icon_url STRING, |banner_image_url STRING, |permissions STRING, |whats_new STRING, |related_apps STRING, |also_installed_apps STRING, |more_from_developer_apps STRING, |is_publisher_top BOOLEAN, |publisher_email STRING, |scrape_review_status LONG, |company_code STRING, |source STRING) |STORED BY 'carbondata' |TBLPROPERTIES( |'SORT_COLUMNS'='market_code, status, country_code, category_id, product_id, company_id', |'NO_INVERTED_INDEX'='name, company, release_date, artwork_url, slug, scrape_reviews, price, version, date_scraped, scrape_failed, sku, size, req_version, languages, created, support_url, license_url, scrape_review_delay, last_updated, 
bundle_id, bundle_product_count, family_sharing, purchased_separately_price, seller, required_devices, current_release_date, original_price, subtitle, promotional_text, editorial_badge_type, editorial_badge_name, installs, video_url, icon_url, banner_image_url, company_code, source', |'DICTIONARY_INCLUDE'='market_code,country_code', |'LONG_STRING_COLUMNS'='description, downloads_chart_url, permissions, whats_new, web_urls, related_apps, also_installed_apps, more_from_developer_apps, privacy_policy_url, publisher_email, ss_urls, link_apps, content_rating, requirements, app_store_notes', |'SORT_SCOPE'='LOCAL_SORT', |'CACHE_LEVEL'='BLOCKLET', |'TABLE_BLOCKSIZE'='256') """.stripMargin) 2. Table scala> carbon.sql("describe formatted product").show(200, truncate=false) 2018-10-10 21:24:34 STATISTIC QueryStatisticsRecorderImpl:212 - Time taken for Carbon Optimizer to optimize: 29 2018-10-10 21:24:35 ERROR CarbonUtil:141 - main Unable to unlock Table lock for table during table status updation +------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------+ |col_name |data_type |comment | +------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------+ |market_code |string |DICTIONARY, KEY COLUMN,null | |product_id |bigint |KEY COLUMN,null | |country_code |string |DICTIONARY, KEY COLUMN,null | |category_id |bigint |KEY COLUMN,null | |company_id |bigint |KEY COLUMN,null | |name |string |KEY COLUMN,NOINVERTEDINDEX,null | 
|company |string |KEY COLUMN,NOINVERTEDINDEX,null | |release_date |string |KEY COLUMN,NOINVERTEDINDEX,null | |price |double |MEASURE,null | |version |string |KEY COLUMN,NOINVERTEDINDEX,null | |description |string |KEY COLUMN,null | |ss_urls |string |KEY COLUMN,null | |size |double |MEASURE,null | |web_urls |string |KEY COLUMN,null | |created |string |KEY COLUMN,NOINVERTEDINDEX,null | |content_rating |string |KEY COLUMN,null | |privacy_policy_url |string |KEY COLUMN,null | |last_updated |string |KEY COLUMN,NOINVERTEDINDEX,null | |has_iap |boolean |MEASURE,null | |status |bigint |KEY COLUMN,null | |current_release_date |string |KEY COLUMN,NOINVERTEDINDEX,null | |original_price |double |MEASURE,null | |sensitive_status |bigint |MEASURE,null | |artwork_url |string |KEY COLUMN,NOINVERTEDINDEX,null | |slug |string |KEY COLUMN,NOINVERTEDINDEX,null | |scrape_reviews |boolean |MEASURE,null | |date_scraped |string |KEY COLUMN,NOINVERTEDINDEX,null | |scrape_failed |bigint |MEASURE,null | |dead |boolean |MEASURE,null | |sku |string |KEY COLUMN,NOINVERTEDINDEX,null | |req_version |string |KEY COLUMN,NOINVERTEDINDEX,null | |req_device |bigint |MEASURE,null | |has_game_center |boolean |MEASURE,null | |is_mac |boolean |MEASURE,null | |languages |string |KEY COLUMN,NOINVERTEDINDEX,null | |support_url |string |KEY COLUMN,NOINVERTEDINDEX,null | |license_url |string |KEY COLUMN,NOINVERTEDINDEX,null | |link_apps |string |KEY COLUMN,null | |scrape_review_delay |bigint |MEASURE,null | |requirements |string |KEY COLUMN,null | |app_store_notes |string |KEY COLUMN,null | |bundle_id |string |KEY COLUMN,NOINVERTEDINDEX,null | |product_type |bigint |MEASURE,null | |bundle_product_count |bigint |MEASURE,null | |family_sharing |boolean |MEASURE,null | |purchased_separately_price |double |MEASURE,null | |seller |string |KEY COLUMN,NOINVERTEDINDEX,null | |required_devices |string |KEY COLUMN,NOINVERTEDINDEX,null | |has_imsg |boolean |MEASURE,null | |is_hidden_from_springboard |boolean 
|MEASURE,null | |subtitle |string |KEY COLUMN,NOINVERTEDINDEX,null | |promotional_text |string |KEY COLUMN,NOINVERTEDINDEX,null | |editorial_badge_type |string |KEY COLUMN,NOINVERTEDINDEX,null | |editorial_badge_name |string |KEY COLUMN,NOINVERTEDINDEX,null | |only_32_bit |boolean |MEASURE,null | |class |string |KEY COLUMN,null | |installs |string |KEY COLUMN,NOINVERTEDINDEX,null | |require_os |string |KEY COLUMN,null | |downloads_chart_url |string |KEY COLUMN,null | |video_url |string |KEY COLUMN,NOINVERTEDINDEX,null | |icon_url |string |KEY COLUMN,NOINVERTEDINDEX,null | |banner_image_url |string |KEY COLUMN,NOINVERTEDINDEX,null | |permissions |string |KEY COLUMN,null | |whats_new |string |KEY COLUMN,null | |related_apps |string |KEY COLUMN,null | |also_installed_apps |string |KEY COLUMN,null | |more_from_developer_apps |string |KEY COLUMN,null | |is_publisher_top |boolean |MEASURE,null | |publisher_email |string |KEY COLUMN,null | |scrape_review_status |bigint |MEASURE,null | |company_code |string |KEY COLUMN,NOINVERTEDINDEX,null | |source |string |KEY COLUMN,NOINVERTEDINDEX,null | | | | | |##Detailed Table Information | | | |Database Name |default | | |Table Name |product | | |CARBON Store Path |hdfs://ec2-dca-aa-p-sdn-16.appannie.org:9000/usr/carbon/data/default/product | | |Comment | | | |Table Block Size |256 MB | | |Table Data Size |0 | | |Table Index Size |0 | | |Last Update Time |0 | | |SORT_SCOPE |local_sort |local_sort | |CACHE_LEVEL |BLOCKLET | | |Streaming |false | | |LONG_STRING_COLUMNS |description, downloads_chart_url, permissions, whats_new, web_urls, related_apps, also_installed_apps, more_from_developer_apps, privacy_policy_url, publisher_email, ss_urls, link_apps, content_rating, requirements, app_store_notes| | |Local Dictionary Enabled |false | | | | | | |##Detailed Column property | | | |ADAPTIVE | | | |SORT_COLUMNS |market_code,status,country_code,category_id,product_id,company_id | | 
+------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------+ 3. Error Driver stacktrace: at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1651) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1639) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1638) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1638) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831) at scala.Option.foreach(Option.scala:257) at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:831) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1872) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1821) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1810) at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:642) at org.apache.spark.SparkContext.runJob(SparkContext.scala:2034) at org.apache.spark.SparkContext.runJob(SparkContext.scala:2055) at org.apache.spark.SparkContext.runJob(SparkContext.scala:2074) at org.apache.spark.SparkContext.runJob(SparkContext.scala:2099) at 
org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:945) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) at org.apache.spark.rdd.RDD.withScope(RDD.scala:363) at org.apache.spark.rdd.RDD.collect(RDD.scala:944) at org.apache.carbondata.spark.rdd.CarbonDataRDDFactory$.loadDataFrame(CarbonDataRDDFactory.scala:1120) at org.apache.carbondata.spark.rdd.CarbonDataRDDFactory$.loadCarbonData(CarbonDataRDDFactory.scala:365) at org.apache.spark.sql.execution.command.management.CarbonLoadDataCommand.loadData(CarbonLoadDataCommand.scala:591) at org.apache.spark.sql.execution.command.management.CarbonLoadDataCommand.processData(CarbonLoadDataCommand.scala:316) at org.apache.spark.sql.execution.command.AtomicRunnableCommand.run(package.scala:92) at org.apache.spark.sql.CarbonDataFrameWriter.loadDataFrame(CarbonDataFrameWriter.scala:62) at org.apache.spark.sql.CarbonDataFrameWriter.writeToCarbonFile(CarbonDataFrameWriter.scala:46) at org.apache.spark.sql.CarbonDataFrameWriter.appendToCarbonFile(CarbonDataFrameWriter.scala:41) at org.apache.spark.sql.CarbonSource.createRelation(CarbonSource.scala:116) at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45) at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70) at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68) at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:86) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127) at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at 
org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127) at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80) at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80) at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:656) at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:656) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:77) at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:656) at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:273) at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:267) at com.appannie.CarbonImporter$$anonfun$product_load$1$$anonfun$apply$1.apply$mcVI$sp(CarbonImporter.scala:177) at com.appannie.CarbonImporter$$anonfun$product_load$1$$anonfun$apply$1.apply(CarbonImporter.scala:165) at com.appannie.CarbonImporter$$anonfun$product_load$1$$anonfun$apply$1.apply(CarbonImporter.scala:165) at scala.collection.immutable.List.foreach(List.scala:381) at com.appannie.CarbonImporter$$anonfun$product_load$1.apply(CarbonImporter.scala:165) at com.appannie.CarbonImporter$$anonfun$product_load$1.apply(CarbonImporter.scala:157) at scala.collection.immutable.List.foreach(List.scala:381) at com.appannie.CarbonImporter$.product_load(CarbonImporter.scala:157) at com.appannie.CarbonImporter$.main(CarbonImporter.scala:58) at com.appannie.CarbonImporter.main(CarbonImporter.scala) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) 
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:894) at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:198) at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:228) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:137) at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) Caused by: org.apache.carbondata.processing.loading.exception.CarbonDataLoadingException: at org.apache.carbondata.processing.loading.sort.AbstractMergeSorter.checkError(AbstractMergeSorter.java:39) at org.apache.carbondata.processing.loading.sort.impl.UnsafeParallelReadMergeSorterImpl.sort(UnsafeParallelReadMergeSorterImpl.java:106) at org.apache.carbondata.processing.loading.steps.SortProcessorStepImpl.execute(SortProcessorStepImpl.java:55) at org.apache.carbondata.processing.loading.steps.DataWriterProcessorStepImpl.execute(DataWriterProcessorStepImpl.java:112) at org.apache.carbondata.processing.loading.DataLoadExecutor.execute(DataLoadExecutor.java:51) at org.apache.carbondata.spark.rdd.NewDataFrameLoaderRDD$$anon$2.<init>(NewCarbonDataLoadRDD.scala:352) at org.apache.carbondata.spark.rdd.NewDataFrameLoaderRDD.internalCompute(NewCarbonDataLoadRDD.scala:317) at org.apache.carbondata.spark.rdd.CarbonRDD.compute(CarbonRDD.scala:78) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87) at org.apache.spark.scheduler.Task.run(Task.scala:109) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Caused by: java.lang.Exception: Dataload failed, String length cannot exceed 32000 characters at 
org.apache.carbondata.streaming.parser.FieldConverter$.objectToString(FieldConverter.scala:53) at org.apache.carbondata.spark.util.CarbonScalaUtil$.getString(CarbonScalaUtil.scala:70) at org.apache.carbondata.spark.rdd.LazyRddIterator$$anonfun$next$2.apply$mcVI$sp(NewCarbonDataLoadRDD.scala:486) at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160) at org.apache.carbondata.spark.rdd.LazyRddIterator.next(NewCarbonDataLoadRDD.scala:485) at org.apache.carbondata.spark.rdd.LazyRddIterator.next(NewCarbonDataLoadRDD.scala:442) at org.apache.carbondata.processing.loading.steps.InputProcessorStepImpl$InputProcessorIterator.getBatch(InputProcessorStepImpl.java:226) at org.apache.carbondata.processing.loading.steps.InputProcessorStepImpl$InputProcessorIterator.next(InputProcessorStepImpl.java:186) at org.apache.carbondata.processing.loading.steps.InputProcessorStepImpl$InputProcessorIterator.next(InputProcessorStepImpl.java:112) at org.apache.carbondata.processing.loading.steps.DataConverterProcessorStepImpl$1.next(DataConverterProcessorStepImpl.java:190) at org.apache.carbondata.processing.loading.steps.DataConverterProcessorStepImpl$1.next(DataConverterProcessorStepImpl.java:176) at org.apache.carbondata.processing.loading.sort.impl.UnsafeParallelReadMergeSorterImpl$SortIteratorThread.run(UnsafeParallelReadMergeSorterImpl.java:196) ... 3 more -- Sent from: http://apache-carbondata-dev-mailing-list-archive.1130556.n5.nabble.com/