[jira] [Comment Edited] (CARBONDATA-906) Always OOM error when import large dataset (100 million rows)
[ https://issues.apache.org/jira/browse/CARBONDATA-906?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15965585#comment-15965585 ] Crabo Yang edited comment on CARBONDATA-906 at 4/12/17 9:18 AM: 1.oozie spark-opts --jars rds.importer-1.0-SNAPSHOT.jar,carbondata_2.10-1.0.0-incubating-shade-hadoop2.6.0-cdh5.7.0.jar --num-executors 12 --executor-cores 4 --executor-memory 13G --conf spark.yarn.executor.memoryOverhead=5120 --conf spark.executor.heartbeatInterval=1000 --conf spark.network.timeout=1000 2.create script CREATE TABLE IF NOT EXISTS dmp_trade(id STRING,buyerNick STRING,buyerAlipayNO STRING,clientType STRING,sellerNick STRING,receiverName STRING,receiverMobile STRING,receiverPhone STRING,receiverCountry STRING,receiverState STRING,receiverCity STRING,receiverDistrict STRING,receiverTown STRING,receiverAddress STRING,receiverZip STRING,status STRING,tradeFrom STRING,type STRING,stepTradeStatus STRING,shippingType STRING,title STRING,buyerMessage STRING,buyerMemo STRING,rxAuditStatus STRING,buyerEmail STRING,picPath STRING,shopPick STRING,creditCardFee STRING,markDesc STRING,sellerMemo STRING,invoiceName STRING,invoiceType STRING,tradeAttr STRING,esRange STRING,esDate STRING,osDate STRING,osRange STRING,o2oSnatchStatus STRING,market STRING,etType STRING,obs STRING,tradeOriginalJson STRING,point STRING,omniAttr STRING,omniParam STRING,identity STRING,omnichannelParam STRING,assembly STRING,tradeId BIGINT,itemId BIGINT,platFormId INT,num INT,sellerFlag INT,naSource INT,etShopId INT,forbidConsign INT,buyerFlag INT,topHold INT,nvoiceKind INT,payment STRING,price STRING,totalFee STRING,discountFee STRING,postFee STRING,stepPaidFee STRING,adjustFee STRING,buyerCodFee STRING,orderTaxFee STRING,couponFee STRING,paidCouponFee STRING,sellerRate STRING,buyerRate STRING,postGateDeclare STRING,crossBondedDeclare STRING,hasBuyerMessage STRING,hasPostFee STRING,isShShip STRING,created TIMESTAMP,payTime TIMESTAMP,modified TIMESTAMP,endTime 
TIMESTAMP,consignTime TIMESTAMP,estConTime TIMESTAMP) STORED BY 'carbondata'; 3.carbon.properties #Mandatory. Carbon Store path carbon.storelocation=hdfs://master.nascent.com:8020/Opt/CarbonStore #Base directory for Data files carbon.ddl.base.hdfs.url=hdfs://master.nascent.com:8020/opt/data #Path where the bad records are stored carbon.badRecords.location=/opt/Carbon/Spark/badrecords #Mandatory. path to kettle home carbon.kettle.home=/usr/lib/spark/carbonlib/carbonplugins carbon.load.use.batch.sort=true enable.unsafe.sort=true offheap.sort.chunk.size.inmb=1024 carbon.load.batch.sort.size.inmb=450 #File read buffer size used during sorting(in MB) :MIN=1:MAX=100 carbon.sort.file.buffer.size=10 #Rowset size exchanged between data load graph steps :MIN=500:MAX=100 carbon.graph.rowset.size=1 #Number of cores to be used while data loading carbon.number.of.cores.while.loading=6 #Record count to sort and write to temp intermediate files carbon.sort.size=50 #Algorithm for hashmap for hashkey calculation carbon.enableXXHash=true #Number of cores to be used for block sort while dataloading #carbon.number.of.cores.block.sort=7 #max level cache size upto which level cache will be loaded in memory #carbon.max.level.cache.size=-1 #enable prefetch of data during merge sort while reading data from sort temp files in data loading #carbon.merge.sort.prefetch=true #Number of cores to be used while compacting carbon.number.of.cores.while.compacting=8 #For minor compaction, Number of segments to be merged in stage 1, number of compacted segments to be merged in stage 2. 
carbon.compaction.level.threshold=4,3 #default size (in MB) for major compaction to be triggered carbon.major.compaction.size=1024 #Query Configuration #Number of cores to be used for loading index into memory carbon.number.of.cores=8 #Number of records to be in memory while querying :MIN=10:MAX=24 carbon.inmemory.record.size=12 #Improves the performance of filter query carbon.enable.quick.filter=false ##number of core to load the blocks in driver #no.of.cores.to.load.blocks.in.driver=10 #Extra Configuration ##Timestamp format of input data used for timestamp data type. #carbon.timestamp.format=-MM-dd HH:mm:ss ##File write buffer size used during sorting. #carbon.sort.file.write.buffer.size=10485760 ##Locking mechanism for data loading on a table carbon.lock.type=HDFSLOCK ##Minimum no of intermediate files after which sort merged to be started. #carbon.sort.intermediate.files.limit=20 ##space reserved in percentage for writing block meta data in carbon data file #carbon.block.meta.size.reserved.percentage=10 ##csv reading buffer size. #carbon.csv.read.buffersize.byte=1048576 ##To identify and apply compression for non-high cardinality columns #high.cardinality.value=10 ##maximum no of threads used for reading intermediate files for final merging.
[jira] [Comment Edited] (CARBONDATA-906) Always OOM error when import large dataset (100 million rows)
[ https://issues.apache.org/jira/browse/CARBONDATA-906?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15965585#comment-15965585 ] Crabo Yang edited comment on CARBONDATA-906 at 4/12/17 9:17 AM: 1.oozie spark-opts --jars rds.importer-1.0-SNAPSHOT.jar,carbondata_2.10-1.0.0-incubating-shade-hadoop2.6.0-cdh5.7.0.jar --num-executors 12 --executor-cores 4 --executor-memory 13G --conf spark.yarn.executor.memoryOverhead=5120 --conf spark.executor.heartbeatInterval=1000 --conf spark.network.timeout=1000 2.create script CREATE TABLE IF NOT EXISTS dmp_trade(id STRING,buyerNick STRING,buyerAlipayNO STRING,clientType STRING,sellerNick STRING,receiverName STRING,receiverMobile STRING,receiverPhone STRING,receiverCountry STRING,receiverState STRING,receiverCity STRING,receiverDistrict STRING,receiverTown STRING,receiverAddress STRING,receiverZip STRING,status STRING,tradeFrom STRING,type STRING,stepTradeStatus STRING,shippingType STRING,title STRING,buyerMessage STRING,buyerMemo STRING,rxAuditStatus STRING,buyerEmail STRING,picPath STRING,shopPick STRING,creditCardFee STRING,markDesc STRING,sellerMemo STRING,invoiceName STRING,invoiceType STRING,tradeAttr STRING,esRange STRING,esDate STRING,osDate STRING,osRange STRING,o2oSnatchStatus STRING,market STRING,etType STRING,obs STRING,tradeOriginalJson STRING,point STRING,omniAttr STRING,omniParam STRING,identity STRING,omnichannelParam STRING,assembly STRING,tradeId BIGINT,itemId BIGINT,platFormId INT,num INT,sellerFlag INT,naSource INT,etShopId INT,forbidConsign INT,buyerFlag INT,topHold INT,nvoiceKind INT,payment STRING,price STRING,totalFee STRING,discountFee STRING,postFee STRING,stepPaidFee STRING,adjustFee STRING,buyerCodFee STRING,orderTaxFee STRING,couponFee STRING,paidCouponFee STRING,sellerRate STRING,buyerRate STRING,postGateDeclare STRING,crossBondedDeclare STRING,hasBuyerMessage STRING,hasPostFee STRING,isShShip STRING,created TIMESTAMP,payTime TIMESTAMP,modified TIMESTAMP,endTime 
TIMESTAMP,consignTime TIMESTAMP,estConTime TIMESTAMP) STORED BY 'carbondata'; 3.carbon.properties #Mandatory. Carbon Store path carbon.storelocation=hdfs://master.nascent.com:8020/Opt/CarbonStore #Base directory for Data files carbon.ddl.base.hdfs.url=hdfs://master.nascent.com:8020/opt/data #Path where the bad records are stored carbon.badRecords.location=/opt/Carbon/Spark/badrecords #Mandatory. path to kettle home carbon.kettle.home=/usr/lib/spark/carbonlib/carbonplugins carbon.load.use.batch.sort=true enable.unsafe.sort=true offheap.sort.chunk.size.inmb=1024 carbon.load.batch.sort.size.inmb=450 #File read buffer size used during sorting(in MB) :MIN=1:MAX=100 carbon.sort.file.buffer.size=10 #Rowset size exchanged between data load graph steps :MIN=500:MAX=100 carbon.graph.rowset.size=1 #Number of cores to be used while data loading carbon.number.of.cores.while.loading=6 #Record count to sort and write to temp intermediate files carbon.sort.size=50 #Algorithm for hashmap for hashkey calculation carbon.enableXXHash=true #Number of cores to be used for block sort while dataloading #carbon.number.of.cores.block.sort=7 #max level cache size upto which level cache will be loaded in memory #carbon.max.level.cache.size=-1 #enable prefetch of data during merge sort while reading data from sort temp files in data loading #carbon.merge.sort.prefetch=true #Number of cores to be used while compacting carbon.number.of.cores.while.compacting=8 #For minor compaction, Number of segments to be merged in stage 1, number of compacted segments to be merged in stage 2. 
carbon.compaction.level.threshold=4,3 #default size (in MB) for major compaction to be triggered carbon.major.compaction.size=1024 #Query Configuration #Number of cores to be used for loading index into memory carbon.number.of.cores=8 #Number of records to be in memory while querying :MIN=10:MAX=24 carbon.inmemory.record.size=12 #Improves the performance of filter query carbon.enable.quick.filter=false ##number of core to load the blocks in driver #no.of.cores.to.load.blocks.in.driver=10 #Extra Configuration ##Timestamp format of input data used for timestamp data type. #carbon.timestamp.format=-MM-dd HH:mm:ss ##File write buffer size used during sorting. #carbon.sort.file.write.buffer.size=10485760 ##Locking mechanism for data loading on a table carbon.lock.type=HDFSLOCK ##Minimum no of intermediate files after which sort merged to be started. #carbon.sort.intermediate.files.limit=20 ##space reserved in percentage for writing block meta data in carbon data file #carbon.block.meta.size.reserved.percentage=10 ##csv reading buffer size. #carbon.csv.read.buffersize.byte=1048576 ##To identify and apply compression for non-high cardinality columns #high.cardinality.value=10 ##maximum no of threads used for reading intermediate files for final merging.
[jira] [Comment Edited] (CARBONDATA-906) Always OOM error when import large dataset (100 million rows)
[ https://issues.apache.org/jira/browse/CARBONDATA-906?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15965585#comment-15965585 ] Crabo Yang edited comment on CARBONDATA-906 at 4/12/17 9:16 AM: 1.oozie spark-opts --jars rds.importer-1.0-SNAPSHOT.jar,carbondata_2.10-1.0.0-incubating-shade-hadoop2.6.0-cdh5.7.0.jar --num-executors 12 --executor-cores 4 --executor-memory 13G --conf spark.yarn.executor.memoryOverhead=5120 --conf spark.executor.heartbeatInterval=1000 --conf spark.network.timeout=1000 2.create script CREATE TABLE IF NOT EXISTS dmp_trade(id STRING,buyerNick STRING,buyerAlipayNO STRING,clientType STRING,sellerNick STRING,receiverName STRING,receiverMobile STRING,receiverPhone STRING,receiverCountry STRING,receiverState STRING,receiverCity STRING,receiverDistrict STRING,receiverTown STRING,receiverAddress STRING,receiverZip STRING,status STRING,tradeFrom STRING,type STRING,stepTradeStatus STRING,shippingType STRING,title STRING,buyerMessage STRING,buyerMemo STRING,rxAuditStatus STRING,buyerEmail STRING,picPath STRING,shopPick STRING,creditCardFee STRING,markDesc STRING,sellerMemo STRING,invoiceName STRING,invoiceType STRING,tradeAttr STRING,esRange STRING,esDate STRING,osDate STRING,osRange STRING,o2oSnatchStatus STRING,market STRING,etType STRING,obs STRING,tradeOriginalJson STRING,point STRING,omniAttr STRING,omniParam STRING,identity STRING,omnichannelParam STRING,assembly STRING,tradeId BIGINT,itemId BIGINT,platFormId INT,num INT,sellerFlag INT,naSource INT,etShopId INT,forbidConsign INT,buyerFlag INT,topHold INT,nvoiceKind INT,payment STRING,price STRING,totalFee STRING,discountFee STRING,postFee STRING,stepPaidFee STRING,adjustFee STRING,buyerCodFee STRING,orderTaxFee STRING,couponFee STRING,paidCouponFee STRING,sellerRate STRING,buyerRate STRING,postGateDeclare STRING,crossBondedDeclare STRING,hasBuyerMessage STRING,hasPostFee STRING,isShShip STRING,created TIMESTAMP,payTime TIMESTAMP,modified TIMESTAMP,endTime 
TIMESTAMP,consignTime TIMESTAMP,estConTime TIMESTAMP) STORED BY 'carbondata'; 3.carbon.properties #System Configuration #Mandatory. Carbon Store path carbon.storelocation=hdfs://master.nascent.com:8020/Opt/CarbonStore #Base directory for Data files carbon.ddl.base.hdfs.url=hdfs://master.nascent.com:8020/opt/data #Path where the bad records are stored carbon.badRecords.location=/opt/Carbon/Spark/badrecords #Mandatory. path to kettle home carbon.kettle.home=/usr/lib/spark/carbonlib/carbonplugins #Performance Configuration #DataLoading Configuration carbon.load.use.batch.sort=true enable.unsafe.sort=true offheap.sort.chunk.size.inmb=1024 carbon.load.batch.sort.size.inmb=450 #File read buffer size used during sorting(in MB) :MIN=1:MAX=100 carbon.sort.file.buffer.size=10 #Rowset size exchanged between data load graph steps :MIN=500:MAX=100 carbon.graph.rowset.size=1 #Number of cores to be used while data loading carbon.number.of.cores.while.loading=6 #Record count to sort and write to temp intermediate files carbon.sort.size=50 #Algorithm for hashmap for hashkey calculation carbon.enableXXHash=true #Number of cores to be used for block sort while dataloading #carbon.number.of.cores.block.sort=7 #max level cache size upto which level cache will be loaded in memory #carbon.max.level.cache.size=-1 #enable prefetch of data during merge sort while reading data from sort temp files in data loading #carbon.merge.sort.prefetch=true #Compaction Configuration #Number of cores to be used while compacting carbon.number.of.cores.while.compacting=8 #For minor compaction, Number of segments to be merged in stage 1, number of compacted segments to be merged in stage 2. 
carbon.compaction.level.threshold=4,3 #default size (in MB) for major compaction to be triggered carbon.major.compaction.size=1024 #Query Configuration #Number of cores to be used for loading index into memory carbon.number.of.cores=8 #Number of records to be in memory while querying :MIN=10:MAX=24 carbon.inmemory.record.size=12 #Improves the performance of filter query carbon.enable.quick.filter=false ##number of core to load the blocks in driver #no.of.cores.to.load.blocks.in.driver=10 #Extra Configuration ##Timestamp format of input data used for timestamp data type. #carbon.timestamp.format=-MM-dd HH:mm:ss Dataload Configuration ##File write buffer size used during sorting. #carbon.sort.file.write.buffer.size=10485760 ##Locking mechanism for data loading on a table carbon.lock.type=HDFSLOCK ##Minimum no of intermediate files after which sort merged to be started. #carbon.sort.intermediate.files.limit=20 ##space reserved in percentage for writing block meta data in carbon data file #carbon.block.meta.size.reserved.percentage=10 ##csv reading buffer size. #carbon.csv.read.buffersize.byte=1048576 ##To identify and apply compression for non-high cardinality
[jira] [Comment Edited] (CARBONDATA-906) Always OOM error when import large dataset (100 million rows)
[ https://issues.apache.org/jira/browse/CARBONDATA-906?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15965585#comment-15965585 ] Crabo Yang edited comment on CARBONDATA-906 at 4/12/17 9:13 AM: 1.oozie spark-opts --jars rds.importer-1.0-SNAPSHOT.jar,carbondata_2.10-1.0.0-incubating-shade-hadoop2.6.0-cdh5.7.0.jar --num-executors 12 --executor-cores 4 --executor-memory 13G --conf spark.yarn.executor.memoryOverhead=5120 --conf spark.executor.heartbeatInterval=1000 --conf spark.network.timeout=1000 2.create script CREATE TABLE IF NOT EXISTS dmp_trade(id STRING,buyerNick STRING,buyerAlipayNO STRING,clientType STRING,sellerNick STRING,receiverName STRING,receiverMobile STRING,receiverPhone STRING,receiverCountry STRING,receiverState STRING,receiverCity STRING,receiverDistrict STRING,receiverTown STRING,receiverAddress STRING,receiverZip STRING,status STRING,tradeFrom STRING,type STRING,stepTradeStatus STRING,shippingType STRING,title STRING,buyerMessage STRING,buyerMemo STRING,rxAuditStatus STRING,buyerEmail STRING,picPath STRING,shopPick STRING,creditCardFee STRING,markDesc STRING,sellerMemo STRING,invoiceName STRING,invoiceType STRING,tradeAttr STRING,esRange STRING,esDate STRING,osDate STRING,osRange STRING,o2oSnatchStatus STRING,market STRING,etType STRING,obs STRING,tradeOriginalJson STRING,point STRING,omniAttr STRING,omniParam STRING,identity STRING,omnichannelParam STRING,assembly STRING,tradeId BIGINT,itemId BIGINT,platFormId INT,num INT,sellerFlag INT,naSource INT,etShopId INT,forbidConsign INT,buyerFlag INT,topHold INT,nvoiceKind INT,payment STRING,price STRING,totalFee STRING,discountFee STRING,postFee STRING,stepPaidFee STRING,adjustFee STRING,buyerCodFee STRING,orderTaxFee STRING,couponFee STRING,paidCouponFee STRING,sellerRate STRING,buyerRate STRING,postGateDeclare STRING,crossBondedDeclare STRING,hasBuyerMessage STRING,hasPostFee STRING,isShShip STRING,created TIMESTAMP,payTime TIMESTAMP,modified TIMESTAMP,endTime 
TIMESTAMP,consignTime TIMESTAMP,estConTime TIMESTAMP) STORED BY 'carbondata'; 3.carbon.properties System Configuration ## #Mandatory. Carbon Store path carbon.storelocation=hdfs://master.nascent.com:8020/Opt/CarbonStore #Base directory for Data files carbon.ddl.base.hdfs.url=hdfs://master.nascent.com:8020/opt/data #Path where the bad records are stored carbon.badRecords.location=/opt/Carbon/Spark/badrecords #Mandatory. path to kettle home carbon.kettle.home=/usr/lib/spark/carbonlib/carbonplugins Performance Configuration ## DataLoading Configuration ##add by xzl carbon.load.use.batch.sort=true enable.unsafe.sort=true offheap.sort.chunk.size.inmb=1024 carbon.load.batch.sort.size.inmb=450 #File read buffer size used during sorting(in MB) :MIN=1:MAX=100 carbon.sort.file.buffer.size=10 #Rowset size exchanged between data load graph steps :MIN=500:MAX=100 carbon.graph.rowset.size=1 #Number of cores to be used while data loading carbon.number.of.cores.while.loading=6 #Record count to sort and write to temp intermediate files carbon.sort.size=50 #Algorithm for hashmap for hashkey calculation carbon.enableXXHash=true #Number of cores to be used for block sort while dataloading #carbon.number.of.cores.block.sort=7 #max level cache size upto which level cache will be loaded in memory #carbon.max.level.cache.size=-1 #enable prefetch of data during merge sort while reading data from sort temp files in data loading #carbon.merge.sort.prefetch=true Compaction Configuration #Number of cores to be used while compacting carbon.number.of.cores.while.compacting=8 #For minor compaction, Number of segments to be merged in stage 1, number of compacted segments to be merged in stage 2. 
carbon.compaction.level.threshold=4,3 #default size (in MB) for major compaction to be triggered carbon.major.compaction.size=1024 Query Configuration #Number of cores to be used for loading index into memory carbon.number.of.cores=8 #Number of records to be in memory while querying :MIN=10:MAX=24 carbon.inmemory.record.size=12 #Improves the performance of filter query carbon.enable.quick.filter=false ##number of core to load the blocks in driver #no.of.cores.to.load.blocks.in.driver=10 Extra Configuration ## ##Timestamp format of input data used for timestamp data type. #carbon.timestamp.format=-MM-dd HH:mm:ss Dataload Configuration ##File write buffer size used during sorting. #carbon.sort.file.write.buffer.size=10485760 ##Locking mechanism for data loading on a table carbon.lock.type=HDFSLOCK ##Minimum no of intermediate files after which sort merged to be started. #carbon.sort.intermediate.files.limit=20 ##space reserved in percentage for writing block meta data in carbon data
[jira] [Comment Edited] (CARBONDATA-906) Always OOM error when import large dataset (100 million rows)
[ https://issues.apache.org/jira/browse/CARBONDATA-906?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15965585#comment-15965585 ] Crabo Yang edited comment on CARBONDATA-906 at 4/12/17 9:11 AM: 1.oozie spark-opts --jars rds.importer-1.0-SNAPSHOT.jar,carbondata_2.10-1.0.0-incubating-shade-hadoop2.6.0-cdh5.7.0.jar --num-executors 12 --executor-cores 4 --executor-memory 13G --conf spark.yarn.executor.memoryOverhead=5120 --conf spark.executor.heartbeatInterval=1000 --conf spark.network.timeout=1000 2.create script CREATE TABLE IF NOT EXISTS dmp_trade(id STRING,buyerNick STRING,buyerAlipayNO STRING,clientType STRING,sellerNick STRING,receiverName STRING,receiverMobile STRING,receiverPhone STRING,receiverCountry STRING,receiverState STRING,receiverCity STRING,receiverDistrict STRING,receiverTown STRING,receiverAddress STRING,receiverZip STRING,status STRING,tradeFrom STRING,type STRING,stepTradeStatus STRING,shippingType STRING,title STRING,buyerMessage STRING,buyerMemo STRING,rxAuditStatus STRING,buyerEmail STRING,picPath STRING,shopPick STRING,creditCardFee STRING,markDesc STRING,sellerMemo STRING,invoiceName STRING,invoiceType STRING,tradeAttr STRING,esRange STRING,esDate STRING,osDate STRING,osRange STRING,o2oSnatchStatus STRING,market STRING,etType STRING,obs STRING,tradeOriginalJson STRING,point STRING,omniAttr STRING,omniParam STRING,identity STRING,omnichannelParam STRING,assembly STRING,tradeId BIGINT,itemId BIGINT,platFormId INT,num INT,sellerFlag INT,naSource INT,etShopId INT,forbidConsign INT,buyerFlag INT,topHold INT,nvoiceKind INT,payment STRING,price STRING,totalFee STRING,discountFee STRING,postFee STRING,stepPaidFee STRING,adjustFee STRING,buyerCodFee STRING,orderTaxFee STRING,couponFee STRING,paidCouponFee STRING,sellerRate STRING,buyerRate STRING,postGateDeclare STRING,crossBondedDeclare STRING,hasBuyerMessage STRING,hasPostFee STRING,isShShip STRING,created TIMESTAMP,payTime TIMESTAMP,modified TIMESTAMP,endTime 
TIMESTAMP,consignTime TIMESTAMP,estConTime TIMESTAMP) STORED BY 'carbondata'; 3.carbon.properties System Configuration ## #Mandatory. Carbon Store path carbon.storelocation=hdfs://master.nascent.com:8020/Opt/CarbonStore #Base directory for Data files carbon.ddl.base.hdfs.url=hdfs://master.nascent.com:8020/opt/data #Path where the bad records are stored carbon.badRecords.location=/opt/Carbon/Spark/badrecords #Mandatory. path to kettle home carbon.kettle.home=/usr/lib/spark/carbonlib/carbonplugins Performance Configuration ## DataLoading Configuration ##add by xzl carbon.load.use.batch.sort=true enable.unsafe.sort=true offheap.sort.chunk.size.inmb=1024 carbon.load.batch.sort.size.inmb=450 #File read buffer size used during sorting(in MB) :MIN=1:MAX=100 carbon.sort.file.buffer.size=10 #Rowset size exchanged between data load graph steps :MIN=500:MAX=100 carbon.graph.rowset.size=1 #Number of cores to be used while data loading carbon.number.of.cores.while.loading=6 #Record count to sort and write to temp intermediate files carbon.sort.size=50 #Algorithm for hashmap for hashkey calculation carbon.enableXXHash=true #Number of cores to be used for block sort while dataloading #carbon.number.of.cores.block.sort=7 #max level cache size upto which level cache will be loaded in memory #carbon.max.level.cache.size=-1 #enable prefetch of data during merge sort while reading data from sort temp files in data loading #carbon.merge.sort.prefetch=true Compaction Configuration #Number of cores to be used while compacting carbon.number.of.cores.while.compacting=8 #For minor compaction, Number of segments to be merged in stage 1, number of compacted segments to be merged in stage 2. 
carbon.compaction.level.threshold=4,3 #default size (in MB) for major compaction to be triggered carbon.major.compaction.size=1024 Query Configuration #Number of cores to be used for loading index into memory carbon.number.of.cores=8 #Number of records to be in memory while querying :MIN=10:MAX=24 carbon.inmemory.record.size=12 #Improves the performance of filter query carbon.enable.quick.filter=false ##number of core to load the blocks in driver #no.of.cores.to.load.blocks.in.driver=10 Extra Configuration ## ##Timestamp format of input data used for timestamp data type. #carbon.timestamp.format=-MM-dd HH:mm:ss Dataload Configuration ##File write buffer size used during sorting. #carbon.sort.file.write.buffer.size=10485760 ##Locking mechanism for data loading on a table carbon.lock.type=HDFSLOCK ##Minimum no of intermediate files after which sort merged to be started. #carbon.sort.intermediate.files.limit=20 ##space reserved in percentage for writing block meta data in carbon data