GitHub user zzcclp commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/1840#discussion_r162852531
  
    --- Diff: examples/spark2/src/main/scala/org/apache/carbondata/examples/DStreamWithBatchTableExample.scala ---
    @@ -0,0 +1,228 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.carbondata.examples
    +
    +import java.io.{File, PrintWriter}
    +import java.net.ServerSocket
    +
    +import org.apache.spark.rdd.RDD
    +import org.apache.spark.sql.{CarbonEnv, SaveMode, SparkSession}
    +import org.apache.spark.streaming.{Seconds, StreamingContext, Time}
    +
    +import org.apache.carbondata.core.constants.CarbonCommonConstants
    +import org.apache.carbondata.core.util.CarbonProperties
    +import org.apache.carbondata.core.util.path.{CarbonStorePath, CarbonTablePath}
    +
    +/**
    + * This example demonstrates how to use CarbonData batch load to integrate
    + * with Spark Streaming (DStream, not Spark Structured Streaming).
    + */
    +// scalastyle:off println
    +
    +case class DStreamData(id: Int, name: String, city: String, salary: Float)
    +
    +object DStreamWithBatchTableExample {
    +
    +  def main(args: Array[String]): Unit = {
    +
    +    // setup paths
    +    val rootPath = new File(this.getClass.getResource("/").getPath
    +                            + "../../../..").getCanonicalPath
    +    val storeLocation = s"$rootPath/examples/spark2/target/store"
    +    val warehouse = s"$rootPath/examples/spark2/target/warehouse"
    +    val metastoredb = s"$rootPath/examples/spark2/target"
    +    val checkpointPath =
    +      s"$rootPath/examples/spark2/target/spark_streaming_cp_" +
    +      System.currentTimeMillis().toString()
    +    val streamTableName = s"dstream_batch_table"
    +
    +    CarbonProperties.getInstance()
    +      .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd")
    +
    +    import org.apache.spark.sql.CarbonSession._
    +    val spark = SparkSession
    +      .builder()
    +      .master("local[4]")
    +      .appName("DStreamWithBatchTableExample")
    +      .config("spark.sql.warehouse.dir", warehouse)
    +      .getOrCreateCarbonSession(storeLocation, metastoredb)
    +
    +    spark.sparkContext.setLogLevel("WARN")
    +
    +    val requireCreateTable = true
    +
    +    if (requireCreateTable) {
    +      // drop table if exists previously
    +      spark.sql(s"DROP TABLE IF EXISTS ${ streamTableName }")
    +      // Create target carbon table and populate with initial data
    +      // set AUTO_LOAD_MERGE to true to compact segment automatically
    +      spark.sql(
    +        s"""
    +           | CREATE TABLE ${ streamTableName }(
    +           | id INT,
    +           | name STRING,
    +           | city STRING,
    +           | salary FLOAT
    +           | )
    +           | STORED BY 'carbondata'
    +           | TBLPROPERTIES(
    +           | 'sort_columns'='name',
    +           | 'dictionary_include'='city',
    --- End diff --
    
    Remove 'MAJOR_COMPACTION_SIZE'; it's OK to add the other four properties for this example.
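    
    For reference, a minimal sketch of how the example's CREATE TABLE statement could look with 'MAJOR_COMPACTION_SIZE' dropped. The four compaction-related properties shown ('AUTO_LOAD_MERGE', 'COMPACTION_LEVEL_THRESHOLD', 'COMPACTION_PRESERVE_SEGMENTS', 'ALLOWED_COMPACTION_DAYS') are only an assumption for illustration, not a confirmed list of the four properties meant here; the snippet reuses spark and streamTableName from the example above.
    
        // Hypothetical revision of the example's CREATE TABLE statement:
        // 'MAJOR_COMPACTION_SIZE' is removed, and the four compaction-related
        // table properties below are assumed for illustration.
        spark.sql(
          s"""
             | CREATE TABLE ${ streamTableName }(
             | id INT,
             | name STRING,
             | city STRING,
             | salary FLOAT
             | )
             | STORED BY 'carbondata'
             | TBLPROPERTIES(
             | 'sort_columns'='name',
             | 'dictionary_include'='city',
             | 'AUTO_LOAD_MERGE'='true',
             | 'COMPACTION_LEVEL_THRESHOLD'='4,10',
             | 'COMPACTION_PRESERVE_SEGMENTS'='10',
             | 'ALLOWED_COMPACTION_DAYS'='2')
             | """.stripMargin)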

