Github user xubo245 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2181#discussion_r184578212
--- Diff:
examples/spark2/src/main/scala/org/apache/carbondata/examples/DirectSQLExample.scala
---
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.carbondata.examples

import java.io.File

import scala.util.control.NonFatal

import org.apache.commons.io.FileUtils

import org.apache.carbondata.core.metadata.datatype.DataTypes
import org.apache.carbondata.examples.util.ExampleUtils
import org.apache.carbondata.sdk.file.{CarbonWriter, Field, Schema}
+
+/**
+ * Running SQL on carbondata files directly
+ */
+object DirectSQLExample {
+
// Prepare SDK writer output: writes `num` sample rows (name, age, height)
// as carbon files under `path` using the CarbonWriter SDK.
//
// @param path          target directory for the generated carbon files
// @param num           number of rows to generate (default 3)
// @param persistSchema whether to also persist the schema file next to the data
def buildTestData(
    path: String,
    num: Int = 3,
    persistSchema: Boolean = false): Unit = {

  // getCanonicalPath yields '\' separators on Windows; the SDK expects '/'.
  val writerPath = path.replace("\\", "/")

  val fields: Array[Field] = new Array[Field](3)
  fields(0) = new Field("name", DataTypes.STRING)
  fields(1) = new Field("age", DataTypes.INT)
  fields(2) = new Field("height", DataTypes.DOUBLE)

  try {
    // Common builder configuration shared by both branches.
    val builder = CarbonWriter.builder()
      .withSchema(new Schema(fields))
      .outputPath(writerPath)
      .isTransactionalTable(true)
      .uniqueIdentifier(System.currentTimeMillis)
    val writer =
      if (persistSchema) {
        builder.persistSchemaFile(true)
        builder.buildWriterForCSVInput()
      } else {
        // NOTE(review): block size is only set on the non-persist path in the
        // original; preserved as-is — confirm this asymmetry is intentional.
        builder.withBlockSize(2).buildWriterForCSVInput()
      }
    // Each row is written as a CSV string array: (name, age, height).
    for (i <- 0 until num) {
      writer.write(Array[String]("robot" + i,
        String.valueOf(i),
        String.valueOf(i.toDouble / 2)))
    }
    writer.close()
  } catch {
    // Was: `case ex: Exception => None; case e => None`, which silently
    // swallowed ALL Throwables (including OOM/interrupts) and hid failures.
    // Catch only non-fatal errors and at least surface them.
    case NonFatal(e) => e.printStackTrace()
  }
}
+
// Remove any previously generated example output so each run starts clean.
def cleanTestData(path: String): Unit = {
  val targetDir = new File(path)
  FileUtils.deleteDirectory(targetDir)
}
+
+ // scalastyle:off
+ def main(args: Array[String]) {
+ val cc = ExampleUtils.createCarbonSession("DirectSQLExample")
+ val rootPath = new File(this.getClass.getResource("/").getPath
+ + "../../../..").getCanonicalPath
+ System.setProperty("path.target", s"$rootPath/examples/spark2/target")
+ val path = s"$rootPath/examples/spark2/target/carbonFile/"
+
+ import cc._
+ // 1. generate data file
+ cleanTestData(path)
+ buildTestData(path, 100)
+ val readPath = path + "Fact/Part0/Segment_null"
+
+ println("Running SQL on carbon files directly")
+ try {
+ // 2. run queries
+ // 2.1 read data
+ sql("drop table if exists t1")
+ sql(s"""CREATE TABLE t1 USING carbonfile OPTIONS (PATH '$readPath')
""")
+ println("Reading data by SDK:")
+ sql("select count(*) from t1").show();
+
+ // 2.1 direct sql
+ //TODO: support more than one carbonfile
+ sql(s"""select * FROM carbonfile.`$readPath` limit
10""".stripMargin).show()
+ sql(s"""select count(*) FROM carbonfile.`$readPath`
""".stripMargin).show()
+ } catch {
+ case e: Exception => println(e.getMessage)
--- End diff --
ok, done
---