GitHub user kunal642 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2951#discussion_r236647791 --- Diff: integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/dli/SparkCarbonDataSourceTestCase.scala --- @@ -0,0 +1,1267 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.carbondata.cluster.sdv.generated.dli + + +import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, File, InputStream} + + + +import scala.collection.mutable + +import org.apache.avro +import org.apache.avro.file.DataFileWriter +import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord} +import org.apache.avro.io.{DecoderFactory, Encoder} +import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.common.util.QueryTest +import org.junit.Assert +import org.scalatest.BeforeAndAfterAll + +import org.apache.carbondata.core.datamap.DataMapStoreManager +import org.apache.carbondata.core.datastore.impl.FileFactory +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier +import org.apache.carbondata.core.metadata.datatype.{DataTypes, StructField} +import org.apache.carbondata.hadoop.testutil.StoreCreator +import org.apache.carbondata.sdk.file.{CarbonWriter, Field, Schema} + +class SparkCarbonDataSourceTestCase extends QueryTest with BeforeAndAfterAll { + + val rootPath = new File(this.getClass.getResource("/").getPath + + "../../../..").getCanonicalPath + + val warehouse1 = FileFactory.getPath(s"$rootPath/integration/spark-datasource/target/warehouse").toString + + test("test write using dataframe") { + import sqlContext.implicits._ + val df = sqlContext.sparkContext.parallelize(1 to 10) + .map(x => ("a" + x % 10, "b", x)) + .toDF("c1", "c2", "number") + sql("drop table if exists testformat") + // Saves dataframe to carbon file + df.write + .format("carbon").saveAsTable("testformat") + assert(sql("select * from testformat").count() == 10) + assert(sql("select * from testformat where c1='a0'").count() == 1) + assert(sql("select * from testformat").count() == 10) + sql("drop table if exists testformat") + } + + test("test write using ddl") { + import sqlContext.implicits._ + val df = sqlContext.sparkContext.parallelize(1 to 10) + .map(x => ("a" + x % 10, "b", x)) + 
.toDF("c1", "c2", "number") + sql("drop table if exists testparquet") + sql("drop table if exists testformat") + // Saves dataframe to carbon file + df.write + .format("parquet").saveAsTable("testparquet") + sql("create table carbon_table(c1 string, c2 string, number int) using carbon") + sql("insert into carbon_table select * from testparquet") + checkAnswer(sql("select * from carbon_table where c1='a1'"), sql("select * from testparquet where c1='a1'")) + if (!sqlContext.sparkContext.version.startsWith("2.1")) { + val mapSize = DataMapStoreManager.getInstance().getAllDataMaps.size() + DataMapStoreManager.getInstance() + .clearDataMaps(AbsoluteTableIdentifier.from(warehouse1 + "/carbon_table")) + assert(mapSize >= DataMapStoreManager.getInstance().getAllDataMaps.size()) + } + sql("drop table if exists testparquet") + sql("drop table if exists testformat") + } + + test("test read with df write") { + FileFactory.deleteAllCarbonFilesOfDir(FileFactory.getCarbonFile(warehouse1 + "/test_folder")) + import sqlContext.implicits._ + val df = sqlContext.sparkContext.parallelize(1 to 10) + .map(x => ("a" + x % 10, "b", x)) + .toDF("c1", "c2", "number") + + // Saves dataframe to carbon file + df.write.format("carbon").save(warehouse1 + "/test_folder/") + + val frame = sqlContext.read.format("carbon").load(warehouse1 + "/test_folder") + frame.show() --- End diff -- remove all .show calls
---