GitHub user kunal642 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2951#discussion_r236647791 --- Diff: integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/dli/SparkCarbonDataSourceTestCase.scala --- @@ -0,0 +1,1267 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.carbondata.cluster.sdv.generated.dli + + +import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, File, InputStream} + + + +import scala.collection.mutable + +import org.apache.avro +import org.apache.avro.file.DataFileWriter +import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord} +import org.apache.avro.io.{DecoderFactory, Encoder} +import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.common.util.QueryTest +import org.junit.Assert +import org.scalatest.BeforeAndAfterAll + +import org.apache.carbondata.core.datamap.DataMapStoreManager +import org.apache.carbondata.core.datastore.impl.FileFactory +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier +import org.apache.carbondata.core.metadata.datatype.{DataTypes, StructField} +import org.apache.carbondata.hadoop.testutil.StoreCreator +import org.apache.carbondata.sdk.file.{CarbonWriter, Field, Schema} + +class SparkCarbonDataSourceTestCase extends QueryTest with BeforeAndAfterAll { + + val rootPath = new File(this.getClass.getResource("/").getPath + + "../../../..").getCanonicalPath + + val warehouse1 = FileFactory.getPath(s"$rootPath/integration/spark-datasource/target/warehouse").toString + + test("test write using dataframe") { + import sqlContext.implicits._ + val df = sqlContext.sparkContext.parallelize(1 to 10) + .map(x => ("a" + x % 10, "b", x)) + .toDF("c1", "c2", "number") + sql("drop table if exists testformat") + // Saves dataframe to carbon file + df.write + .format("carbon").saveAsTable("testformat") + assert(sql("select * from testformat").count() == 10) + assert(sql("select * from testformat where c1='a0'").count() == 1) + assert(sql("select * from testformat").count() == 10) + sql("drop table if exists testformat") + } + + test("test write using ddl") { + import sqlContext.implicits._ + val df = sqlContext.sparkContext.parallelize(1 to 10) + .map(x => ("a" + x % 10, "b", x)) + 
.toDF("c1", "c2", "number") + sql("drop table if exists testparquet") + sql("drop table if exists testformat") + // Saves dataframe to carbon file + df.write + .format("parquet").saveAsTable("testparquet") + sql("create table carbon_table(c1 string, c2 string, number int) using carbon") + sql("insert into carbon_table select * from testparquet") + checkAnswer(sql("select * from carbon_table where c1='a1'"), sql("select * from testparquet where c1='a1'")) + if (!sqlContext.sparkContext.version.startsWith("2.1")) { + val mapSize = DataMapStoreManager.getInstance().getAllDataMaps.size() + DataMapStoreManager.getInstance() + .clearDataMaps(AbsoluteTableIdentifier.from(warehouse1 + "/carbon_table")) + assert(mapSize >= DataMapStoreManager.getInstance().getAllDataMaps.size()) + } + sql("drop table if exists testparquet") + sql("drop table if exists testformat") + } + + test("test read with df write") { + FileFactory.deleteAllCarbonFilesOfDir(FileFactory.getCarbonFile(warehouse1 + "/test_folder")) + import sqlContext.implicits._ + val df = sqlContext.sparkContext.parallelize(1 to 10) + .map(x => ("a" + x % 10, "b", x)) + .toDF("c1", "c2", "number") + + // Saves dataframe to carbon file + df.write.format("carbon").save(warehouse1 + "/test_folder/") + + val frame = sqlContext.read.format("carbon").load(warehouse1 + "/test_folder") + frame.show() --- End diff -- remove all .show calls
---