[GitHub] carbondata pull request #2951: [SDV] Add datasource testcases for Spark File...
Github user asfgit closed the pull request at: https://github.com/apache/carbondata/pull/2951 ---
[GitHub] carbondata pull request #2951: [SDV] Add datasource testcases for Spark File...
Github user kunal642 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2951#discussion_r237781038 --- Diff: integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/datasource/CreateTableUsingSparkCarbonFileFormatTestCase.scala --- @@ -0,0 +1,342 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.cluster.sdv.generated.datasource + +import java.io.File +import java.text.SimpleDateFormat +import java.util.{Date, Random} +import scala.collection.JavaConverters._ +import org.apache.commons.io.FileUtils +import org.apache.commons.lang.RandomStringUtils +import org.scalatest.BeforeAndAfterAll +import org.apache.spark.util.SparkUtil +import org.apache.carbondata.core.datastore.filesystem.CarbonFile +import org.apache.carbondata.core.datastore.impl.FileFactory +import org.apache.carbondata.core.metadata.datatype.DataTypes +import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil, DataFileFooterConverter} +import org.apache.carbondata.sdk.file.{CarbonWriter, Field, Schema} +import org.apache.spark.sql.Row +import org.apache.spark.sql.common.util.QueryTest +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.datamap.DataMapStoreManager +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter + +class CreateTableUsingSparkCarbonFileFormatTestCase extends QueryTest with BeforeAndAfterAll { + + override def beforeAll(): Unit = { +sql("DROP TABLE IF EXISTS sdkOutputTable") + } + + override def afterAll(): Unit = { +sql("DROP TABLE IF EXISTS sdkOutputTable") + } + + var writerPath = new File(this.getClass.getResource("/").getPath ++ +"../." + + "./src/test/resources/SparkCarbonFileFormat/WriterOutput/") +.getCanonicalPath + //getCanonicalPath gives path with \, but the code expects /. 
+ writerPath = writerPath.replace("\\", "/"); + + def buildTestData(): Any = { + +FileUtils.deleteDirectory(new File(writerPath)) + +val schema = new StringBuilder() + .append("[ \n") + .append(" {\"name\":\"string\"},\n") + .append(" {\"age\":\"int\"},\n") + .append(" {\"height\":\"double\"}\n") + .append("]") + .toString() + +try { + val builder = CarbonWriter.builder() + val writer = + builder.outputPath(writerPath).withCsvInput(Schema.parseJson(schema)).writtenBy("CreateTableUsingSparkCarbonFileFormatTestCase").build() + var i = 0 + while (i < 100) { +writer.write(Array[String]("robot" + i, String.valueOf(i), String.valueOf(i.toDouble / 2))) +i += 1 + } + writer.close() +} catch { + case _: Throwable => None +} + } + + def cleanTestData() = { +FileUtils.deleteDirectory(new File(writerPath)) + } + + def deleteIndexFile(path: String, extension: String) : Unit = { +val file: CarbonFile = FileFactory + .getCarbonFile(path, FileFactory.getFileType(path)) + +for (eachDir <- file.listFiles) { + if (!eachDir.isDirectory) { +if (eachDir.getName.endsWith(extension)) { + CarbonUtil.deleteFoldersAndFilesSilent(eachDir) +} + } else { +deleteIndexFile(eachDir.getPath, extension) + } +} + } + + test("Running SQL directly and read carbondata files (sdk Writer Output) using the SparkCarbonFileFormat ") { +buildTestData() +assert(new File(writerPath).exists()) +sql("DROP TABLE IF EXISTS sdkOutputTable") + +//data source file format +if (SparkUtil.isSparkVersionEqualTo("2.1")) { + //data
[GitHub] carbondata pull request #2951: [SDV] Add datasource testcases for Spark File...
Github user kunal642 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2951#discussion_r237779541 --- Diff: integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/datasource/CreateTableUsingSparkCarbonFileFormatTestCase.scala --- @@ -0,0 +1,342 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.cluster.sdv.generated.datasource + +import java.io.File +import java.text.SimpleDateFormat +import java.util.{Date, Random} +import scala.collection.JavaConverters._ +import org.apache.commons.io.FileUtils +import org.apache.commons.lang.RandomStringUtils +import org.scalatest.BeforeAndAfterAll +import org.apache.spark.util.SparkUtil +import org.apache.carbondata.core.datastore.filesystem.CarbonFile +import org.apache.carbondata.core.datastore.impl.FileFactory +import org.apache.carbondata.core.metadata.datatype.DataTypes +import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil, DataFileFooterConverter} +import org.apache.carbondata.sdk.file.{CarbonWriter, Field, Schema} +import org.apache.spark.sql.Row +import org.apache.spark.sql.common.util.QueryTest +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.datamap.DataMapStoreManager +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter + +class CreateTableUsingSparkCarbonFileFormatTestCase extends QueryTest with BeforeAndAfterAll { + + override def beforeAll(): Unit = { +sql("DROP TABLE IF EXISTS sdkOutputTable") + } + + override def afterAll(): Unit = { +sql("DROP TABLE IF EXISTS sdkOutputTable") + } + + var writerPath = new File(this.getClass.getResource("/").getPath ++ +"../." + + "./src/test/resources/SparkCarbonFileFormat/WriterOutput/") +.getCanonicalPath + //getCanonicalPath gives path with \, but the code expects /. 
+ writerPath = writerPath.replace("\\", "/"); + + def buildTestData(): Any = { + +FileUtils.deleteDirectory(new File(writerPath)) + +val schema = new StringBuilder() + .append("[ \n") + .append(" {\"name\":\"string\"},\n") + .append(" {\"age\":\"int\"},\n") + .append(" {\"height\":\"double\"}\n") + .append("]") + .toString() + +try { + val builder = CarbonWriter.builder() + val writer = + builder.outputPath(writerPath).withCsvInput(Schema.parseJson(schema)).writtenBy("CreateTableUsingSparkCarbonFileFormatTestCase").build() + var i = 0 + while (i < 100) { +writer.write(Array[String]("robot" + i, String.valueOf(i), String.valueOf(i.toDouble / 2))) +i += 1 + } + writer.close() +} catch { + case _: Throwable => None +} + } + + def cleanTestData() = { +FileUtils.deleteDirectory(new File(writerPath)) + } + + def deleteIndexFile(path: String, extension: String) : Unit = { +val file: CarbonFile = FileFactory + .getCarbonFile(path, FileFactory.getFileType(path)) + +for (eachDir <- file.listFiles) { + if (!eachDir.isDirectory) { +if (eachDir.getName.endsWith(extension)) { + CarbonUtil.deleteFoldersAndFilesSilent(eachDir) +} + } else { +deleteIndexFile(eachDir.getPath, extension) + } +} + } + + test("Running SQL directly and read carbondata files (sdk Writer Output) using the SparkCarbonFileFormat ") { +buildTestData() +assert(new File(writerPath).exists()) +sql("DROP TABLE IF EXISTS sdkOutputTable") + +//data source file format +if (SparkUtil.isSparkVersionEqualTo("2.1")) { + //data
[GitHub] carbondata pull request #2951: [SDV] Add datasource testcases for Spark File...
Github user kunal642 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2951#discussion_r237779383 --- Diff: integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/datasource/CreateTableUsingSparkCarbonFileFormatTestCase.scala --- @@ -0,0 +1,342 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.cluster.sdv.generated.datasource + +import java.io.File +import java.text.SimpleDateFormat +import java.util.{Date, Random} +import scala.collection.JavaConverters._ +import org.apache.commons.io.FileUtils +import org.apache.commons.lang.RandomStringUtils +import org.scalatest.BeforeAndAfterAll +import org.apache.spark.util.SparkUtil +import org.apache.carbondata.core.datastore.filesystem.CarbonFile +import org.apache.carbondata.core.datastore.impl.FileFactory +import org.apache.carbondata.core.metadata.datatype.DataTypes +import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil, DataFileFooterConverter} +import org.apache.carbondata.sdk.file.{CarbonWriter, Field, Schema} +import org.apache.spark.sql.Row +import org.apache.spark.sql.common.util.QueryTest +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.datamap.DataMapStoreManager +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter + +class CreateTableUsingSparkCarbonFileFormatTestCase extends QueryTest with BeforeAndAfterAll { + + override def beforeAll(): Unit = { +sql("DROP TABLE IF EXISTS sdkOutputTable") + } + + override def afterAll(): Unit = { +sql("DROP TABLE IF EXISTS sdkOutputTable") + } + + var writerPath = new File(this.getClass.getResource("/").getPath ++ +"../." + + "./src/test/resources/SparkCarbonFileFormat/WriterOutput/") +.getCanonicalPath + //getCanonicalPath gives path with \, but the code expects /. 
+ writerPath = writerPath.replace("\\", "/"); + + def buildTestData(): Any = { + +FileUtils.deleteDirectory(new File(writerPath)) + +val schema = new StringBuilder() + .append("[ \n") + .append(" {\"name\":\"string\"},\n") + .append(" {\"age\":\"int\"},\n") + .append(" {\"height\":\"double\"}\n") + .append("]") + .toString() + +try { + val builder = CarbonWriter.builder() + val writer = + builder.outputPath(writerPath).withCsvInput(Schema.parseJson(schema)).writtenBy("CreateTableUsingSparkCarbonFileFormatTestCase").build() + var i = 0 + while (i < 100) { +writer.write(Array[String]("robot" + i, String.valueOf(i), String.valueOf(i.toDouble / 2))) +i += 1 + } + writer.close() +} catch { + case _: Throwable => None +} + } + + def cleanTestData() = { +FileUtils.deleteDirectory(new File(writerPath)) + } + + def deleteIndexFile(path: String, extension: String) : Unit = { +val file: CarbonFile = FileFactory + .getCarbonFile(path, FileFactory.getFileType(path)) + +for (eachDir <- file.listFiles) { + if (!eachDir.isDirectory) { +if (eachDir.getName.endsWith(extension)) { + CarbonUtil.deleteFoldersAndFilesSilent(eachDir) +} + } else { +deleteIndexFile(eachDir.getPath, extension) + } +} + } + + test("Running SQL directly and read carbondata files (sdk Writer Output) using the SparkCarbonFileFormat ") { +buildTestData() +assert(new File(writerPath).exists()) +sql("DROP TABLE IF EXISTS sdkOutputTable") + +//data source file format +if (SparkUtil.isSparkVersionEqualTo("2.1")) { + //data
[GitHub] carbondata pull request #2951: [SDV] Add datasource testcases for Spark File...
Github user kunal642 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2951#discussion_r237780471 --- Diff: integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/datasource/CreateTableUsingSparkCarbonFileFormatTestCase.scala --- @@ -0,0 +1,342 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.cluster.sdv.generated.datasource --- End diff -- Please format all the newly added code ---
[GitHub] carbondata pull request #2951: [SDV] Add datasource testcases for Spark File...
Github user kunal642 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2951#discussion_r237779458 --- Diff: integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/datasource/CreateTableUsingSparkCarbonFileFormatTestCase.scala --- @@ -0,0 +1,342 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.cluster.sdv.generated.datasource + +import java.io.File +import java.text.SimpleDateFormat +import java.util.{Date, Random} +import scala.collection.JavaConverters._ +import org.apache.commons.io.FileUtils +import org.apache.commons.lang.RandomStringUtils +import org.scalatest.BeforeAndAfterAll +import org.apache.spark.util.SparkUtil +import org.apache.carbondata.core.datastore.filesystem.CarbonFile +import org.apache.carbondata.core.datastore.impl.FileFactory +import org.apache.carbondata.core.metadata.datatype.DataTypes +import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil, DataFileFooterConverter} +import org.apache.carbondata.sdk.file.{CarbonWriter, Field, Schema} +import org.apache.spark.sql.Row +import org.apache.spark.sql.common.util.QueryTest +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.datamap.DataMapStoreManager +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter + +class CreateTableUsingSparkCarbonFileFormatTestCase extends QueryTest with BeforeAndAfterAll { + + override def beforeAll(): Unit = { +sql("DROP TABLE IF EXISTS sdkOutputTable") + } + + override def afterAll(): Unit = { +sql("DROP TABLE IF EXISTS sdkOutputTable") + } + + var writerPath = new File(this.getClass.getResource("/").getPath ++ +"../." + + "./src/test/resources/SparkCarbonFileFormat/WriterOutput/") +.getCanonicalPath + //getCanonicalPath gives path with \, but the code expects /. 
+ writerPath = writerPath.replace("\\", "/"); + + def buildTestData(): Any = { + +FileUtils.deleteDirectory(new File(writerPath)) + +val schema = new StringBuilder() + .append("[ \n") + .append(" {\"name\":\"string\"},\n") + .append(" {\"age\":\"int\"},\n") + .append(" {\"height\":\"double\"}\n") + .append("]") + .toString() + +try { + val builder = CarbonWriter.builder() + val writer = + builder.outputPath(writerPath).withCsvInput(Schema.parseJson(schema)).writtenBy("CreateTableUsingSparkCarbonFileFormatTestCase").build() + var i = 0 + while (i < 100) { +writer.write(Array[String]("robot" + i, String.valueOf(i), String.valueOf(i.toDouble / 2))) +i += 1 + } + writer.close() +} catch { + case _: Throwable => None +} + } + + def cleanTestData() = { +FileUtils.deleteDirectory(new File(writerPath)) + } + + def deleteIndexFile(path: String, extension: String) : Unit = { +val file: CarbonFile = FileFactory + .getCarbonFile(path, FileFactory.getFileType(path)) + +for (eachDir <- file.listFiles) { + if (!eachDir.isDirectory) { +if (eachDir.getName.endsWith(extension)) { + CarbonUtil.deleteFoldersAndFilesSilent(eachDir) +} + } else { +deleteIndexFile(eachDir.getPath, extension) + } +} + } + + test("Running SQL directly and read carbondata files (sdk Writer Output) using the SparkCarbonFileFormat ") { +buildTestData() +assert(new File(writerPath).exists()) +sql("DROP TABLE IF EXISTS sdkOutputTable") + +//data source file format +if (SparkUtil.isSparkVersionEqualTo("2.1")) { + //data
[GitHub] carbondata pull request #2951: [SDV] Add datasource testcases for Spark File...
Github user kunal642 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2951#discussion_r236647791 --- Diff: integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/dli/SparkCarbonDataSourceTestCase.scala --- @@ -0,0 +1,1267 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.carbondata.cluster.sdv.generated.dli + + +import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, File, InputStream} + + + +import scala.collection.mutable + +import org.apache.avro +import org.apache.avro.file.DataFileWriter +import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord} +import org.apache.avro.io.{DecoderFactory, Encoder} +import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.common.util.QueryTest +import org.junit.Assert +import org.scalatest.BeforeAndAfterAll + +import org.apache.carbondata.core.datamap.DataMapStoreManager +import org.apache.carbondata.core.datastore.impl.FileFactory +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier +import org.apache.carbondata.core.metadata.datatype.{DataTypes, StructField} +import org.apache.carbondata.hadoop.testutil.StoreCreator +import org.apache.carbondata.sdk.file.{CarbonWriter, Field, Schema} + +class SparkCarbonDataSourceTestCase extends QueryTest with BeforeAndAfterAll { + + val rootPath = new File(this.getClass.getResource("/").getPath ++ "../../../..").getCanonicalPath + + val warehouse1 = FileFactory.getPath(s"$rootPath/integration/spark-datasource/target/warehouse").toString + + test("test write using dataframe") { +import sqlContext.implicits._ +val df = sqlContext.sparkContext.parallelize(1 to 10) + .map(x => ("a" + x % 10, "b", x)) + .toDF("c1", "c2", "number") +sql("drop table if exists testformat") +// Saves dataframe to carbon file +df.write + .format("carbon").saveAsTable("testformat") +assert(sql("select * from testformat").count() == 10) +assert(sql("select * from testformat where c1='a0'").count() == 1) +assert(sql("select * from testformat").count() == 10) +sql("drop table if exists testformat") + } + + test("test write using ddl") { +import sqlContext.implicits._ +val df = sqlContext.sparkContext.parallelize(1 to 10) + .map(x => ("a" + x % 10, "b", x)) + .toDF("c1", 
"c2", "number") +sql("drop table if exists testparquet") +sql("drop table if exists testformat") +// Saves dataframe to carbon file +df.write + .format("parquet").saveAsTable("testparquet") +sql("create table carbon_table(c1 string, c2 string, number int) using carbon") +sql("insert into carbon_table select * from testparquet") +checkAnswer(sql("select * from carbon_table where c1='a1'"), sql("select * from testparquet where c1='a1'")) +if (!sqlContext.sparkContext.version.startsWith("2.1")) { + val mapSize = DataMapStoreManager.getInstance().getAllDataMaps.size() + DataMapStoreManager.getInstance() +.clearDataMaps(AbsoluteTableIdentifier.from(warehouse1 + "/carbon_table")) + assert(mapSize >= DataMapStoreManager.getInstance().getAllDataMaps.size()) +} +sql("drop table if exists testparquet") +sql("drop table if exists testformat") + } + + test("test read with df write") { + FileFactory.deleteAllCarbonFilesOfDir(FileFactory.getCarbonFile(warehouse1 + "/test_folder")) +import sqlContext.implicits._ +val df = sqlContext.sparkContext.parallelize(1 to 10) + .map(x => ("a" + x % 10, "b", x)) + .toDF("c1", "c2", "number") + +// Saves dataframe to carbon file +df.write.format("carbon").save(warehouse1 + "/test_folder/") + +val frame =
[GitHub] carbondata pull request #2951: [SDV] Add datasource testcases for Spark File...
Github user kunal642 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2951#discussion_r236647192 --- Diff: integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/dli/SparkCarbonDataSourceTestCase.scala --- @@ -0,0 +1,1267 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.cluster.sdv.generated.dli + + +import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, File, InputStream} + --- End diff -- Remove extra lines ---
[GitHub] carbondata pull request #2951: [SDV] Add datasource testcases for Spark File...
Github user kunal642 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2951#discussion_r236646305 --- Diff: integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/dli/CreateTableUsingSparkCarbonFileFormatTestCase.scala --- @@ -0,0 +1,484 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.cluster.sdv.generated.dli + +import java.io.File +import java.text.SimpleDateFormat +import java.util.{Date, Random} + +import scala.collection.JavaConverters._ + +import org.apache.commons.io.FileUtils +import org.apache.commons.lang.RandomStringUtils +import org.scalatest.BeforeAndAfterAll +import org.apache.spark.util.SparkUtil + +import org.apache.carbondata.core.datastore.filesystem.CarbonFile +import org.apache.carbondata.core.datastore.impl.FileFactory +import org.apache.carbondata.core.metadata.datatype.DataTypes +import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil, DataFileFooterConverter} +import org.apache.carbondata.sdk.file.{CarbonWriter, Field, Schema} +import org.apache.spark.sql.Row +import org.apache.spark.sql.common.util.QueryTest + +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.datamap.DataMapStoreManager +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter + +class CreateTableUsingSparkCarbonFileFormatTestCase extends QueryTest with BeforeAndAfterAll { + --- End diff -- Don't leave blank lines ---
[GitHub] carbondata pull request #2951: [SDV] Add datasource testcases for Spark File...
Github user kunal642 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2951#discussion_r236646395 --- Diff: integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/dli/CreateTableUsingSparkCarbonFileFormatTestCase.scala --- @@ -0,0 +1,484 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.cluster.sdv.generated.dli + +import java.io.File +import java.text.SimpleDateFormat +import java.util.{Date, Random} + +import scala.collection.JavaConverters._ + +import org.apache.commons.io.FileUtils +import org.apache.commons.lang.RandomStringUtils +import org.scalatest.BeforeAndAfterAll +import org.apache.spark.util.SparkUtil + +import org.apache.carbondata.core.datastore.filesystem.CarbonFile +import org.apache.carbondata.core.datastore.impl.FileFactory +import org.apache.carbondata.core.metadata.datatype.DataTypes +import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil, DataFileFooterConverter} +import org.apache.carbondata.sdk.file.{CarbonWriter, Field, Schema} +import org.apache.spark.sql.Row +import org.apache.spark.sql.common.util.QueryTest + +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.datamap.DataMapStoreManager +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter + +class CreateTableUsingSparkCarbonFileFormatTestCase extends QueryTest with BeforeAndAfterAll { + + + + override def beforeAll(): Unit = { +sql("DROP TABLE IF EXISTS sdkOutputTable") + } + + override def afterAll(): Unit = { +CarbonProperties.getInstance() --- End diff -- Remove this line ---
[GitHub] carbondata pull request #2951: [SDV] Add datasource testcases for Spark File...
Github user kunal642 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2951#discussion_r236646951 --- Diff: integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/dli/CreateTableUsingSparkCarbonFileFormatTestCase.scala --- @@ -0,0 +1,484 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.cluster.sdv.generated.dli + +import java.io.File +import java.text.SimpleDateFormat +import java.util.{Date, Random} + +import scala.collection.JavaConverters._ + +import org.apache.commons.io.FileUtils +import org.apache.commons.lang.RandomStringUtils +import org.scalatest.BeforeAndAfterAll +import org.apache.spark.util.SparkUtil + +import org.apache.carbondata.core.datastore.filesystem.CarbonFile +import org.apache.carbondata.core.datastore.impl.FileFactory +import org.apache.carbondata.core.metadata.datatype.DataTypes +import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil, DataFileFooterConverter} +import org.apache.carbondata.sdk.file.{CarbonWriter, Field, Schema} +import org.apache.spark.sql.Row +import org.apache.spark.sql.common.util.QueryTest + +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.datamap.DataMapStoreManager +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter + +class CreateTableUsingSparkCarbonFileFormatTestCase extends QueryTest with BeforeAndAfterAll { + + + + override def beforeAll(): Unit = { +sql("DROP TABLE IF EXISTS sdkOutputTable") + } + + override def afterAll(): Unit = { +CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_MINMAX_ALLOWED_BYTE_COUNT, +CarbonCommonConstants.CARBON_MINMAX_ALLOWED_BYTE_COUNT_DEFAULT) +sql("DROP TABLE IF EXISTS sdkOutputTable") + } + + var writerPath = new File(this.getClass.getResource("/").getPath ++ +"../." + + "./src/test/resources/SparkCarbonFileFormat/WriterOutput/") +.getCanonicalPath + //getCanonicalPath gives path with \, but the code expects /. 
+ writerPath = writerPath.replace("\\", "/"); + + def buildTestData(): Any = { + +FileUtils.deleteDirectory(new File(writerPath)) + +val schema = new StringBuilder() + .append("[ \n") + .append(" {\"name\":\"string\"},\n") + .append(" {\"age\":\"int\"},\n") + .append(" {\"height\":\"double\"}\n") + .append("]") + .toString() + +try { + val builder = CarbonWriter.builder() + val writer = + builder.outputPath(writerPath).withCsvInput(Schema.parseJson(schema)).writtenBy("CreateTableUsingSparkCarbonFileFormatTestCase").build() + var i = 0 + while (i < 100) { +writer.write(Array[String]("robot" + i, String.valueOf(i), String.valueOf(i.toDouble / 2))) +i += 1 + } + writer.close() +} catch { + case _: Throwable => None +} + } + + def cleanTestData() = { +FileUtils.deleteDirectory(new File(writerPath)) + } + + def deleteIndexFile(path: String, extension: String) : Unit = { +val file: CarbonFile = FileFactory + .getCarbonFile(path, FileFactory.getFileType(path)) + +for (eachDir <- file.listFiles) { + if (!eachDir.isDirectory) { +if (eachDir.getName.endsWith(extension)) { + CarbonUtil.deleteFoldersAndFilesSilent(eachDir) +} + } else { +deleteIndexFile(eachDir.getPath, extension) + } +} + } + + //TO DO, need to remove segment dependency and tableIdentifier Dependency + test("read carbondata files (sdk Writer Output)