[GitHub] carbondata pull request #2951: [SDV] Add datasource testcases for Spark File...

2018-12-19 Thread asfgit
Github user asfgit closed the pull request at:

https://github.com/apache/carbondata/pull/2951


---


[GitHub] carbondata pull request #2951: [SDV] Add datasource testcases for Spark File...

2018-11-30 Thread kunal642
Github user kunal642 commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2951#discussion_r237781038
  
--- Diff: integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/datasource/CreateTableUsingSparkCarbonFileFormatTestCase.scala ---
@@ -0,0 +1,342 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.cluster.sdv.generated.datasource
+
+import java.io.File
+import java.text.SimpleDateFormat
+import java.util.{Date, Random}
+import scala.collection.JavaConverters._
+import org.apache.commons.io.FileUtils
+import org.apache.commons.lang.RandomStringUtils
+import org.scalatest.BeforeAndAfterAll
+import org.apache.spark.util.SparkUtil
+import org.apache.carbondata.core.datastore.filesystem.CarbonFile
+import org.apache.carbondata.core.datastore.impl.FileFactory
+import org.apache.carbondata.core.metadata.datatype.DataTypes
+import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil, DataFileFooterConverter}
+import org.apache.carbondata.sdk.file.{CarbonWriter, Field, Schema}
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.common.util.QueryTest
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.datamap.DataMapStoreManager
+import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier
+import org.apache.carbondata.core.metadata.blocklet.DataFileFooter
+
+class CreateTableUsingSparkCarbonFileFormatTestCase extends QueryTest with BeforeAndAfterAll {
+
+  override def beforeAll(): Unit = {
+    sql("DROP TABLE IF EXISTS sdkOutputTable")
+  }
+
+  override def afterAll(): Unit = {
+    sql("DROP TABLE IF EXISTS sdkOutputTable")
+  }
+
+  var writerPath = new File(this.getClass.getResource("/").getPath
+    + "../." + "./src/test/resources/SparkCarbonFileFormat/WriterOutput/")
+    .getCanonicalPath
+  //getCanonicalPath gives path with \, but the code expects /.
+  writerPath = writerPath.replace("\\", "/");
+
+  def buildTestData(): Any = {
+
+    FileUtils.deleteDirectory(new File(writerPath))
+
+    val schema = new StringBuilder()
+      .append("[ \n")
+      .append("   {\"name\":\"string\"},\n")
+      .append("   {\"age\":\"int\"},\n")
+      .append("   {\"height\":\"double\"}\n")
+      .append("]")
+      .toString()
+
+    try {
+      val builder = CarbonWriter.builder()
+      val writer =
+        builder.outputPath(writerPath).withCsvInput(Schema.parseJson(schema)).writtenBy("CreateTableUsingSparkCarbonFileFormatTestCase").build()
+      var i = 0
+      while (i < 100) {
+        writer.write(Array[String]("robot" + i, String.valueOf(i), String.valueOf(i.toDouble / 2)))
+        i += 1
+      }
+      writer.close()
+    } catch {
+      case _: Throwable => None
+    }
+  }
+
+  def cleanTestData() = {
+    FileUtils.deleteDirectory(new File(writerPath))
+  }
+
+  def deleteIndexFile(path: String, extension: String) : Unit = {
+    val file: CarbonFile = FileFactory
+      .getCarbonFile(path, FileFactory.getFileType(path))
+
+    for (eachDir <- file.listFiles) {
+      if (!eachDir.isDirectory) {
+        if (eachDir.getName.endsWith(extension)) {
+          CarbonUtil.deleteFoldersAndFilesSilent(eachDir)
+        }
+      } else {
+        deleteIndexFile(eachDir.getPath, extension)
+      }
+    }
+  }
+
+  test("Running SQL directly and read carbondata files (sdk Writer Output) using the SparkCarbonFileFormat ") {
+    buildTestData()
+    assert(new File(writerPath).exists())
+    sql("DROP TABLE IF EXISTS sdkOutputTable")
+
+    //data source file format
+    if (SparkUtil.isSparkVersionEqualTo("2.1")) {
+      //data

[GitHub] carbondata pull request #2951: [SDV] Add datasource testcases for Spark File...

2018-11-30 Thread kunal642
Github user kunal642 commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2951#discussion_r237779541
  
--- Diff: integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/datasource/CreateTableUsingSparkCarbonFileFormatTestCase.scala ---

[GitHub] carbondata pull request #2951: [SDV] Add datasource testcases for Spark File...

2018-11-30 Thread kunal642
Github user kunal642 commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2951#discussion_r237779383
  
--- Diff: integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/datasource/CreateTableUsingSparkCarbonFileFormatTestCase.scala ---

[GitHub] carbondata pull request #2951: [SDV] Add datasource testcases for Spark File...

2018-11-30 Thread kunal642
Github user kunal642 commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2951#discussion_r237780471
  
--- Diff: integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/datasource/CreateTableUsingSparkCarbonFileFormatTestCase.scala ---
@@ -0,0 +1,342 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.cluster.sdv.generated.datasource
--- End diff --

Please format all the newly added code
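
For reference, a minimal sketch of what the formatted import section could look like, using the grouped order (java, scala, other third-party, carbondata) that the dli variant of this file quoted later in the thread already follows; the exact grouping shown here is an assumption, not text from the PR:

    import java.io.File
    import java.text.SimpleDateFormat
    import java.util.{Date, Random}

    import scala.collection.JavaConverters._

    import org.apache.commons.io.FileUtils
    import org.apache.commons.lang.RandomStringUtils
    import org.apache.spark.sql.Row
    import org.apache.spark.sql.common.util.QueryTest
    import org.apache.spark.util.SparkUtil
    import org.scalatest.BeforeAndAfterAll

    import org.apache.carbondata.core.constants.CarbonCommonConstants
    import org.apache.carbondata.core.datamap.DataMapStoreManager
    import org.apache.carbondata.core.datastore.filesystem.CarbonFile
    import org.apache.carbondata.core.datastore.impl.FileFactory
    import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier
    import org.apache.carbondata.core.metadata.blocklet.DataFileFooter
    import org.apache.carbondata.core.metadata.datatype.DataTypes
    import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil, DataFileFooterConverter}
    import org.apache.carbondata.sdk.file.{CarbonWriter, Field, Schema}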


---


[GitHub] carbondata pull request #2951: [SDV] Add datasource testcases for Spark File...

2018-11-30 Thread kunal642
Github user kunal642 commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2951#discussion_r237779458
  
--- Diff: integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/datasource/CreateTableUsingSparkCarbonFileFormatTestCase.scala ---

[GitHub] carbondata pull request #2951: [SDV] Add datasource testcases for Spark File...

2018-11-27 Thread kunal642
Github user kunal642 commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2951#discussion_r236647791
  
--- Diff: integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/dli/SparkCarbonDataSourceTestCase.scala ---
@@ -0,0 +1,1267 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.cluster.sdv.generated.dli
+
+
+import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, File, InputStream}
+
+
+
+import scala.collection.mutable
+
+import org.apache.avro
+import org.apache.avro.file.DataFileWriter
+import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
+import org.apache.avro.io.{DecoderFactory, Encoder}
+import org.apache.spark.sql.{AnalysisException, Row}
+import org.apache.spark.sql.common.util.QueryTest
+import org.junit.Assert
+import org.scalatest.BeforeAndAfterAll
+
+import org.apache.carbondata.core.datamap.DataMapStoreManager
+import org.apache.carbondata.core.datastore.impl.FileFactory
+import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier
+import org.apache.carbondata.core.metadata.datatype.{DataTypes, StructField}
+import org.apache.carbondata.hadoop.testutil.StoreCreator
+import org.apache.carbondata.sdk.file.{CarbonWriter, Field, Schema}
+
+class SparkCarbonDataSourceTestCase extends QueryTest with BeforeAndAfterAll {
+
+  val rootPath = new File(this.getClass.getResource("/").getPath
+    + "../../../..").getCanonicalPath
+
+  val warehouse1 = FileFactory.getPath(s"$rootPath/integration/spark-datasource/target/warehouse").toString
+
+  test("test write using dataframe") {
+    import sqlContext.implicits._
+    val df = sqlContext.sparkContext.parallelize(1 to 10)
+      .map(x => ("a" + x % 10, "b", x))
+      .toDF("c1", "c2", "number")
+    sql("drop table if exists testformat")
+    // Saves dataframe to carbon file
+    df.write
+      .format("carbon").saveAsTable("testformat")
+    assert(sql("select * from testformat").count() == 10)
+    assert(sql("select * from testformat where c1='a0'").count() == 1)
+    assert(sql("select * from testformat").count() == 10)
+    sql("drop table if exists testformat")
+  }
+
+  test("test write using ddl") {
+    import sqlContext.implicits._
+    val df = sqlContext.sparkContext.parallelize(1 to 10)
+      .map(x => ("a" + x % 10, "b", x))
+      .toDF("c1", "c2", "number")
+    sql("drop table if exists testparquet")
+    sql("drop table if exists testformat")
+    // Saves dataframe to carbon file
+    df.write
+      .format("parquet").saveAsTable("testparquet")
+    sql("create table carbon_table(c1 string, c2 string, number int) using carbon")
+    sql("insert into carbon_table select * from testparquet")
+    checkAnswer(sql("select * from carbon_table where c1='a1'"), sql("select * from testparquet where c1='a1'"))
+    if (!sqlContext.sparkContext.version.startsWith("2.1")) {
+      val mapSize = DataMapStoreManager.getInstance().getAllDataMaps.size()
+      DataMapStoreManager.getInstance()
+        .clearDataMaps(AbsoluteTableIdentifier.from(warehouse1 + "/carbon_table"))
+      assert(mapSize >= DataMapStoreManager.getInstance().getAllDataMaps.size())
+    }
+    sql("drop table if exists testparquet")
+    sql("drop table if exists testformat")
+  }
+
+  test("test read with df write") {
+    FileFactory.deleteAllCarbonFilesOfDir(FileFactory.getCarbonFile(warehouse1 + "/test_folder"))
+    import sqlContext.implicits._
+    val df = sqlContext.sparkContext.parallelize(1 to 10)
+      .map(x => ("a" + x % 10, "b", x))
+      .toDF("c1", "c2", "number")
+
+    // Saves dataframe to carbon file
+    df.write.format("carbon").save(warehouse1 + "/test_folder/")
+
+    val frame =

[GitHub] carbondata pull request #2951: [SDV] Add datasource testcases for Spark File...

2018-11-27 Thread kunal642
Github user kunal642 commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2951#discussion_r236647192
  
--- Diff: integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/dli/SparkCarbonDataSourceTestCase.scala ---
@@ -0,0 +1,1267 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.cluster.sdv.generated.dli
+
+
+import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, File, InputStream}
+
--- End diff --

Remove extra lines
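
For reference, a minimal sketch of the header with the extra blank lines removed (assuming a single blank line between groups is the intended style):

    package org.apache.carbondata.cluster.sdv.generated.dli

    import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, File, InputStream}

    import scala.collection.mutable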



---


[GitHub] carbondata pull request #2951: [SDV] Add datasource testcases for Spark File...

2018-11-27 Thread kunal642
Github user kunal642 commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2951#discussion_r236646305
  
--- Diff: integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/dli/CreateTableUsingSparkCarbonFileFormatTestCase.scala ---
@@ -0,0 +1,484 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.cluster.sdv.generated.dli
+
+import java.io.File
+import java.text.SimpleDateFormat
+import java.util.{Date, Random}
+
+import scala.collection.JavaConverters._
+
+import org.apache.commons.io.FileUtils
+import org.apache.commons.lang.RandomStringUtils
+import org.scalatest.BeforeAndAfterAll
+import org.apache.spark.util.SparkUtil
+
+import org.apache.carbondata.core.datastore.filesystem.CarbonFile
+import org.apache.carbondata.core.datastore.impl.FileFactory
+import org.apache.carbondata.core.metadata.datatype.DataTypes
+import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil, DataFileFooterConverter}
+import org.apache.carbondata.sdk.file.{CarbonWriter, Field, Schema}
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.common.util.QueryTest
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.datamap.DataMapStoreManager
+import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier
+import org.apache.carbondata.core.metadata.blocklet.DataFileFooter
+
+class CreateTableUsingSparkCarbonFileFormatTestCase extends QueryTest with BeforeAndAfterAll {
+
--- End diff --

Don't leave blank lines
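
For reference, a minimal sketch with the run of blank lines after the class header collapsed to one, matching the datasource variant of this test case quoted earlier in the thread:

    class CreateTableUsingSparkCarbonFileFormatTestCase extends QueryTest with BeforeAndAfterAll {

      override def beforeAll(): Unit = {
        sql("DROP TABLE IF EXISTS sdkOutputTable")
      }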


---


[GitHub] carbondata pull request #2951: [SDV] Add datasource testcases for Spark File...

2018-11-27 Thread kunal642
Github user kunal642 commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2951#discussion_r236646395
  
--- Diff: integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/dli/CreateTableUsingSparkCarbonFileFormatTestCase.scala ---
@@ -0,0 +1,484 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.cluster.sdv.generated.dli
+
+import java.io.File
+import java.text.SimpleDateFormat
+import java.util.{Date, Random}
+
+import scala.collection.JavaConverters._
+
+import org.apache.commons.io.FileUtils
+import org.apache.commons.lang.RandomStringUtils
+import org.scalatest.BeforeAndAfterAll
+import org.apache.spark.util.SparkUtil
+
+import org.apache.carbondata.core.datastore.filesystem.CarbonFile
+import org.apache.carbondata.core.datastore.impl.FileFactory
+import org.apache.carbondata.core.metadata.datatype.DataTypes
+import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil, DataFileFooterConverter}
+import org.apache.carbondata.sdk.file.{CarbonWriter, Field, Schema}
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.common.util.QueryTest
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.datamap.DataMapStoreManager
+import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier
+import org.apache.carbondata.core.metadata.blocklet.DataFileFooter
+
+class CreateTableUsingSparkCarbonFileFormatTestCase extends QueryTest with BeforeAndAfterAll {
+
+
+
+  override def beforeAll(): Unit = {
+    sql("DROP TABLE IF EXISTS sdkOutputTable")
+  }
+
+  override def afterAll(): Unit = {
+    CarbonProperties.getInstance()
--- End diff --

Remove this line
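
Assuming the whole CarbonProperties reset is what is meant to go (not just the first line of the chained call), afterAll would reduce to the same teardown the datasource variant of this test case uses:

    override def afterAll(): Unit = {
      sql("DROP TABLE IF EXISTS sdkOutputTable")
    }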


---


[GitHub] carbondata pull request #2951: [SDV] Add datasource testcases for Spark File...

2018-11-27 Thread kunal642
Github user kunal642 commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2951#discussion_r236646951
  
--- Diff: integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/dli/CreateTableUsingSparkCarbonFileFormatTestCase.scala ---
@@ -0,0 +1,484 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.cluster.sdv.generated.dli
+
+import java.io.File
+import java.text.SimpleDateFormat
+import java.util.{Date, Random}
+
+import scala.collection.JavaConverters._
+
+import org.apache.commons.io.FileUtils
+import org.apache.commons.lang.RandomStringUtils
+import org.scalatest.BeforeAndAfterAll
+import org.apache.spark.util.SparkUtil
+
+import org.apache.carbondata.core.datastore.filesystem.CarbonFile
+import org.apache.carbondata.core.datastore.impl.FileFactory
+import org.apache.carbondata.core.metadata.datatype.DataTypes
+import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil, DataFileFooterConverter}
+import org.apache.carbondata.sdk.file.{CarbonWriter, Field, Schema}
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.common.util.QueryTest
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.datamap.DataMapStoreManager
+import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier
+import org.apache.carbondata.core.metadata.blocklet.DataFileFooter
+
+class CreateTableUsingSparkCarbonFileFormatTestCase extends QueryTest with BeforeAndAfterAll {
+
+
+
+  override def beforeAll(): Unit = {
+    sql("DROP TABLE IF EXISTS sdkOutputTable")
+  }
+
+  override def afterAll(): Unit = {
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.CARBON_MINMAX_ALLOWED_BYTE_COUNT,
+        CarbonCommonConstants.CARBON_MINMAX_ALLOWED_BYTE_COUNT_DEFAULT)
+    sql("DROP TABLE IF EXISTS sdkOutputTable")
+  }
+
+  var writerPath = new File(this.getClass.getResource("/").getPath
+    + "../." + "./src/test/resources/SparkCarbonFileFormat/WriterOutput/")
+    .getCanonicalPath
+  //getCanonicalPath gives path with \, but the code expects /.
+  writerPath = writerPath.replace("\\", "/");
+
+  def buildTestData(): Any = {
+
+    FileUtils.deleteDirectory(new File(writerPath))
+
+    val schema = new StringBuilder()
+      .append("[ \n")
+      .append("   {\"name\":\"string\"},\n")
+      .append("   {\"age\":\"int\"},\n")
+      .append("   {\"height\":\"double\"}\n")
+      .append("]")
+      .toString()
+
+    try {
+      val builder = CarbonWriter.builder()
+      val writer =
+        builder.outputPath(writerPath).withCsvInput(Schema.parseJson(schema)).writtenBy("CreateTableUsingSparkCarbonFileFormatTestCase").build()
+      var i = 0
+      while (i < 100) {
+        writer.write(Array[String]("robot" + i, String.valueOf(i), String.valueOf(i.toDouble / 2)))
+        i += 1
+      }
+      writer.close()
+    } catch {
+      case _: Throwable => None
+    }
+  }
+
+  def cleanTestData() = {
+    FileUtils.deleteDirectory(new File(writerPath))
+  }
+
+  def deleteIndexFile(path: String, extension: String) : Unit = {
+    val file: CarbonFile = FileFactory
+      .getCarbonFile(path, FileFactory.getFileType(path))
+
+    for (eachDir <- file.listFiles) {
+      if (!eachDir.isDirectory) {
+        if (eachDir.getName.endsWith(extension)) {
+          CarbonUtil.deleteFoldersAndFilesSilent(eachDir)
+        }
+      } else {
+        deleteIndexFile(eachDir.getPath, extension)
+      }
+    }
+  }
+
+  //TO DO, need to remove segment dependency and tableIdentifier Dependency
+  test("read carbondata files (sdk Writer Output)