Repository: carbondata Updated Branches: refs/heads/master d27f20523 -> 3e0693b9e
[CARBONDATA-3000] Provide C++ interface for writing carbon data in CSDK 1. support string, short, int, long, double, float, array<string>, boolean data type 2. provide builder, build, write, close interface 3. support write data to S3 4. add log for SDK and CSDK This closes #2837 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/3e0693b9 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/3e0693b9 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/3e0693b9 Branch: refs/heads/master Commit: 3e0693b9e9cecf5660ec8669f63f178fde15deab Parents: d27f205 Author: xubo245 <xub...@huawei.com> Authored: Fri Oct 19 15:40:04 2018 +0800 Committer: kunal642 <kunalkapoor...@gmail.com> Committed: Tue Oct 30 21:40:19 2018 +0530 ---------------------------------------------------------------------- docs/csdk-guide.md | 89 +++++++++- docs/ddl-of-carbondata.md | 2 +- docs/quick-start-guide.md | 2 +- .../carbondata/examples/sdk/SDKS3Example.java | 11 +- store/CSDK/CMakeLists.txt | 2 +- store/CSDK/src/CarbonReader.cpp | 18 ++ store/CSDK/src/CarbonReader.h | 15 ++ store/CSDK/src/CarbonWriter.cpp | 167 +++++++++++++++++++ store/CSDK/src/CarbonWriter.h | 123 ++++++++++++++ store/CSDK/test/main.cpp | 156 +++++++++++++++-- .../sdk/file/CarbonWriterBuilder.java | 28 ++++ store/sdk/src/main/resources/log4j.properties | 11 ++ .../sdk/file/CSVCarbonWriterTest.java | 31 ++++ 13 files changed, 637 insertions(+), 18 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/3e0693b9/docs/csdk-guide.md ---------------------------------------------------------------------- diff --git a/docs/csdk-guide.md b/docs/csdk-guide.md index 5598cb6..71afa4a 100644 --- a/docs/csdk-guide.md +++ b/docs/csdk-guide.md @@ -35,7 +35,7 @@ Please find example code at [main.cpp](https://github.com/apache/carbondata/blo When users use C++ to read 
carbon files, users should init JVM first. Then users create carbon reader and read data.There are some example code of read data from local disk -and read data from S3 at main.cpp of CSDK module. Finally, Finally, users need to +and read data from S3 at main.cpp of CSDK module. Finally, users need to release the memory and destroy JVM. ## API List @@ -101,3 +101,90 @@ release the memory and destroy JVM. jboolean close(); ``` + +# CSDK Writer +The CSDK writer writes CarbonData files and carbonindex files at a given path. +External clients can use this writer to write CarbonData files from C++ +code without a CarbonSession. CSDK already supports S3 and local disk. + +In the carbon jars package, there exists a carbondata-sdk.jar, +which includes the SDK writer used by CSDK. + +## Quick example +Please find example code at [main.cpp](https://github.com/apache/carbondata/blob/master/store/CSDK/test/main.cpp) of CSDK module + +When users use C++ to write carbon files, users should init JVM first. Then users create +carbon writer and write data. There is example code for writing data to local disk +and writing data to S3 in main.cpp of the CSDK module. Finally, users need to +release the memory and destroy JVM. 
+ +## API List + +``` + /** + * create a CarbonWriterBuilder object for building carbonWriter, + * CarbonWriterBuilder object can configure different parameter + * + * @param env JNIEnv + * @return CarbonWriterBuilder object + */ + void builder(JNIEnv *env); +``` +``` + /** + * Sets the output path of the writer builder + * + * @param path is the absolute path where output files are written + * This method must be called when building CarbonWriterBuilder + * @return updated CarbonWriterBuilder + */ + void outputPath(char *path); +``` +``` + /** + * configure the schema with json style schema + * + * @param jsonSchema json style schema + * @return updated CarbonWriterBuilder + */ + void withCsvInput(char *jsonSchema); +``` +``` + /** + * configure parameter, including ak,sk and endpoint + * + * @param key key word + * @param value value + * @return CarbonWriterBuilder object + */ + void withHadoopConf(char *key, char *value); +``` +``` + /** + * @param appName appName which is writing the carbondata files + */ + void writtenBy(char *appName); +``` +``` + /** + * build carbonWriter object for writing data + * it support write data from load disk + * + * @return carbonWriter object + */ + void build(); +``` +``` + /** + * Write an object to the file, the format of the object depends on the + * implementation. + * Note: This API is not thread safe + */ + void write(jobject obj); +``` +``` + /** + * close the carbon Writer + */ + void close(); +``` \ No newline at end of file http://git-wip-us.apache.org/repos/asf/carbondata/blob/3e0693b9/docs/ddl-of-carbondata.md ---------------------------------------------------------------------- diff --git a/docs/ddl-of-carbondata.md b/docs/ddl-of-carbondata.md index 96335c6..1173f38 100644 --- a/docs/ddl-of-carbondata.md +++ b/docs/ddl-of-carbondata.md @@ -565,7 +565,7 @@ CarbonData DDL statements are documented here,which includes: ``` Here writer path will have carbondata and index files. - This can be SDK output. 
Refer [SDK Guide](./sdk-guide.md). + This can be SDK output or CSDK output. Refer [SDK Guide](./sdk-guide.md) and [CSDK Guide](./csdk-guide.md). **Note:** 1. Dropping of the external table should not delete the files present in the location. http://git-wip-us.apache.org/repos/asf/carbondata/blob/3e0693b9/docs/quick-start-guide.md ---------------------------------------------------------------------- diff --git a/docs/quick-start-guide.md b/docs/quick-start-guide.md index fd535ae..977235e 100644 --- a/docs/quick-start-guide.md +++ b/docs/quick-start-guide.md @@ -294,7 +294,7 @@ hdfs://<host_name>:port/user/hive/warehouse/carbon.store ## Installing and Configuring CarbonData on Presto **NOTE:** **CarbonData tables cannot be created nor loaded from Presto. User need to create CarbonData Table and load data into it -either with [Spark](#installing-and-configuring-carbondata-to-run-locally-with-spark-shell) or [SDK](./sdk-guide.md). +either with [Spark](#installing-and-configuring-carbondata-to-run-locally-with-spark-shell) or [SDK](./sdk-guide.md) or [CSDK](./csdk-guide.md). 
Once the table is created,it can be queried from Presto.** http://git-wip-us.apache.org/repos/asf/carbondata/blob/3e0693b9/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/SDKS3Example.java ---------------------------------------------------------------------- diff --git a/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/SDKS3Example.java b/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/SDKS3Example.java index 245d3e8..eb98798 100644 --- a/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/SDKS3Example.java +++ b/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/SDKS3Example.java @@ -62,7 +62,7 @@ public class SDKS3Example { num = Integer.parseInt(args[4]); } - Configuration conf = new Configuration(false); + Configuration conf = new Configuration(true); conf.set(Constants.ACCESS_KEY, args[0]); conf.set(Constants.SECRET_KEY, args[1]); conf.set(Constants.ENDPOINT, args[2]); @@ -70,8 +70,13 @@ public class SDKS3Example { Field[] fields = new Field[2]; fields[0] = new Field("name", DataTypes.STRING); fields[1] = new Field("age", DataTypes.INT); - CarbonWriterBuilder builder = CarbonWriter.builder().outputPath(path).withHadoopConf(conf); - CarbonWriter writer = builder.withCsvInput(new Schema(fields)).build(); + CarbonWriter writer = CarbonWriter + .builder() + .outputPath(path) + .withHadoopConf(conf) + .withCsvInput(new Schema(fields)) + .writtenBy("SDKS3Example") + .build(); for (int i = 0; i < num; i++) { writer.write(new String[]{"robot" + (i % 10), String.valueOf(i)}); http://git-wip-us.apache.org/repos/asf/carbondata/blob/3e0693b9/store/CSDK/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/store/CSDK/CMakeLists.txt b/store/CSDK/CMakeLists.txt index bfda148..ab1429d 100644 --- a/store/CSDK/CMakeLists.txt +++ b/store/CSDK/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(JNI REQUIRED) include_directories(${JNI_INCLUDE_DIRS}) 
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") -set(SOURCE_FILES src/CarbonReader.cpp src/CarbonReader.h test/main.cpp src/CarbonRow.h src/CarbonRow.cpp) +set(SOURCE_FILES src/CarbonReader.cpp src/CarbonReader.h test/main.cpp src/CarbonRow.h src/CarbonRow.cpp src/CarbonWriter.h src/CarbonWriter.cpp) add_executable(CJDK ${SOURCE_FILES}) get_filename_component(JAVA_JVM_LIBRARY_DIR ${JAVA_JVM_LIBRARY} DIRECTORY) http://git-wip-us.apache.org/repos/asf/carbondata/blob/3e0693b9/store/CSDK/src/CarbonReader.cpp ---------------------------------------------------------------------- diff --git a/store/CSDK/src/CarbonReader.cpp b/store/CSDK/src/CarbonReader.cpp index 456e00e..8b908ab 100644 --- a/store/CSDK/src/CarbonReader.cpp +++ b/store/CSDK/src/CarbonReader.cpp @@ -71,6 +71,12 @@ void CarbonReader::builder(JNIEnv *env, char *path) { carbonReaderBuilderObject = env->CallStaticObjectMethodA(carbonReaderClass, carbonReaderBuilderID, args); } +bool CarbonReader::checkBuilder() { + if (carbonReaderBuilderObject == NULL) { + throw std::runtime_error("carbonReaderBuilder Object can't be NULL. 
Please call builder method first."); + } +} + void CarbonReader::projection(int argc, char *argv[]) { if (argc < 0) { throw std::runtime_error("argc parameter can't be negative."); @@ -78,6 +84,7 @@ void CarbonReader::projection(int argc, char *argv[]) { if (argv == NULL) { throw std::runtime_error("argv parameter can't be NULL."); } + checkBuilder(); jclass carbonReaderBuilderClass = jniEnv->GetObjectClass(carbonReaderBuilderObject); jmethodID buildID = jniEnv->GetMethodID(carbonReaderBuilderClass, "projection", "([Ljava/lang/String;)Lorg/apache/carbondata/sdk/file/CarbonReaderBuilder;"); @@ -106,6 +113,7 @@ void CarbonReader::withHadoopConf(char *key, char *value) { if (value == NULL) { throw std::runtime_error("value parameter can't be NULL."); } + checkBuilder(); jclass carbonReaderBuilderClass = jniEnv->GetObjectClass(carbonReaderBuilderObject); jmethodID id = jniEnv->GetMethodID(carbonReaderBuilderClass, "withHadoopConf", "(Ljava/lang/String;Ljava/lang/String;)Lorg/apache/carbondata/sdk/file/CarbonReaderBuilder;"); @@ -119,6 +127,7 @@ void CarbonReader::withHadoopConf(char *key, char *value) { } jobject CarbonReader::build() { + checkBuilder(); jclass carbonReaderBuilderClass = jniEnv->GetObjectClass(carbonReaderBuilderObject); jmethodID buildID = jniEnv->GetMethodID(carbonReaderBuilderClass, "build", "()Lorg/apache/carbondata/sdk/file/CarbonReader;"); @@ -132,7 +141,14 @@ jobject CarbonReader::build() { return carbonReaderObject; } +bool CarbonReader::checkReader() { + if (carbonReaderObject == NULL) { + throw std::runtime_error("carbonReader Object is NULL, Please call build first."); + } +} + jboolean CarbonReader::hasNext() { + checkReader(); if (hasNextID == NULL) { jclass carbonReader = jniEnv->GetObjectClass(carbonReaderObject); hasNextID = jniEnv->GetMethodID(carbonReader, "hasNext", "()Z"); @@ -148,6 +164,7 @@ jboolean CarbonReader::hasNext() { } jobject CarbonReader::readNextRow() { + checkReader(); if (readNextRowID == NULL) { jclass carbonReader = 
jniEnv->GetObjectClass(carbonReaderObject); readNextRowID = jniEnv->GetMethodID(carbonReader, "readNextRow", @@ -164,6 +181,7 @@ jobject CarbonReader::readNextRow() { } void CarbonReader::close() { + checkReader(); jclass carbonReader = jniEnv->GetObjectClass(carbonReaderObject); jmethodID closeID = jniEnv->GetMethodID(carbonReader, "close", "()V"); if (closeID == NULL) { http://git-wip-us.apache.org/repos/asf/carbondata/blob/3e0693b9/store/CSDK/src/CarbonReader.h ---------------------------------------------------------------------- diff --git a/store/CSDK/src/CarbonReader.h b/store/CSDK/src/CarbonReader.h index b4841bf..254062b 100644 --- a/store/CSDK/src/CarbonReader.h +++ b/store/CSDK/src/CarbonReader.h @@ -40,6 +40,21 @@ private: */ jobject carbonReaderObject; + /** + * Return true if carbonReaderBuilder Object isn't NULL + * Throw exception if carbonReaderBuilder Object is NULL + * + * @return true or throw exception + */ + bool checkBuilder(); + + /** + * Return true if carbonReader Object isn't NULL + * Throw exception if carbonReader Object is NULL + * + * @return true or throw exception + */ + bool checkReader(); public: /** http://git-wip-us.apache.org/repos/asf/carbondata/blob/3e0693b9/store/CSDK/src/CarbonWriter.cpp ---------------------------------------------------------------------- diff --git a/store/CSDK/src/CarbonWriter.cpp b/store/CSDK/src/CarbonWriter.cpp new file mode 100644 index 0000000..6619e33 --- /dev/null +++ b/store/CSDK/src/CarbonWriter.cpp @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdexcept> +#include "CarbonWriter.h" + +void CarbonWriter::builder(JNIEnv *env) { + if (env == NULL) { + throw std::runtime_error("JNIEnv parameter can't be NULL."); + } + jniEnv = env; + carbonWriter = env->FindClass("org/apache/carbondata/sdk/file/CarbonWriter"); + if (carbonWriter == NULL) { + throw std::runtime_error("Can't find the class in java: org/apache/carbondata/sdk/file/CarbonWriter"); + } + jmethodID carbonWriterBuilderID = env->GetStaticMethodID(carbonWriter, "builder", + "()Lorg/apache/carbondata/sdk/file/CarbonWriterBuilder;"); + if (carbonWriterBuilderID == NULL) { + throw std::runtime_error("Can't find the method in java: carbonWriterBuilder"); + } + carbonWriterBuilderObject = env->CallStaticObjectMethod(carbonWriter, carbonWriterBuilderID); +} + +bool CarbonWriter::checkBuilder() { + if (carbonWriterBuilderObject == NULL) { + throw std::runtime_error("carbonWriterBuilder Object can't be NULL. 
Please call builder method first."); + } +} + +void CarbonWriter::outputPath(char *path) { + if (path == NULL) { + throw std::runtime_error("path parameter can't be NULL."); + } + checkBuilder(); + jclass carbonWriterBuilderClass = jniEnv->GetObjectClass(carbonWriterBuilderObject); + jmethodID methodID = jniEnv->GetMethodID(carbonWriterBuilderClass, "outputPath", + "(Ljava/lang/String;)Lorg/apache/carbondata/sdk/file/CarbonWriterBuilder;"); + if (methodID == NULL) { + throw std::runtime_error("Can't find the method in java: outputPath"); + } + jstring jPath = jniEnv->NewStringUTF(path); + jvalue args[1]; + args[0].l = jPath; + carbonWriterBuilderObject = jniEnv->CallObjectMethodA(carbonWriterBuilderObject, methodID, args); +} + +void CarbonWriter::withCsvInput(char *jsonSchema) { + if (jsonSchema == NULL) { + throw std::runtime_error("jsonSchema parameter can't be NULL."); + } + checkBuilder(); + jclass carbonWriterBuilderClass = jniEnv->GetObjectClass(carbonWriterBuilderObject); + jmethodID methodID = jniEnv->GetMethodID(carbonWriterBuilderClass, "withCsvInput", + "(Ljava/lang/String;)Lorg/apache/carbondata/sdk/file/CarbonWriterBuilder;"); + if (methodID == NULL) { + throw std::runtime_error("Can't find the method in java: withCsvInput"); + } + jstring jPath = jniEnv->NewStringUTF(jsonSchema); + jvalue args[1]; + args[0].l = jPath; + carbonWriterBuilderObject = jniEnv->CallObjectMethodA(carbonWriterBuilderObject, methodID, args); + if (jniEnv->ExceptionCheck()) { + throw jniEnv->ExceptionOccurred(); + } +}; + +void CarbonWriter::withHadoopConf(char *key, char *value) { + if (key == NULL) { + throw std::runtime_error("key parameter can't be NULL."); + } + if (value == NULL) { + throw std::runtime_error("value parameter can't be NULL."); + } + checkBuilder(); + jclass carbonWriterBuilderClass = jniEnv->GetObjectClass(carbonWriterBuilderObject); + jmethodID methodID = jniEnv->GetMethodID(carbonWriterBuilderClass, "withHadoopConf", + 
"(Ljava/lang/String;Ljava/lang/String;)Lorg/apache/carbondata/sdk/file/CarbonWriterBuilder;"); + if (methodID == NULL) { + throw std::runtime_error("Can't find the method in java: withHadoopConf"); + } + jvalue args[2]; + args[0].l = jniEnv->NewStringUTF(key); + args[1].l = jniEnv->NewStringUTF(value); + carbonWriterBuilderObject = jniEnv->CallObjectMethodA(carbonWriterBuilderObject, methodID, args); +} + +void CarbonWriter::writtenBy(char *appName) { + checkBuilder(); + jclass carbonWriterBuilderClass = jniEnv->GetObjectClass(carbonWriterBuilderObject); + jmethodID methodID = jniEnv->GetMethodID(carbonWriterBuilderClass, "writtenBy", + "(Ljava/lang/String;)Lorg/apache/carbondata/sdk/file/CarbonWriterBuilder;"); + if (methodID == NULL) { + throw std::runtime_error("Can't find the method in java: writtenBy"); + } + jvalue args[1]; + args[0].l = jniEnv->NewStringUTF(appName); + carbonWriterBuilderObject = jniEnv->CallObjectMethodA(carbonWriterBuilderObject, methodID, args); +} + +void CarbonWriter::build() { + checkBuilder(); + + // If not add this, it will throw java.io.IOException: No FileSystem for scheme: file + withHadoopConf("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem"); + + jclass carbonWriterBuilderClass = jniEnv->GetObjectClass(carbonWriterBuilderObject); + jmethodID methodID = jniEnv->GetMethodID(carbonWriterBuilderClass, "build", + "()Lorg/apache/carbondata/sdk/file/CarbonWriter;"); + if (methodID == NULL) { + throw std::runtime_error("Can't find the method in java: build"); + } + carbonWriterObject = jniEnv->CallObjectMethod(carbonWriterBuilderObject, methodID); + + if (jniEnv->ExceptionCheck()) { + throw jniEnv->ExceptionOccurred(); + } +} + +bool CarbonWriter::checkWriter() { + if (carbonWriterObject == NULL) { + throw std::runtime_error("carbonWriter Object is NULL, Please call build first."); + } +} + +void CarbonWriter::write(jobject obj) { + checkWriter(); + if (writeID == NULL) { + carbonWriter = 
jniEnv->GetObjectClass(carbonWriterObject); + writeID = jniEnv->GetMethodID(carbonWriter, "write", "(Ljava/lang/Object;)V"); + if (writeID == NULL) { + throw std::runtime_error("Can't find the method in java: write"); + } + } + jvalue args[1]; + args[0].l = obj; + jniEnv->CallBooleanMethodA(carbonWriterObject, writeID, args); + if (jniEnv->ExceptionCheck()) { + throw jniEnv->ExceptionOccurred(); + } +}; + +void CarbonWriter::close() { + checkWriter(); + jclass carbonWriter = jniEnv->GetObjectClass(carbonWriterObject); + jmethodID methodID = jniEnv->GetMethodID(carbonWriter, "close", "()V"); + if (methodID == NULL) { + throw std::runtime_error("Can't find the method in java: close"); + } + jniEnv->CallBooleanMethod(carbonWriterObject, methodID); + if (jniEnv->ExceptionCheck()) { + throw jniEnv->ExceptionOccurred(); + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/carbondata/blob/3e0693b9/store/CSDK/src/CarbonWriter.h ---------------------------------------------------------------------- diff --git a/store/CSDK/src/CarbonWriter.h b/store/CSDK/src/CarbonWriter.h new file mode 100644 index 0000000..2c8c715 --- /dev/null +++ b/store/CSDK/src/CarbonWriter.h @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <jni.h> + +class CarbonWriter { +private: + /** + * jni env + */ + JNIEnv *jniEnv; + + /** + * carbonWriterBuilder object for building carbonWriter + * it can configure some operation + */ + jobject carbonWriterBuilderObject = NULL; + + /** + * carbonWriter object for writing data + */ + jobject carbonWriterObject; + + /** + * carbon writer class + */ + jclass carbonWriter; + + /** + * write method id + */ + jmethodID writeID = NULL; + + /** + * check whether has called builder + * + * @return true or throw exception + */ + bool checkBuilder(); + + /** + * check writer whether has called build + * + * @return true or throw exception + */ + bool checkWriter(); +public: + /** + * create a CarbonWriterBuilder object for building carbonWriter, + * CarbonWriterBuilder object can configure different parameter + * + * @param env JNIEnv + * @return CarbonWriterBuilder object + */ + void builder(JNIEnv *env); + + /** + * Sets the output path of the writer builder + * + * @param path is the absolute path where output files are written + * This method must be called when building CarbonWriterBuilder + * @return updated CarbonWriterBuilder + */ + void outputPath(char *path); + + /** + * configure the schema with json style schema + * + * @param jsonSchema json style schema + * @return updated CarbonWriterBuilder + */ + void withCsvInput(char *jsonSchema); + + /** + * configure parameter, including ak,sk and endpoint + * + * @param key key word + * @param value value + * @return CarbonWriterBuilder object + */ + void withHadoopConf(char *key, char *value); + + /** + * @param appName appName which is writing the carbondata files + */ + void writtenBy(char *appName); + + /** + * build carbonWriter object for writing data + * it support write data from load disk + * + * @return carbonWriter object + */ + void build(); + + /** + * Write an object to the file, 
the format of the object depends on the + * implementation. + * Note: This API is not thread safe + */ + void write(jobject obj); + + /** + * close the carbon Writer + */ + void close(); +}; + + http://git-wip-us.apache.org/repos/asf/carbondata/blob/3e0693b9/store/CSDK/test/main.cpp ---------------------------------------------------------------------- diff --git a/store/CSDK/test/main.cpp b/store/CSDK/test/main.cpp index 843155f..b102622 100644 --- a/store/CSDK/test/main.cpp +++ b/store/CSDK/test/main.cpp @@ -20,8 +20,10 @@ #include <stdlib.h> #include <iostream> #include <unistd.h> +#include <sys/time.h> #include "../src/CarbonReader.h" #include "../src/CarbonRow.h" +#include "../src/CarbonWriter.h" using namespace std; @@ -210,6 +212,142 @@ bool tryCatchException(JNIEnv *env) { } printf("\nfinished handle exception\n"); } + +/** + * test write data to local disk + * + * @param env jni env + * @param path file path + * @param argc argument counter + * @param argv argument vector + * @return true or throw exception + */ +bool testWriteData(JNIEnv *env, char *path, int argc, char *argv[]) { + + char *jsonSchema = "[{stringField:string},{shortField:short},{intField:int},{longField:long},{doubleField:double},{boolField:boolean},{dateField:date},{timeField:timestamp},{floatField:float},{arrayField:array}]"; + try { + CarbonWriter writer; + writer.builder(env); + writer.outputPath(path); + writer.withCsvInput(jsonSchema); + writer.writtenBy("CSDK"); + if (argc > 3) { + writer.withHadoopConf("fs.s3a.access.key", argv[1]); + writer.withHadoopConf("fs.s3a.secret.key", argv[2]); + writer.withHadoopConf("fs.s3a.endpoint", argv[3]); + } + writer.build(); + + int rowNum = 10; + int size = 10; + long longValue = 0; + double doubleValue = 0; + float floatValue = 0; + jclass objClass = env->FindClass("java/lang/String"); + for (int i = 0; i < rowNum; ++i) { + jobjectArray arr = env->NewObjectArray(size, objClass, 0); + char ctrInt[10]; + gcvt(i, 10, ctrInt); + + char a[15] = 
"robot"; + strcat(a, ctrInt); + jobject stringField = env->NewStringUTF(a); + env->SetObjectArrayElement(arr, 0, stringField); + + char ctrShort[10]; + gcvt(i % 10000, 10, ctrShort); + jobject shortField = env->NewStringUTF(ctrShort); + env->SetObjectArrayElement(arr, 1, shortField); + + jobject intField = env->NewStringUTF(ctrInt); + env->SetObjectArrayElement(arr, 2, intField); + + + char ctrLong[10]; + gcvt(longValue, 10, ctrLong); + longValue = longValue + 2; + jobject longField = env->NewStringUTF(ctrLong); + env->SetObjectArrayElement(arr, 3, longField); + + char ctrDouble[10]; + gcvt(doubleValue, 10, ctrDouble); + doubleValue = doubleValue + 2; + jobject doubleField = env->NewStringUTF(ctrDouble); + env->SetObjectArrayElement(arr, 4, doubleField); + + jobject boolField = env->NewStringUTF("true"); + env->SetObjectArrayElement(arr, 5, boolField); + + jobject dateField = env->NewStringUTF(" 2019-03-02"); + env->SetObjectArrayElement(arr, 6, dateField); + + jobject timeField = env->NewStringUTF("2019-02-12 03:03:34"); + env->SetObjectArrayElement(arr, 7, timeField); + + char ctrFloat[10]; + gcvt(floatValue, 10, ctrFloat); + floatValue = floatValue + 2; + jobject floatField = env->NewStringUTF(ctrFloat); + env->SetObjectArrayElement(arr, 8, floatField); + + jobject arrayField = env->NewStringUTF("Hello#World#From#Carbon"); + env->SetObjectArrayElement(arr, 9, arrayField); + + + writer.write(arr); + + env->DeleteLocalRef(stringField); + env->DeleteLocalRef(shortField); + env->DeleteLocalRef(intField); + env->DeleteLocalRef(longField); + env->DeleteLocalRef(doubleField); + env->DeleteLocalRef(floatField); + env->DeleteLocalRef(dateField); + env->DeleteLocalRef(timeField); + env->DeleteLocalRef(boolField); + env->DeleteLocalRef(arrayField); + env->DeleteLocalRef(arr); + } + writer.close(); + + CarbonReader carbonReader; + carbonReader.builder(env, path); + carbonReader.build(); + int i = 0; + CarbonRow carbonRow(env); + while (carbonReader.hasNext()) { + jobject 
row = carbonReader.readNextRow(); + i++; + carbonRow.setCarbonRow(row); + printf("%s\t%d\t%ld\t", carbonRow.getString(0), carbonRow.getInt(1), carbonRow.getLong(2)); + jobjectArray array1 = carbonRow.getArray(3); + jsize length = env->GetArrayLength(array1); + int j = 0; + for (j = 0; j < length; j++) { + jobject element = env->GetObjectArrayElement(array1, j); + char *str = (char *) env->GetStringUTFChars((jstring) element, JNI_FALSE); + printf("%s\t", str); + } + printf("%d\t", carbonRow.getShort(4)); + printf("%d\t", carbonRow.getInt(5)); + printf("%ld\t", carbonRow.getLong(6)); + printf("%lf\t", carbonRow.getDouble(7)); + bool bool1 = carbonRow.getBoolean(8); + if (bool1) { + printf("true\t"); + } else { + printf("false\t"); + } + printf("%f\t\n", carbonRow.getFloat(9)); + env->DeleteLocalRef(row); + } + carbonReader.close(); + } catch (jthrowable ex) { + env->ExceptionDescribe(); + env->ExceptionClear(); + } +} + /** * read data from S3 * parameter is ak sk endpoint @@ -218,19 +356,11 @@ bool tryCatchException(JNIEnv *env) { * @param argv argument vector * @return */ -bool readFromS3(JNIEnv *env, char *argv[]) { +bool readFromS3(JNIEnv *env, char *path, char *argv[]) { printf("\nRead data from S3:\n"); CarbonReader reader; - char *args[3]; - // "your access key" - args[0] = argv[1]; - // "your secret key" - args[1] = argv[2]; - // "your endPoint" - args[2] = argv[3]; - - reader.builder(env, "s3a://sdk/WriterOutput/carbondata/", "test"); + reader.builder(env, path, "test"); reader.withHadoopConf("fs.s3a.access.key", argv[1]); reader.withHadoopConf("fs.s3a.secret.key", argv[2]); reader.withHadoopConf("fs.s3a.endpoint", argv[3]); @@ -250,12 +380,16 @@ int main(int argc, char *argv[]) { // init jvm JNIEnv *env; env = initJVM(); + char *S3WritePath = "s3a://sdk/WriterOutput/carbondata2"; + char *S3ReadPath = "s3a://sdk/WriterOutput/carbondata"; if (argc > 3) { - readFromS3(env, argv); + testWriteData(env, S3WritePath, 4, argv); + readFromS3(env, S3ReadPath, argv); 
} else { tryCatchException(env); readFromLocalWithoutProjection(env); + testWriteData(env, "./data", 1, argv); readFromLocal(env); } (jvm)->DestroyJavaVM(); http://git-wip-us.apache.org/repos/asf/carbondata/blob/3e0693b9/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java ---------------------------------------------------------------------- diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java index a47cc68..877777f 100644 --- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java +++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java @@ -269,6 +269,21 @@ public class CarbonWriterBuilder { } /** + * configure hadoop configuration with key value + * + * @param key key word + * @param value value + * @return this object + */ + public CarbonWriterBuilder withHadoopConf(String key, String value) { + if (this.hadoopConf == null) { + this.hadoopConf = new Configuration(true); + } + this.hadoopConf.set(key, value); + return this; + } + + /** * To set the carbondata file size in MB between 1MB-2048MB * @param blockSize is size in MB between 1MB to 2048 MB * default value is 1024 MB @@ -342,6 +357,19 @@ public class CarbonWriterBuilder { } /** + * to build a {@link CarbonWriter}, which accepts row in CSV format + * + * @param jsonSchema json Schema string + * @return CarbonWriterBuilder + */ + public CarbonWriterBuilder withCsvInput(String jsonSchema) { + Objects.requireNonNull(jsonSchema, "schema should not be null"); + this.schema = Schema.parseJson(jsonSchema); + this.writerType = WRITER_TYPE.CSV; + return this; + } + + /** * to build a {@link CarbonWriter}, which accepts Avro object * * @param avroSchema avro Schema object {org.apache.avro.Schema} http://git-wip-us.apache.org/repos/asf/carbondata/blob/3e0693b9/store/sdk/src/main/resources/log4j.properties 
---------------------------------------------------------------------- diff --git a/store/sdk/src/main/resources/log4j.properties b/store/sdk/src/main/resources/log4j.properties new file mode 100644 index 0000000..e369916 --- /dev/null +++ b/store/sdk/src/main/resources/log4j.properties @@ -0,0 +1,11 @@ +# Root logger option +log4j.rootLogger=INFO,stdout + + +# Redirect log messages to console +log4j.appender.debug=org.apache.log4j.RollingFileAppender +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.Target=System.out +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n + http://git-wip-us.apache.org/repos/asf/carbondata/blob/3e0693b9/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CSVCarbonWriterTest.java ---------------------------------------------------------------------- diff --git a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CSVCarbonWriterTest.java b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CSVCarbonWriterTest.java index 483ec88..90ea909 100644 --- a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CSVCarbonWriterTest.java +++ b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CSVCarbonWriterTest.java @@ -105,6 +105,37 @@ public class CSVCarbonWriterTest { } @Test + public void testWriteFilesBuildWithJsonSchema() throws IOException, InvalidLoadOptionException, InterruptedException { + String path = "./testWriteFilesJsonSchema"; + FileUtils.deleteDirectory(new File(path)); + + String schema = "[{name:string},{age:int},{height:double}]"; + CarbonWriterBuilder builder = CarbonWriter + .builder() + .outputPath(path) + .withCsvInput(schema) + .writtenBy("testWriteFilesBuildWithJsonSchema"); + + CarbonWriter writer = builder.build(); + for (int i = 0; i < 10; i++) { + writer.write(new String[]{ + "robot" + (i % 10), String.valueOf(i % 3000000), String.valueOf((double) i / 2)}); + } + 
writer.close(); + + CarbonReader carbonReader = CarbonReader.builder(path).build(); + int i = 0; + while (carbonReader.hasNext()) { + Object[] row = (Object[]) carbonReader.readNextRow(); + Assert.assertEquals(row[0], "robot" + i % 10); + System.out.println(); + i++; + } + carbonReader.close(); + FileUtils.deleteDirectory(new File(path)); + } + + @Test public void testAllPrimitiveDataType() throws IOException { // TODO: write all data type and read by CarbonRecordReader to verify the content String path = "./testWriteFiles";