This is an automated email from the ASF dual-hosted git repository.
mgrigorov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/master by this push:
new e22f029c4 AVRO-3591: start with commons schemas (#1850)
e22f029c4 is described below
commit e22f029c4dd74ec962dca7cfe21a87c69fa2872e
Author: clesaec <[email protected]>
AuthorDate: Tue Feb 7 08:34:42 2023 +0100
AVRO-3591: start with commons schemas (#1850)
* AVRO-3591: start with commons schemas
* AVRO-3591: Add licence
* AVRO-3591: add tests for rust
* AVRO-3591: Add license
* AVRO-3591: clear warning
* AVRO-3591: regroup imports
* fix
Co-authored-by: Martin Grigorov <[email protected]>
* AVRO-3591: delete to string
* AVRO-3591: Common tests in C
* AVRO-3591: use Result
* AVRO*-3591: fix clippy warning
* AVRO-3591: readme files added
* AVRO-3591: fix clippy
* AVRO-3591: fix rs format
---------
Co-authored-by: Martin Grigorov <[email protected]>
---
lang/c/tests/CMakeLists.txt | 1 +
lang/c/tests/test_avro_commons_schema.c | 147 +++++++++++++++++++++
.../java/org/apache/avro/TestSchemaCommons.java | 86 ++++++++++++
lang/rust/avro/tests/shared.rs | 147 +++++++++++++++++++++
share/test/data/schemas/README.md | 13 ++
share/test/data/schemas/simple/README.md | 1 +
share/test/data/schemas/simple/data.avro | Bin 0 -> 154 bytes
share/test/data/schemas/simple/schema.json | 8 ++
share/test/data/schemas/withUnion/README.md | 1 +
share/test/data/schemas/withUnion/data.avro | Bin 0 -> 309 bytes
share/test/data/schemas/withUnion/schema.json | 17 +++
11 files changed, 421 insertions(+)
diff --git a/lang/c/tests/CMakeLists.txt b/lang/c/tests/CMakeLists.txt
index 2e84a06a3..1413a3f37 100644
--- a/lang/c/tests/CMakeLists.txt
+++ b/lang/c/tests/CMakeLists.txt
@@ -64,6 +64,7 @@ add_avro_executable(test_interop_data)
add_avro_test_checkmem(test_data_structures)
add_avro_test_checkmem(test_avro_schema)
+add_avro_test_checkmem(test_avro_commons_schema)
add_avro_test_checkmem(test_avro_schema_names)
add_avro_test_checkmem(test_avro_values)
add_avro_test_checkmem(test_avro_766)
diff --git a/lang/c/tests/test_avro_commons_schema.c
b/lang/c/tests/test_avro_commons_schema.c
new file mode 100644
index 000000000..c4679d89d
--- /dev/null
+++ b/lang/c/tests/test_avro_commons_schema.c
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+#include "avro.h"
+#include "avro_private.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#ifdef _WIN32
+ #include "msdirent.h"
+#else
+ #include <dirent.h>
+#endif
+
+avro_writer_t avro_stderr;
+
+/*
+ * Loads "<dirpath>/schema.json" and parses it into an Avro schema.
+ *
+ * dirpath: directory that holds the schema.json file.
+ * Returns the parsed schema. Any failure (path too long, file that can't
+ * be opened or read, invalid schema) prints a message on stderr and
+ * terminates the process with EXIT_FAILURE.
+ */
+static avro_schema_t read_common_schema_test(const char *dirpath) {
+    char schemafilepath[1024];
+    char jsontext[4096];
+    avro_schema_t schema;
+
+    /* snprintf signals truncation by returning the length it would have
+     * needed, so a negative check alone never catches a too-long path. */
+    int n = snprintf(schemafilepath, sizeof(schemafilepath), "%s/schema.json", dirpath);
+    if (n < 0 || (size_t) n >= sizeof(schemafilepath)) {
+        fprintf(stderr, "Size of dir path is too long %s !\n", dirpath);
+        exit(EXIT_FAILURE);
+    }
+
+    FILE *fp = fopen(schemafilepath, "r");
+    if (!fp) {
+        fprintf(stderr, "can't open file %s !\n", schemafilepath);
+        exit(EXIT_FAILURE);
+    }
+    /* Keep one byte free for the terminating NUL. */
+    size_t len = fread(jsontext, 1, sizeof(jsontext) - 1, fp);
+    int read_failed = ferror(fp);
+    fclose(fp);
+    if (read_failed) {
+        fprintf(stderr, "can't read file %s !\n", schemafilepath);
+        exit(EXIT_FAILURE);
+    }
+    jsontext[len] = '\0';
+
+    if (avro_schema_from_json(jsontext, 0, &schema, NULL) != 0) {
+        fprintf(stderr, "fail! Can't read schema from file %s\n", schemafilepath);
+        exit(EXIT_FAILURE);
+    }
+    return schema;
+}
+
+/*
+ * Opens an Avro file writer for `schema` on ./copy.avro and stores it in
+ * `*writer`. The file is first created (or emptied) with fopen; if either
+ * step fails the process terminates with EXIT_FAILURE.
+ */
+static void create_writer(avro_schema_t schema, avro_file_writer_t* writer)
+{
+    static const char copy_path[] = "./copy.avro";
+
+    /* Create the copy file, or empty it when it already exists. */
+    FILE *fp = fopen(copy_path, "w");
+    if (fp == NULL) {
+        fprintf(stderr, "can't create file copy.avro !\n");
+        exit(EXIT_FAILURE);
+    }
+    fclose(fp);
+
+    /* Attach the Avro writer to that file. */
+    int rc = avro_file_writer_create(copy_path, schema, writer);
+    if (rc != 0) {
+        fprintf(stdout, "\nThere was an error creating db: %s", avro_strerror());
+        exit(EXIT_FAILURE);
+    }
+}
+
+/*
+ * Round-trips the records of "<dirpath>/data.avro": each record is read
+ * with `schema` and appended to a writer created by create_writer(); the
+ * copy file is removed once every record has been processed.
+ *
+ * dirpath: directory that holds the data.avro file.
+ * schema:  schema used both to read the records and to write the copy.
+ *
+ * Any failure prints a message and terminates the process with
+ * EXIT_FAILURE.
+ */
+static void read_data(const char *dirpath, avro_schema_t schema) {
+    char datafilepath[1024];
+    /* A return value >= the buffer size means the path was truncated. */
+    int n = snprintf(datafilepath, sizeof(datafilepath), "%s/data.avro", dirpath);
+    if (n < 0 || (size_t) n >= sizeof(datafilepath)) {
+        fprintf(stderr, "Size of dir path is too long %s/data.avro !\n", dirpath);
+        exit(EXIT_FAILURE);
+    }
+
+    avro_file_reader_t reader;
+    if (avro_file_reader(datafilepath, &reader)) {
+        fprintf(stderr, "\nCan't open data file %s: %s\n", datafilepath, avro_strerror());
+        exit(EXIT_FAILURE);
+    }
+
+    avro_file_writer_t writer;
+    create_writer(schema, &writer);
+
+    avro_datum_t datum;
+    int records_read = 0;
+    while (avro_file_reader_read(reader, schema, &datum) == 0) {
+        records_read++;
+        if (avro_file_writer_append(writer, datum)) {
+            fprintf(stdout, "\nCan't write record: %s\n", avro_strerror());
+            exit(EXIT_FAILURE);
+        }
+
+        avro_datum_decref(datum);
+    }
+
+    /* Release both handles before the copy is deleted: closing the writer
+     * also writes out what is still buffered, and an open file can't be
+     * removed on every platform. */
+    if (avro_file_writer_close(writer)) {
+        fprintf(stderr, "\nCan't close copy.avro: %s\n", avro_strerror());
+        exit(EXIT_FAILURE);
+    }
+    avro_file_reader_close(reader);
+
+    fprintf(stdout, "\nExit run test OK => %d records", records_read);
+    remove("./copy.avro");
+    fflush(stdout);
+}
+
+/*
+ * Runs the shared-schema check for one test directory: parses its
+ * schema.json, then round-trips its data.avro with that schema.
+ *
+ * dirpath: directory containing schema.json and data.avro.
+ */
+static void run_tests(const char *dirpath)
+{
+    fprintf(stdout, "\nRun test for path '%s'", dirpath);
+    avro_schema_t schema = read_common_schema_test(dirpath);
+    read_data(dirpath, schema);
+    /* Drop the reference obtained from read_common_schema_test(). */
+    avro_schema_decref(schema);
+}
+
+
+
+/*
+ * Entry point: runs run_tests() on every non-hidden sub-directory of the
+ * shared schemas folder. Command-line arguments are ignored.
+ */
+int main(int argc, char *argv[])
+{
+    const char *srcdir = "../../../share/test/data/schemas";
+    AVRO_UNUSED(argc);
+    AVRO_UNUSED(argv);
+
+    avro_stderr = avro_writer_file(stderr);
+
+    DIR *dir = opendir(srcdir);
+    if (dir == NULL) {
+        fprintf(stdout, "Unable to open '%s'\n", srcdir);
+        fflush(stdout);
+        exit(EXIT_FAILURE);
+    }
+
+    struct dirent *dent;
+    while ((dent = readdir(dir)) != NULL) {
+        /* Skip ".", ".." and hidden entries, and anything that is not
+         * reported as a directory. */
+        if (dent->d_name[0] == '.' || dent->d_type != DT_DIR) {
+            continue;
+        }
+        char filepath[1024];
+        snprintf(filepath, sizeof(filepath), "%s/%s", srcdir, dent->d_name);
+        run_tests(filepath);
+    }
+    closedir(dir);
+
+    avro_writer_free(avro_stderr);
+    return EXIT_SUCCESS;
+}
diff --git
a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCommons.java
b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCommons.java
new file mode 100644
index 000000000..30f760b30
--- /dev/null
+++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCommons.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.stream.Stream;
+
+import org.apache.avro.file.DataFileReader;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.io.DatumWriter;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Runs the checks shared between the SDKs: for every sub-folder of
+ * {@code share/test/data/schemas}, parses {@code schema.json}, reads
+ * {@code data.avro} with that schema and appends each record to a copy file.
+ */
+public class TestSchemaCommons {
+  private static final Logger LOG = LoggerFactory.getLogger(TestSchemaCommons.class);
+
+  /**
+   * Checks one shared folder. A folder without {@code schema.json} or without
+   * {@code data.avro} is logged and skipped; a data file without any record
+   * fails the test.
+   *
+   * @param folder folder holding the schema and data files.
+   * @throws IOException if the schema or the data files can't be read or
+   *                     written.
+   */
+  @ParameterizedTest
+  @MethodSource("sharedFolders")
+  void runFolder(final File folder) throws IOException {
+    final File schemaSource = new File(folder, "schema.json");
+    final File data = new File(folder, "data.avro");
+
+    if (!schemaSource.exists()) {
+      LOG.warn("No 'schema.json' file on folder {}", folder.getPath());
+      return;
+    }
+    final Schema schema = new Schema.Parser().parse(schemaSource);
+    Assertions.assertNotNull(schema);
+
+    if (!data.exists()) {
+      LOG.warn("No 'data.avro' file on folder {}", folder.getPath());
+      return;
+    }
+
+    // output file
+    final String rootTest = Thread.currentThread().getContextClassLoader().getResource(".").getPath();
+    final File copyData = new File(rootTest, "copy.avro");
+
+    // Deserialize from disk and append every record to the copy
+    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
+    GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema);
+    try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(data, datumReader);
+        DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
+      dataFileWriter.create(schema, copyData);
+      int counter = 0;
+      while (dataFileReader.hasNext()) {
+        final GenericRecord record = dataFileReader.next();
+        counter++;
+        Assertions.assertNotNull(record);
+        dataFileWriter.append(record);
+      }
+      Assertions.assertTrue(counter > 0, "no data in file");
+    }
+  }
+
+  /**
+   * Provides one argument per sub-folder of the shared schemas directory.
+   *
+   * @return the folders to test.
+   * @throws IllegalStateException if the shared directory can't be listed.
+   */
+  public static Stream<Arguments> sharedFolders() {
+    File root = new File("../../../share/test/data/schemas");
+    // listFiles returns null (not an empty array) when root is missing or
+    // unreadable; report that explicitly instead of failing with an NPE.
+    File[] folders = root.listFiles(File::isDirectory);
+    if (folders == null) {
+      throw new IllegalStateException("Can't list shared schema folders in " + root.getAbsolutePath());
+    }
+    return Arrays.stream(folders).map(Arguments::of);
+  }
+
+}
diff --git a/lang/rust/avro/tests/shared.rs b/lang/rust/avro/tests/shared.rs
new file mode 100644
index 000000000..f5686129c
--- /dev/null
+++ b/lang/rust/avro/tests/shared.rs
@@ -0,0 +1,147 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use apache_avro::{types::Value, Codec, Reader, Schema, Writer};
+use std::{
+ fmt,
+ fs::{DirEntry, File, ReadDir},
+ io::BufReader,
+ path::Path,
+ slice::Iter,
+};
+
+const ROOT_DIRECTORY: &str = "../../../share/test/data/schemas";
+
+/// Runs `test_folder` on every sub-directory of the shared schemas folder and
+/// panics at the end with all the collected failures, if any.
+#[test]
+fn test_schema() {
+    // Failures are accumulated so that every folder is checked before panicking.
+    let mut failures: Option<ErrorsDesc> = None;
+    for item in scan_shared_folder() {
+        let entry: DirEntry = match item {
+            Err(e) => core::panic!("Can't get file {}", e),
+            Ok(found) => found,
+        };
+        log::debug!("{:?}", entry.file_name());
+
+        // Entries whose type can't be read are skipped, like plain files.
+        let is_dir = entry.file_type().map(|ft| ft.is_dir()).unwrap_or(false);
+        if !is_dir {
+            continue;
+        }
+
+        let sub_folder = format!(
+            "{}/{}",
+            ROOT_DIRECTORY,
+            entry.file_name().to_str().unwrap()
+        );
+        if let Err(ed) = test_folder(&sub_folder) {
+            failures = Some(match failures {
+                None => ed,
+                Some(previous) => previous.merge(&ed),
+            });
+        }
+    }
+    if let Some(e) = failures {
+        core::panic!("{}", e)
+    }
+}
+
+/// Ordered list of failure messages collected while checking the shared folders.
+#[derive(Debug)]
+struct ErrorsDesc {
+    details: Vec<String>,
+}
+
+impl ErrorsDesc {
+    /// Creates a description holding the single message `msg`.
+    fn new(msg: &str) -> ErrorsDesc {
+        ErrorsDesc {
+            details: vec![msg.to_owned()],
+        }
+    }
+
+    /// Returns a new description made of the messages of `self` followed by `msg`.
+    fn add(&self, msg: &str) -> Self {
+        let details: Vec<String> = self
+            .details
+            .iter()
+            .cloned()
+            .chain(std::iter::once(msg.to_owned()))
+            .collect();
+        Self { details }
+    }
+
+    /// Returns a new description made of the messages of `self` followed by those of `err`.
+    fn merge(&self, err: &ErrorsDesc) -> Self {
+        let details: Vec<String> = self
+            .details
+            .iter()
+            .chain(err.details.iter())
+            .cloned()
+            .collect();
+        Self { details }
+    }
+}
+
+impl fmt::Display for ErrorsDesc {
+    /// Writes the messages one per line.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let joined = self.details.join("\n");
+        write!(f, "{}", joined)
+    }
+}
+
+/// Checks one shared folder: parses `schema.json`, reads every record of
+/// `data.avro` with that schema, writes the records to an in-memory Avro
+/// container, reads them back and compares them with the originals.
+///
+/// Returns `Err` when `data.avro` is missing, when a record differs after the
+/// round trip, or when some records are missing from the re-read data.
+/// Panics when the schema or the data can't be read or written.
+fn test_folder(folder: &str) -> Result<(), ErrorsDesc> {
+    let file_name = folder.to_owned() + "/schema.json";
+    let content = std::fs::read_to_string(file_name).expect("Unable to find schema.json file");
+
+    let schema: Schema = Schema::parse_str(content.as_str()).expect("Can't read schema");
+
+    let data_file_name = folder.to_owned() + "/data.avro";
+    let data_path: &Path = Path::new(data_file_name.as_str());
+    if !data_path.exists() {
+        // It is the data file, not the folder, that is missing here.
+        let msg = format!("file {data_file_name} does not exist");
+        log::error!("{}", msg);
+        return Err(ErrorsDesc::new(msg.as_str()));
+    }
+
+    let file: File = File::open(data_path).expect("Can't open data.avro");
+    let reader =
+        Reader::with_schema(&schema, BufReader::new(&file)).expect("Can't read data.avro");
+
+    let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Null);
+
+    let mut records: Vec<Value> = vec![];
+
+    for r in reader {
+        let record: Value = r.expect("Error on reading");
+        writer.append(record.clone()).expect("Error on write item");
+        records.push(record);
+    }
+
+    writer.flush().expect("Error on flush");
+    let bytes: Vec<u8> = writer.into_inner().unwrap();
+    let reader_bis =
+        Reader::with_schema(&schema, &bytes[..]).expect("Can't read flushed vector");
+
+    let mut result: Result<(), ErrorsDesc> = Ok(());
+    let mut records_iter: Iter<Value> = records.iter();
+    for r2 in reader_bis {
+        let record: Value = r2.expect("Error on reading");
+        let original = records_iter.next().expect("Error, no next");
+        if original != &record {
+            let msg = format!("Records are not equals for folder : {folder}");
+            result = Err(match result {
+                Ok(()) => ErrorsDesc::new(msg.as_str()),
+                Err(e) => e.add(msg.as_str()),
+            });
+        }
+    }
+
+    // Original records left over mean the re-read data is shorter than the
+    // source; without this check lost records would go unnoticed.
+    if records_iter.next().is_some() {
+        let msg = format!("Some records are missing after rewrite for folder : {folder}");
+        result = Err(match result {
+            Ok(()) => ErrorsDesc::new(msg.as_str()),
+            Err(e) => e.add(msg.as_str()),
+        });
+    }
+    result
+}
+
+/// Opens the shared schemas directory for iteration; panics when it can't be read.
+fn scan_shared_folder() -> ReadDir {
+    let root: &Path = Path::new(ROOT_DIRECTORY);
+    root.read_dir().expect("Can't read root folder")
+}
diff --git a/share/test/data/schemas/README.md
b/share/test/data/schemas/README.md
new file mode 100644
index 000000000..7a5e82b48
--- /dev/null
+++ b/share/test/data/schemas/README.md
@@ -0,0 +1,13 @@
+## Common schemas and data
+
+The objective of this folder is to provide test cases on Avro schemas and data for each SDK.
+
+Each subfolder is composed of:
+- schema.json file, for schema
+- data.avro file that contains some records
+- README.md that briefly explains the tested use case.
+
+Steps for tests are :
+- read schema (with schema.json file).
+- read data file (data.avro file)
+- check that the records can be written to a temporary output file.
diff --git a/share/test/data/schemas/simple/README.md
b/share/test/data/schemas/simple/README.md
new file mode 100644
index 000000000..133480af4
--- /dev/null
+++ b/share/test/data/schemas/simple/README.md
@@ -0,0 +1 @@
+Basic record case with only one string field.
diff --git a/share/test/data/schemas/simple/data.avro
b/share/test/data/schemas/simple/data.avro
new file mode 100644
index 000000000..277128e5d
Binary files /dev/null and b/share/test/data/schemas/simple/data.avro differ
diff --git a/share/test/data/schemas/simple/schema.json
b/share/test/data/schemas/simple/schema.json
new file mode 100644
index 000000000..8f3fd1a48
--- /dev/null
+++ b/share/test/data/schemas/simple/schema.json
@@ -0,0 +1,8 @@
+{
+ "type": "record",
+ "name": "simple",
+ "fields": [{
+ "name": "text",
+ "type": "string"
+ }]
+}
diff --git a/share/test/data/schemas/withUnion/README.md
b/share/test/data/schemas/withUnion/README.md
new file mode 100644
index 000000000..df501547c
--- /dev/null
+++ b/share/test/data/schemas/withUnion/README.md
@@ -0,0 +1 @@
+Treats union types.
diff --git a/share/test/data/schemas/withUnion/data.avro
b/share/test/data/schemas/withUnion/data.avro
new file mode 100644
index 000000000..fe0be880d
Binary files /dev/null and b/share/test/data/schemas/withUnion/data.avro differ
diff --git a/share/test/data/schemas/withUnion/schema.json
b/share/test/data/schemas/withUnion/schema.json
new file mode 100644
index 000000000..d55fd4f9b
--- /dev/null
+++ b/share/test/data/schemas/withUnion/schema.json
@@ -0,0 +1,17 @@
+{
+ "type": "record",
+ "name": "unionfields",
+ "fields": [{
+ "name": "data1",
+ "type": ["string", "int"]
+ },
+ {
+ "name": "data2",
+ "type": [
+ {"type": "record", "name": "inner", "fields": [{
+ "name": "d1", "type": ["string", "int", "boolean", "null"]
+ }]
+ },
+ "null"]
+ }]
+}