Copilot commented on code in PR #84:
URL: https://github.com/apache/paimon-cpp/pull/84#discussion_r3418283711


##########
src/paimon/format/avro/avro_input_stream_impl.cpp:
##########
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Adapted from Apache Iceberg C++
+// 
https://github.com/apache/iceberg-cpp/blob/main/src/iceberg/avro/avro_stream_internal.cc
+
+#include "paimon/format/avro/avro_input_stream_impl.h"
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "avro/Exception.hh"
+#include "paimon/fs/file_system.h"
+#include "paimon/memory/memory_pool.h"
+#include "paimon/status.h"
+
+namespace paimon::avro {
+
+Result<std::unique_ptr<AvroInputStreamImpl>> AvroInputStreamImpl::Create(
+    const std::shared_ptr<paimon::InputStream>& input_stream, size_t 
buffer_size,
+    const std::shared_ptr<MemoryPool>& pool) {
+    PAIMON_ASSIGN_OR_RAISE(uint64_t length, input_stream->Length());
+    return std::unique_ptr<AvroInputStreamImpl>(
+        new AvroInputStreamImpl(input_stream, buffer_size, length, pool));
+}
+
+AvroInputStreamImpl::AvroInputStreamImpl(const 
std::shared_ptr<paimon::InputStream>& input_stream,
+                                         size_t buffer_size, const uint64_t 
total_length,
+                                         const std::shared_ptr<MemoryPool>& 
pool)
+    : pool_(pool),
+      in_(input_stream),
+      buffer_size_(buffer_size),
+      total_length_(total_length),
+      buffer_(reinterpret_cast<uint8_t*>(pool_->Malloc(buffer_size))) {}
+
+AvroInputStreamImpl::~AvroInputStreamImpl() {
+    pool_->Free(buffer_, buffer_size_);
+}
+
+bool AvroInputStreamImpl::next(const uint8_t** data, size_t* len) {
+    // Return all unconsumed data in the buffer
+    if (buffer_pos_ < available_bytes_) {
+        *data = buffer_ + buffer_pos_;
+        *len = available_bytes_ - buffer_pos_;
+        byte_count_ += available_bytes_ - buffer_pos_;
+        buffer_pos_ = available_bytes_;
+        return true;
+    }
+
+    // Read from the input stream when the buffer is empty
+    uint64_t remaining = total_length_ - stream_pos_;
+    if (remaining == 0) {
+        return false;  // eof
+    }
+    auto read_length =
+        in_->Read(reinterpret_cast<char*>(buffer_),
+                  static_cast<uint32_t>(std::min<uint64_t>(buffer_size_, 
remaining)));

Review Comment:
   This narrows a `size_t`-derived value to `uint32_t`. If `buffer_size_` can 
exceed `UINT32_MAX`, the cast will truncate and the read size will be 
incorrect. If `InputStream::Read` is limited to `uint32_t`, consider clamping 
`buffer_size_` at construction time (and reject larger values) or chunk reads 
without narrowing.



##########
src/paimon/format/avro/avro_output_stream_impl.cpp:
##########
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon/format/avro/avro_output_stream_impl.h"
+
+#include <stdexcept>
+#include <string>
+
+#include "fmt/format.h"
+#include "paimon/fs/file_system.h"
+#include "paimon/memory/memory_pool.h"
+#include "paimon/result.h"
+#include "paimon/status.h"
+
+namespace paimon::avro {
+
+AvroOutputStreamImpl::AvroOutputStreamImpl(const 
std::shared_ptr<paimon::OutputStream>& out,
+                                           size_t buffer_size,
+                                           const std::shared_ptr<MemoryPool>& 
pool)
+    : pool_(pool),
+      buffer_size_(buffer_size),
+      buffer_(reinterpret_cast<uint8_t*>(pool_->Malloc(buffer_size))),
+      out_(out),
+      next_(buffer_),
+      available_(buffer_size_),
+      byte_count_(0) {}
+
+AvroOutputStreamImpl::~AvroOutputStreamImpl() {
+    pool_->Free(buffer_, buffer_size_);
+}
+
+bool AvroOutputStreamImpl::next(uint8_t** data, size_t* len) {
+    if (available_ == 0) {
+        FlushBuffer();
+    }
+    *data = next_;
+    *len = available_;
+    next_ += available_;
+    byte_count_ += available_;
+    available_ = 0;
+    return true;
+}
+
+void AvroOutputStreamImpl::backup(size_t len) {
+    available_ += len;
+    next_ -= len;
+    byte_count_ -= len;
+}

Review Comment:
   `backup()` does not validate `len` against the amount previously returned by 
`next()`. If `len` is larger than the last provided span (or larger than 
`buffer_size_`), `next_`/`byte_count_` can underflow and corrupt state, 
potentially leading to out-of-bounds writes on the next call. Add a guard (and 
throw an Avro exception or assert) to ensure `len <= (buffer_size_ - 
available_)` (or equivalently `len <= (next_ - buffer_)`).



##########
src/paimon/format/avro/avro_output_stream_impl.cpp:
##########
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon/format/avro/avro_output_stream_impl.h"
+
+#include <stdexcept>
+#include <string>
+
+#include "fmt/format.h"
+#include "paimon/fs/file_system.h"
+#include "paimon/memory/memory_pool.h"
+#include "paimon/result.h"
+#include "paimon/status.h"
+
+namespace paimon::avro {
+
+AvroOutputStreamImpl::AvroOutputStreamImpl(const 
std::shared_ptr<paimon::OutputStream>& out,
+                                           size_t buffer_size,
+                                           const std::shared_ptr<MemoryPool>& 
pool)
+    : pool_(pool),
+      buffer_size_(buffer_size),
+      buffer_(reinterpret_cast<uint8_t*>(pool_->Malloc(buffer_size))),
+      out_(out),
+      next_(buffer_),
+      available_(buffer_size_),
+      byte_count_(0) {}
+
+AvroOutputStreamImpl::~AvroOutputStreamImpl() {
+    pool_->Free(buffer_, buffer_size_);
+}
+
+bool AvroOutputStreamImpl::next(uint8_t** data, size_t* len) {
+    if (available_ == 0) {
+        FlushBuffer();
+    }
+    *data = next_;
+    *len = available_;
+    next_ += available_;
+    byte_count_ += available_;
+    available_ = 0;
+    return true;
+}
+
+void AvroOutputStreamImpl::backup(size_t len) {
+    available_ += len;
+    next_ -= len;
+    byte_count_ -= len;
+}
+
+void AvroOutputStreamImpl::FlushBuffer() {
+    size_t length = buffer_size_ - available_;
+    Result<int32_t> write_len = out_->Write(reinterpret_cast<const 
char*>(buffer_), length);
+    if (!write_len.ok()) {
+        throw std::runtime_error("write failed, status: " + 
write_len.status().ToString());
+    }
+    if (static_cast<size_t>(write_len.value()) != length) {
+        throw std::runtime_error(
+            fmt::format("write failed, expected length: {}, actual write 
length: {}", length,
+                        write_len.value()));
+    }
+    Status status = out_->Flush();
+    if (!status.ok()) {
+        throw std::runtime_error("flush failed, status: " + status.ToString());
+    }
+    next_ = buffer_;
+    available_ = buffer_size_;
+}

Review Comment:
   `FlushBuffer()` unconditionally calls `out_->Flush()` every time the 
internal buffer is written, which can amplify I/O flush frequency and 
counteract the intent described in `AvroOutputStreamImpl::flush()` (avoiding 
many small I/O operations). Prefer flushing only when explicitly 
finishing/closing, or make this flush conditional (e.g., only when underlying 
stream requires it), while still resetting the buffer state after the write.



##########
src/paimon/format/avro/avro_reader_builder.h:
##########
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "avro/DataFile.hh"
+#include "paimon/format/avro/avro_file_batch_reader.h"
+#include "paimon/format/avro/avro_input_stream_impl.h"
+#include "paimon/format/reader_builder.h"
+#include "paimon/fs/file_system.h"
+#include "paimon/memory/memory_pool.h"
+
+namespace paimon::avro {
+
+class AvroReaderBuilder : public ReaderBuilder {
+ public:
+    AvroReaderBuilder(const std::map<std::string, std::string>& options, 
int32_t batch_size)
+        : batch_size_(batch_size), pool_(GetDefaultPool()), options_(options) 
{}
+
+    ReaderBuilder* WithMemoryPool(const std::shared_ptr<MemoryPool>& pool) 
override {
+        pool_ = pool;
+        return this;
+    }
+
+    Result<std::unique_ptr<FileBatchReader>> Build(
+        const std::shared_ptr<InputStream>& path) const override {
+        return AvroFileBatchReader::Create(path, batch_size_, pool_);
+    }
+
+    Result<std::unique_ptr<FileBatchReader>> Build(const std::string& path) 
const override {
+        return Status::Invalid("do not support build reader with path in avro 
format");
+    }

Review Comment:
   The message is unclear and inconsistent in wording/capitalization. Consider 
a clearer “Avro reader builder does not support building from path string; use 
Build(InputStream) instead.” If the codebase has a dedicated “NotSupported” 
status type, that would communicate intent better than `Invalid`.



##########
src/paimon/format/avro/avro_schema_converter_test.cpp:
##########
@@ -0,0 +1,249 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon/format/avro/avro_schema_converter.h"
+
+#include "arrow/api.h"
+#include "avro/Compiler.hh"
+#include "avro/ValidSchema.hh"
+#include "gtest/gtest.h"
+#include "paimon/common/utils/date_time_utils.h"
+#include "paimon/core/manifest/manifest_file_meta.h"
+#include "paimon/core/utils/versioned_object_serializer.h"
+#include "paimon/status.h"
+#include "paimon/testing/utils/testharness.h"
+
+namespace paimon::avro::test {
+
+TEST(AvroSchemaConverterTest, TestSimple) {
+    // Test a basic record with primitive types
+    std::string schema_json = R"({
+        "type": "record",
+        "namespace": "org.apache.paimon.avro.generated",
+        "name": "record",
+        "fields": [
+            {"name": "f_bool", "type": "boolean"},
+            {"name": "f_int", "type": "int"},
+            {"name": "f_long", "type": "long"},
+            {"name": "f_float", "type": "float"},
+            {"name": "f_double", "type": "double"},
+            {"name": "f_string", "type": "string"},
+            {"name": "f_bytes", "type": "bytes"}
+        ]
+    })";
+
+    auto avro_schema = ::avro::compileJsonSchemaFromString(schema_json);
+
+    ASSERT_OK_AND_ASSIGN(auto arrow_type,
+                         
AvroSchemaConverter::AvroSchemaToArrowDataType(avro_schema));
+
+    // Expected Arrow Schema
+    auto expected_fields = {
+        arrow::field("f_bool", arrow::boolean(), false),
+        arrow::field("f_int", arrow::int32(), false),
+        arrow::field("f_long", arrow::int64(), false),
+        arrow::field("f_float", arrow::float32(), false),
+        arrow::field("f_double", arrow::float64(), false),
+        arrow::field("f_string", arrow::utf8(), false),
+        arrow::field("f_bytes", arrow::binary(), false),
+    };
+    auto arrow_schema = arrow::schema(expected_fields);
+
+    // The converted type should be a StructType
+    ASSERT_EQ(arrow_type->id(), arrow::Type::STRUCT);
+    ASSERT_TRUE(arrow_type->Equals(arrow::struct_(arrow_schema->fields())));
+
+    ASSERT_OK_AND_ASSIGN(auto expected_avro_schema,
+                         
AvroSchemaConverter::ArrowSchemaToAvroSchema(arrow_schema));
+    ASSERT_EQ(expected_avro_schema.toJson(), avro_schema.toJson());

Review Comment:
   Comparing `toJson()` string output is brittle 
(formatting/order/canonicalization differences can fail the test even for 
equivalent schemas). Prefer asserting semantic equivalence (e.g., parse both 
into `ValidSchema` and compare structure where possible) or validate round-trip 
behavior via Arrow types/fields rather than raw JSON string equality.



##########
src/paimon/format/avro/avro_schema_converter.cpp:
##########
@@ -0,0 +1,394 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon/format/avro/avro_schema_converter.h"
+
+#include <cassert>
+#include <cstddef>
+#include <utility>
+#include <vector>
+
+#include "arrow/util/checked_cast.h"
+#include "avro/CustomAttributes.hh"
+#include "avro/LogicalType.hh"
+#include "avro/Node.hh"
+#include "avro/Schema.hh"
+#include "avro/Types.hh"
+#include "avro/ValidSchema.hh"
+#include "fmt/format.h"
+#include "paimon/common/utils/date_time_utils.h"
+#include "paimon/format/avro/avro_file_format_factory.h"
+#include "paimon/format/avro/avro_utils.h"
+#include "paimon/macros.h"
+#include "paimon/status.h"
+namespace paimon::avro {
+
+/// Returns schema with nullable true.
+::avro::Schema AvroSchemaConverter::NullableSchema(const ::avro::Schema& 
schema) {
+    assert(schema.type() != ::avro::AVRO_UNION);
+    ::avro::UnionSchema union_schema;
+    union_schema.addType(::avro::NullSchema());
+    union_schema.addType(schema);
+    return union_schema;
+}
+
+void AvroSchemaConverter::AddRecordField(::avro::RecordSchema* record_schema,
+                                         const std::string& field_name,
+                                         const ::avro::Schema& field_schema) {
+    if (field_schema.type() == ::avro::Type::AVRO_UNION) {
+        ::avro::CustomAttributes attrs;
+        attrs.addAttribute("default", "null", /*addQuotes=*/false);
+        record_schema->addField(field_name, field_schema, attrs);
+    } else {
+        record_schema->addField(field_name, field_schema);
+    }
+}
+
+Result<bool> AvroSchemaConverter::CheckUnionType(const ::avro::NodePtr& 
avro_node) {
+    auto type = avro_node->type();
+    if (type == ::avro::AVRO_UNION) {
+        if (avro_node->leaves() != 2) {
+            return Status::Invalid("not support avro union leaves not 2");
+        }
+        auto node = avro_node->leafAt(0);
+        if (node->type() != ::avro::AVRO_NULL) {
+            return Status::Invalid("not support avro union first leaf is not 
avro null");
+        }
+        return true;
+    }
+    return false;
+}

Review Comment:
   The new error messages are grammatically unclear and omit useful context for 
debugging. Include the observed leaf count and first-leaf type (and consider 
“unsupported” wording), e.g., “Unsupported Avro union: expected 2 branches 
(null + T), got {leaves}” and “Unsupported Avro union: first branch must be 
null, got {type}”.



##########
src/paimon/format/avro/avro_schema_converter.cpp:
##########
@@ -0,0 +1,394 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon/format/avro/avro_schema_converter.h"
+
+#include <cassert>
+#include <cstddef>
+#include <utility>
+#include <vector>
+
+#include "arrow/util/checked_cast.h"
+#include "avro/CustomAttributes.hh"
+#include "avro/LogicalType.hh"
+#include "avro/Node.hh"
+#include "avro/Schema.hh"
+#include "avro/Types.hh"
+#include "avro/ValidSchema.hh"
+#include "fmt/format.h"
+#include "paimon/common/utils/date_time_utils.h"
+#include "paimon/format/avro/avro_file_format_factory.h"
+#include "paimon/format/avro/avro_utils.h"
+#include "paimon/macros.h"
+#include "paimon/status.h"
+namespace paimon::avro {
+
+/// Returns schema with nullable true.
+::avro::Schema AvroSchemaConverter::NullableSchema(const ::avro::Schema& 
schema) {
+    assert(schema.type() != ::avro::AVRO_UNION);
+    ::avro::UnionSchema union_schema;
+    union_schema.addType(::avro::NullSchema());
+    union_schema.addType(schema);
+    return union_schema;
+}
+
+void AvroSchemaConverter::AddRecordField(::avro::RecordSchema* record_schema,
+                                         const std::string& field_name,
+                                         const ::avro::Schema& field_schema) {
+    if (field_schema.type() == ::avro::Type::AVRO_UNION) {
+        ::avro::CustomAttributes attrs;
+        attrs.addAttribute("default", "null", /*addQuotes=*/false);
+        record_schema->addField(field_name, field_schema, attrs);
+    } else {
+        record_schema->addField(field_name, field_schema);
+    }
+}
+
+Result<bool> AvroSchemaConverter::CheckUnionType(const ::avro::NodePtr& 
avro_node) {
+    auto type = avro_node->type();
+    if (type == ::avro::AVRO_UNION) {
+        if (avro_node->leaves() != 2) {
+            return Status::Invalid("not support avro union leaves not 2");
+        }
+        auto node = avro_node->leafAt(0);
+        if (node->type() != ::avro::AVRO_NULL) {
+            return Status::Invalid("not support avro union first leaf is not 
avro null");
+        }
+        return true;
+    }
+    return false;
+}
+
+Result<std::shared_ptr<arrow::DataType>> 
AvroSchemaConverter::AvroSchemaToArrowDataType(
+    const ::avro::ValidSchema& avro_schema) {
+    ::avro::NodePtr root = avro_schema.root();
+    PAIMON_ASSIGN_OR_RAISE(bool is_union, CheckUnionType(root));
+    if (is_union) {
+        root = root->leafAt(1);
+    }
+    if (PAIMON_UNLIKELY(root->type() != ::avro::AVRO_RECORD)) {
+        return Status::Invalid("Avro schema root node is not a record type");
+    }
+    bool nullable = false;
+    return GetArrowType(root, &nullable);
+}
+
+Result<std::shared_ptr<arrow::Field>> AvroSchemaConverter::GetArrowField(
+    const std::string& name, const ::avro::NodePtr& avro_node) {
+    bool nullable = false;
+    PAIMON_ASSIGN_OR_RAISE(std::shared_ptr<arrow::DataType> arrow_type,
+                           GetArrowType(avro_node, &nullable));
+    return arrow::field(name, std::move(arrow_type), nullable);
+}
+
+Result<std::shared_ptr<arrow::DataType>> AvroSchemaConverter::GetArrowType(
+    const ::avro::NodePtr& avro_node, bool* nullable) {
+    PAIMON_ASSIGN_OR_RAISE(bool is_union, CheckUnionType(avro_node));
+    if (is_union) {
+        *nullable = true;
+        return GetArrowType(avro_node->leafAt(1), nullable);
+    }
+    auto type = avro_node->type();
+    auto logical_type = avro_node->logicalType();
+    switch (logical_type.type()) {
+        case ::avro::LogicalType::Type::NONE:
+            break;
+        case ::avro::LogicalType::Type::DATE:
+            if (type != ::avro::AVRO_INT) {
+                return Status::TypeError("invalid avro date stored as ", 
toString(type));
+            }
+            return arrow::date32();
+        case ::avro::LogicalType::Type::DECIMAL:
+            if (type != ::avro::AVRO_BYTES) {
+                return Status::TypeError("invalid avro decimal stored as ", 
toString(type));
+            }
+            return arrow::decimal128(logical_type.precision(), 
logical_type.scale());

Review Comment:
   Avro decimal logical types can be encoded as either `bytes` or `fixed` in 
common Avro schemas. Rejecting `AVRO_FIXED` reduces interoperability and may 
cause conversions to fail for valid Avro input. Consider supporting 
`AVRO_FIXED` here (and similarly emitting fixed if desired) or making the error 
explicitly state that only `bytes`-backed decimals are supported.



##########
src/paimon/format/avro/avro_file_format_factory.cpp:
##########
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon/format/avro/avro_file_format_factory.h"
+
+#include <utility>
+
+#include "paimon/factories/factory.h"
+#include "paimon/format/avro/avro_file_format.h"
+
+namespace paimon::avro {
+
+const char AvroFileFormatFactory::IDENTIFIER[] = "avro";
+
+Result<std::unique_ptr<FileFormat>> AvroFileFormatFactory::Create(
+    const std::map<std::string, std::string>& options) const {
+    return std::make_unique<AvroFileFormat>(options);
+}
+
+static __attribute__((constructor)) void 
AvroFileFormatFactoryRegisterLogicalTypes() {
+    ::avro::CustomLogicalTypeRegistry::instance().registerType(
+        "map", [](const std::string&) { return 
std::make_shared<MapLogicalType>(); });
+}

Review Comment:
   `__attribute__((constructor))` is compiler-specific (GCC/Clang) and can be 
problematic for portability and static init ordering. A more portable pattern 
is to trigger registration via a function-local static initializer that is 
guaranteed to run before use (e.g., called from 
`AvroFileFormatFactory::Create()` or from factory registration code).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to