paleolimbot commented on code in PR #564:
URL: https://github.com/apache/arrow-nanoarrow/pull/564#discussion_r1689741153


##########
src/nanoarrow/ipc/encoder.c:
##########
@@ -0,0 +1,108 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <errno.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+
+// For thread safe shared buffers we need C11 + stdatomic.h
+// Can compile with -DNANOARROW_IPC_USE_STDATOMIC=0 or 1 to override
+// automatic detection
+#if !defined(NANOARROW_IPC_USE_STDATOMIC)
+#define NANOARROW_IPC_USE_STDATOMIC 0
+
+// Check for C11
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
+
+// Check for GCC 4.8, which doesn't include stdatomic.h but does
+// not define __STDC_NO_ATOMICS__
+#if defined(__clang__) || !defined(__GNUC__) || __GNUC__ >= 5
+
+#if !defined(__STDC_NO_ATOMICS__)
+#include <stdatomic.h>
+#undef NANOARROW_IPC_USE_STDATOMIC
+#define NANOARROW_IPC_USE_STDATOMIC 1
+#endif
+#endif
+#endif
+
+#endif
+
+#include "nanoarrow/ipc/flatcc_generated.h"
+#include "nanoarrow/nanoarrow.h"
+#include "nanoarrow/nanoarrow_ipc.h"
+
+// R 3.6 / Windows builds on a very old toolchain that does not define ENODATA
+#if defined(_WIN32) && !defined(_MSC_VER) && !defined(ENODATA)
+#define ENODATA 120
+#endif
+
+#define ns(x) FLATBUFFERS_WRAP_NAMESPACE(org_apache_arrow_flatbuf, x)
+
+#define FLATCC_RETURN_UNLESS_0(x) \
+  if (ns(x) != 0) return ENOMEM;
+
+struct ArrowIpcEncoderPrivate {
+  flatcc_builder_t builder;
+  struct ArrowBuffer buffers, nodes;
+};
+
+ArrowErrorCode ArrowIpcEncoderInit(struct ArrowIpcEncoder* encoder) {
+  NANOARROW_DCHECK(encoder);
+  memset(encoder, 0, sizeof(struct ArrowIpcEncoder));
+  encoder->encode_buffer = NULL;
+  encoder->encode_buffer_state = NULL;
+  encoder->codec = NANOARROW_IPC_COMPRESSION_TYPE_NONE;
+  encoder->private_data = ArrowMalloc(sizeof(struct ArrowIpcEncoderPrivate));
+  struct ArrowIpcEncoderPrivate* private =
+      (struct ArrowIpcEncoderPrivate*)encoder->private_data;
+  ArrowBufferInit(&private->buffers);
+  ArrowBufferInit(&private->nodes);
+  if (flatcc_builder_init(&private->builder) == -1) {
+    return ESPIPE;
+  }
+  return NANOARROW_OK;
+}
+
+void ArrowIpcEncoderReset(struct ArrowIpcEncoder* encoder) {
+  NANOARROW_DCHECK(encoder && encoder->private_data);
+  struct ArrowIpcEncoderPrivate* private =
+      (struct ArrowIpcEncoderPrivate*)encoder->private_data;
+  flatcc_builder_clear(&private->builder);
+  ArrowBufferReset(&private->nodes);
+  ArrowBufferReset(&private->buffers);
+  ArrowFree(private);
+  memset(encoder, 0, sizeof(struct ArrowIpcEncoder));
+}
+
+ArrowErrorCode ArrowIpcEncoderFinalizeBuffer(struct ArrowIpcEncoder* encoder,
+                                             struct ArrowBuffer* out) {
+  NANOARROW_DCHECK(encoder && encoder->private_data && out);
+  struct ArrowIpcEncoderPrivate* private =
+      (struct ArrowIpcEncoderPrivate*)encoder->private_data;
+  ArrowBufferReset(out);
+  size_t size = flatcc_builder_get_buffer_size(&private->builder);
+  if (size == 0) {
+    // Finalizing an empty flatcc_builder_t triggers an assertion
+    return NANOARROW_OK;
+  }
+
+  out->size_bytes = out->capacity_bytes = (int64_t)size;
+  out->data = (uint8_t*)flatcc_builder_finalize_buffer(&private->builder, 
&size);
+  return out->data ? NANOARROW_OK : ENOMEM;

Review Comment:
   I think this works by accident because `ArrowDefaultAllocator().free()` and 
`FLATCC_BUILDER_FREE()` both end up calling `free()` from libc in the default 
implementations. It looks like `FLATCC_BUILDER_FREE()` is defined in the flatcc runtime (which we 
allow to be linked from an existing install) and so we probably need to inject 
a custom allocator into the buffer if we're going to assume that we get a fresh 
`ArrowBuffer()` here.
   
   It's probably more likely that we want to append this to an 
`ArrowIpcOutputStream()` or an existing `ArrowBuffer` as some part of output 
buffering. Using `ArrowBufferAppend()` is probably easiest for this PR just 
until we sort out exactly what we need to do with the buffer at the end.
   
   Slightly unrelated, but we would have to remember to set `out->size_bytes = 
0;` before returning `ENOMEM`. It is not a strong guarantee, but in general 
(exported) nanoarrow functions do not touch pointer output arguments unless 
returning `NANOARROW_OK`.



##########
src/nanoarrow/ipc/encoder.c:
##########
@@ -0,0 +1,108 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <errno.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+
+// For thread safe shared buffers we need C11 + stdatomic.h
+// Can compile with -DNANOARROW_IPC_USE_STDATOMIC=0 or 1 to override
+// automatic detection
+#if !defined(NANOARROW_IPC_USE_STDATOMIC)
+#define NANOARROW_IPC_USE_STDATOMIC 0
+
+// Check for C11
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
+
+// Check for GCC 4.8, which doesn't include stdatomic.h but does
+// not define __STDC_NO_ATOMICS__
+#if defined(__clang__) || !defined(__GNUC__) || __GNUC__ >= 5
+
+#if !defined(__STDC_NO_ATOMICS__)
+#include <stdatomic.h>
+#undef NANOARROW_IPC_USE_STDATOMIC
+#define NANOARROW_IPC_USE_STDATOMIC 1
+#endif
+#endif
+#endif
+
+#endif
+
+#include "nanoarrow/ipc/flatcc_generated.h"
+#include "nanoarrow/nanoarrow.h"
+#include "nanoarrow/nanoarrow_ipc.h"
+
+// R 3.6 / Windows builds on a very old toolchain that does not define ENODATA
+#if defined(_WIN32) && !defined(_MSC_VER) && !defined(ENODATA)
+#define ENODATA 120
+#endif
+
+#define ns(x) FLATBUFFERS_WRAP_NAMESPACE(org_apache_arrow_flatbuf, x)
+
+#define FLATCC_RETURN_UNLESS_0(x) \
+  if (ns(x) != 0) return ENOMEM;
+
+struct ArrowIpcEncoderPrivate {
+  flatcc_builder_t builder;
+  struct ArrowBuffer buffers, nodes;
+};
+
+ArrowErrorCode ArrowIpcEncoderInit(struct ArrowIpcEncoder* encoder) {
+  NANOARROW_DCHECK(encoder);
+  memset(encoder, 0, sizeof(struct ArrowIpcEncoder));
+  encoder->encode_buffer = NULL;
+  encoder->encode_buffer_state = NULL;
+  encoder->codec = NANOARROW_IPC_COMPRESSION_TYPE_NONE;
+  encoder->private_data = ArrowMalloc(sizeof(struct ArrowIpcEncoderPrivate));
+  struct ArrowIpcEncoderPrivate* private =
+      (struct ArrowIpcEncoderPrivate*)encoder->private_data;
+  ArrowBufferInit(&private->buffers);
+  ArrowBufferInit(&private->nodes);
+  if (flatcc_builder_init(&private->builder) == -1) {
+    return ESPIPE;

Review Comment:
   ```suggestion
       ArrowBufferReset(&private->buffers);
       ArrowBufferReset(&private->nodes);
       ArrowFree(encoder->private_data);
       return ESPIPE;
   ```



##########
src/nanoarrow/ipc/encoder.c:
##########
@@ -0,0 +1,108 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <errno.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+
+// For thread safe shared buffers we need C11 + stdatomic.h
+// Can compile with -DNANOARROW_IPC_USE_STDATOMIC=0 or 1 to override
+// automatic detection
+#if !defined(NANOARROW_IPC_USE_STDATOMIC)
+#define NANOARROW_IPC_USE_STDATOMIC 0
+
+// Check for C11
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
+
+// Check for GCC 4.8, which doesn't include stdatomic.h but does
+// not define __STDC_NO_ATOMICS__
+#if defined(__clang__) || !defined(__GNUC__) || __GNUC__ >= 5
+
+#if !defined(__STDC_NO_ATOMICS__)
+#include <stdatomic.h>
+#undef NANOARROW_IPC_USE_STDATOMIC
+#define NANOARROW_IPC_USE_STDATOMIC 1
+#endif
+#endif
+#endif
+
+#endif
+
+#include "nanoarrow/ipc/flatcc_generated.h"
+#include "nanoarrow/nanoarrow.h"
+#include "nanoarrow/nanoarrow_ipc.h"
+
+// R 3.6 / Windows builds on a very old toolchain that does not define ENODATA
+#if defined(_WIN32) && !defined(_MSC_VER) && !defined(ENODATA)
+#define ENODATA 120
+#endif
+
+#define ns(x) FLATBUFFERS_WRAP_NAMESPACE(org_apache_arrow_flatbuf, x)
+
+#define FLATCC_RETURN_UNLESS_0(x) \
+  if (ns(x) != 0) return ENOMEM;
+
+struct ArrowIpcEncoderPrivate {
+  flatcc_builder_t builder;
+  struct ArrowBuffer buffers, nodes;
+};
+
+ArrowErrorCode ArrowIpcEncoderInit(struct ArrowIpcEncoder* encoder) {
+  NANOARROW_DCHECK(encoder);

Review Comment:
   nit for consistency:
   
   ```suggestion
     NANOARROW_DCHECK(encoder != NULL);
   ```



##########
src/nanoarrow/ipc/encoder_test.cc:
##########
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <arrow/array.h>
+#include <arrow/c/bridge.h>
+#include <arrow/compute/api.h>
+#include <arrow/io/memory.h>
+#include <arrow/ipc/api.h>
+#include <arrow/util/key_value_metadata.h>

Review Comment:
   We probably don't need these quite yet (and it would be helpful to keep the 
Arrow C++ involvement to the files test for the purposes of removing it later)



##########
src/nanoarrow/ipc/encoder.c:
##########
@@ -0,0 +1,108 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <errno.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+
+// For thread safe shared buffers we need C11 + stdatomic.h
+// Can compile with -DNANOARROW_IPC_USE_STDATOMIC=0 or 1 to override
+// automatic detection
+#if !defined(NANOARROW_IPC_USE_STDATOMIC)
+#define NANOARROW_IPC_USE_STDATOMIC 0
+
+// Check for C11
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
+
+// Check for GCC 4.8, which doesn't include stdatomic.h but does
+// not define __STDC_NO_ATOMICS__
+#if defined(__clang__) || !defined(__GNUC__) || __GNUC__ >= 5
+
+#if !defined(__STDC_NO_ATOMICS__)
+#include <stdatomic.h>
+#undef NANOARROW_IPC_USE_STDATOMIC
+#define NANOARROW_IPC_USE_STDATOMIC 1
+#endif
+#endif
+#endif
+
+#endif
+
+#include "nanoarrow/ipc/flatcc_generated.h"
+#include "nanoarrow/nanoarrow.h"
+#include "nanoarrow/nanoarrow_ipc.h"
+
+// R 3.6 / Windows builds on a very old toolchain that does not define ENODATA
+#if defined(_WIN32) && !defined(_MSC_VER) && !defined(ENODATA)
+#define ENODATA 120
+#endif
+
+#define ns(x) FLATBUFFERS_WRAP_NAMESPACE(org_apache_arrow_flatbuf, x)
+
+#define FLATCC_RETURN_UNLESS_0(x) \
+  if (ns(x) != 0) return ENOMEM;
+
+struct ArrowIpcEncoderPrivate {
+  flatcc_builder_t builder;
+  struct ArrowBuffer buffers, nodes;
+};
+
+ArrowErrorCode ArrowIpcEncoderInit(struct ArrowIpcEncoder* encoder) {
+  NANOARROW_DCHECK(encoder);
+  memset(encoder, 0, sizeof(struct ArrowIpcEncoder));
+  encoder->encode_buffer = NULL;
+  encoder->encode_buffer_state = NULL;
+  encoder->codec = NANOARROW_IPC_COMPRESSION_TYPE_NONE;
+  encoder->private_data = ArrowMalloc(sizeof(struct ArrowIpcEncoderPrivate));
+  struct ArrowIpcEncoderPrivate* private =
+      (struct ArrowIpcEncoderPrivate*)encoder->private_data;
+  ArrowBufferInit(&private->buffers);
+  ArrowBufferInit(&private->nodes);
+  if (flatcc_builder_init(&private->builder) == -1) {
+    return ESPIPE;
+  }
+  return NANOARROW_OK;
+}
+
+void ArrowIpcEncoderReset(struct ArrowIpcEncoder* encoder) {
+  NANOARROW_DCHECK(encoder && encoder->private_data);
+  struct ArrowIpcEncoderPrivate* private =
+      (struct ArrowIpcEncoderPrivate*)encoder->private_data;
+  flatcc_builder_clear(&private->builder);
+  ArrowBufferReset(&private->nodes);
+  ArrowBufferReset(&private->buffers);
+  ArrowFree(private);
+  memset(encoder, 0, sizeof(struct ArrowIpcEncoder));
+}
+
+ArrowErrorCode ArrowIpcEncoderFinalizeBuffer(struct ArrowIpcEncoder* encoder,
+                                             struct ArrowBuffer* out) {
+  NANOARROW_DCHECK(encoder && encoder->private_data && out);

Review Comment:
   ```suggestion
     NANOARROW_DCHECK(encoder != NULL && encoder->private_data != NULL && out != NULL);
   ```



##########
src/nanoarrow/ipc/encoder.c:
##########
@@ -0,0 +1,108 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <errno.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+
+// For thread safe shared buffers we need C11 + stdatomic.h
+// Can compile with -DNANOARROW_IPC_USE_STDATOMIC=0 or 1 to override
+// automatic detection
+#if !defined(NANOARROW_IPC_USE_STDATOMIC)
+#define NANOARROW_IPC_USE_STDATOMIC 0
+
+// Check for C11
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
+
+// Check for GCC 4.8, which doesn't include stdatomic.h but does
+// not define __STDC_NO_ATOMICS__
+#if defined(__clang__) || !defined(__GNUC__) || __GNUC__ >= 5
+
+#if !defined(__STDC_NO_ATOMICS__)
+#include <stdatomic.h>
+#undef NANOARROW_IPC_USE_STDATOMIC
+#define NANOARROW_IPC_USE_STDATOMIC 1
+#endif
+#endif
+#endif
+
+#endif
+
+#include "nanoarrow/ipc/flatcc_generated.h"
+#include "nanoarrow/nanoarrow.h"
+#include "nanoarrow/nanoarrow_ipc.h"
+
+// R 3.6 / Windows builds on a very old toolchain that does not define ENODATA
+#if defined(_WIN32) && !defined(_MSC_VER) && !defined(ENODATA)
+#define ENODATA 120
+#endif
+
+#define ns(x) FLATBUFFERS_WRAP_NAMESPACE(org_apache_arrow_flatbuf, x)
+
+#define FLATCC_RETURN_UNLESS_0(x) \
+  if (ns(x) != 0) return ENOMEM;
+
+struct ArrowIpcEncoderPrivate {
+  flatcc_builder_t builder;
+  struct ArrowBuffer buffers, nodes;
+};
+
+ArrowErrorCode ArrowIpcEncoderInit(struct ArrowIpcEncoder* encoder) {
+  NANOARROW_DCHECK(encoder);
+  memset(encoder, 0, sizeof(struct ArrowIpcEncoder));
+  encoder->encode_buffer = NULL;
+  encoder->encode_buffer_state = NULL;
+  encoder->codec = NANOARROW_IPC_COMPRESSION_TYPE_NONE;
+  encoder->private_data = ArrowMalloc(sizeof(struct ArrowIpcEncoderPrivate));
+  struct ArrowIpcEncoderPrivate* private =
+      (struct ArrowIpcEncoderPrivate*)encoder->private_data;
+  ArrowBufferInit(&private->buffers);
+  ArrowBufferInit(&private->nodes);
+  if (flatcc_builder_init(&private->builder) == -1) {
+    return ESPIPE;
+  }
+  return NANOARROW_OK;
+}
+
+void ArrowIpcEncoderReset(struct ArrowIpcEncoder* encoder) {
+  NANOARROW_DCHECK(encoder && encoder->private_data);
+  struct ArrowIpcEncoderPrivate* private =
+      (struct ArrowIpcEncoderPrivate*)encoder->private_data;
+  flatcc_builder_clear(&private->builder);
+  ArrowBufferReset(&private->nodes);
+  ArrowBufferReset(&private->buffers);
+  ArrowFree(private);
+  memset(encoder, 0, sizeof(struct ArrowIpcEncoder));
+}
+
+ArrowErrorCode ArrowIpcEncoderFinalizeBuffer(struct ArrowIpcEncoder* encoder,
+                                             struct ArrowBuffer* out) {
+  NANOARROW_DCHECK(encoder && encoder->private_data && out);
+  struct ArrowIpcEncoderPrivate* private =
+      (struct ArrowIpcEncoderPrivate*)encoder->private_data;
+  ArrowBufferReset(out);

Review Comment:
   If the caller hasn't called `ArrowBufferInit()` yet, this will segfault. See 
below too, but if we're assuming that `out` is something the caller has 
initialized, we need to `ArrowBufferAppend()` a copy. If we're injecting a 
fresh `ArrowBuffer` into `out` with a custom allocator that calls the correct 
free from the builder runtime, we can't call `ArrowBufferReset()` on the input.



##########
src/nanoarrow/nanoarrow_ipc.h:
##########
@@ -379,6 +395,44 @@ ArrowErrorCode ArrowIpcArrayStreamReaderInit(
     struct ArrowArrayStream* out, struct ArrowIpcInputStream* input_stream,
     struct ArrowIpcArrayStreamReaderOptions* options);
 
+/// \brief Encoder for Arrow IPC messages
+///
+/// This structure is intended to be allocated by the caller,
+/// initialized using ArrowIpcEncoderInit(), and released with
+/// ArrowIpcEncoderReset().
+struct ArrowIpcEncoder {
+  /// \brief Compression to encode in the next RecordBatch message
+  enum ArrowIpcCompressionType codec;
+
+  /// \brief Finalized body length of the most recently encoded RecordBatch 
message
+  int64_t body_length;
+
+  /// \brief Callback invoked against each buffer to be encoded.
+  ///
+  /// Encoding of buffers is left as a callback to accommodate dissociated 
data storage.
+  /// One implementation of this callback might copy all buffers into a 
contiguous body
+  /// for use in an arrow IPC stream, another implementation might store 
offsets and
+  /// lengths relative to a known arena.
+  ArrowErrorCode (*encode_buffer)(struct ArrowBufferView buffer_view,
+                                  struct ArrowIpcEncoder* encoder, int64_t* 
offset,
+                                  int64_t* length, struct ArrowError* error);
+
+  /// \brief Pointer to arbitrary data used by encode_buffer()
+  void* encode_buffer_state;
+
+  /// \brief Private resources managed by this library
+  void* private_data;
+};
+
+/// \brief Initialize an encoder
+ArrowErrorCode ArrowIpcEncoderInit(struct ArrowIpcEncoder* encoder);
+
+/// \brief Release all resources attached to an encoder
+void ArrowIpcEncoderReset(struct ArrowIpcEncoder* encoder);
+
+/// \brief Finalize the most recently encoded message to a buffer

Review Comment:
   Once we've sorted out the behaviour of `out`, that should be documented here, too!



##########
src/nanoarrow/ipc/encoder.c:
##########
@@ -0,0 +1,108 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <errno.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+
+// For thread safe shared buffers we need C11 + stdatomic.h
+// Can compile with -DNANOARROW_IPC_USE_STDATOMIC=0 or 1 to override
+// automatic detection
+#if !defined(NANOARROW_IPC_USE_STDATOMIC)
+#define NANOARROW_IPC_USE_STDATOMIC 0
+
+// Check for C11
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
+
+// Check for GCC 4.8, which doesn't include stdatomic.h but does
+// not define __STDC_NO_ATOMICS__
+#if defined(__clang__) || !defined(__GNUC__) || __GNUC__ >= 5
+
+#if !defined(__STDC_NO_ATOMICS__)
+#include <stdatomic.h>
+#undef NANOARROW_IPC_USE_STDATOMIC
+#define NANOARROW_IPC_USE_STDATOMIC 1
+#endif
+#endif
+#endif
+
+#endif
+
+#include "nanoarrow/ipc/flatcc_generated.h"
+#include "nanoarrow/nanoarrow.h"
+#include "nanoarrow/nanoarrow_ipc.h"
+
+// R 3.6 / Windows builds on a very old toolchain that does not define ENODATA
+#if defined(_WIN32) && !defined(_MSC_VER) && !defined(ENODATA)
+#define ENODATA 120
+#endif
+
+#define ns(x) FLATBUFFERS_WRAP_NAMESPACE(org_apache_arrow_flatbuf, x)
+
+#define FLATCC_RETURN_UNLESS_0(x) \
+  if (ns(x) != 0) return ENOMEM;
+
+struct ArrowIpcEncoderPrivate {
+  flatcc_builder_t builder;
+  struct ArrowBuffer buffers, nodes;

Review Comment:
   nit for consistency:
   
   ```suggestion
     struct ArrowBuffer buffers;
     struct ArrowBuffer nodes;
   ```



##########
src/nanoarrow/ipc/encoder.c:
##########
@@ -0,0 +1,108 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <errno.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+
+// For thread safe shared buffers we need C11 + stdatomic.h
+// Can compile with -DNANOARROW_IPC_USE_STDATOMIC=0 or 1 to override
+// automatic detection
+#if !defined(NANOARROW_IPC_USE_STDATOMIC)
+#define NANOARROW_IPC_USE_STDATOMIC 0
+
+// Check for C11
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
+
+// Check for GCC 4.8, which doesn't include stdatomic.h but does
+// not define __STDC_NO_ATOMICS__
+#if defined(__clang__) || !defined(__GNUC__) || __GNUC__ >= 5
+
+#if !defined(__STDC_NO_ATOMICS__)
+#include <stdatomic.h>
+#undef NANOARROW_IPC_USE_STDATOMIC
+#define NANOARROW_IPC_USE_STDATOMIC 1
+#endif
+#endif
+#endif
+
+#endif
+
+#include "nanoarrow/ipc/flatcc_generated.h"
+#include "nanoarrow/nanoarrow.h"
+#include "nanoarrow/nanoarrow_ipc.h"
+
+// R 3.6 / Windows builds on a very old toolchain that does not define ENODATA
+#if defined(_WIN32) && !defined(_MSC_VER) && !defined(ENODATA)
+#define ENODATA 120
+#endif
+
+#define ns(x) FLATBUFFERS_WRAP_NAMESPACE(org_apache_arrow_flatbuf, x)
+
+#define FLATCC_RETURN_UNLESS_0(x) \
+  if (ns(x) != 0) return ENOMEM;
+
+struct ArrowIpcEncoderPrivate {
+  flatcc_builder_t builder;
+  struct ArrowBuffer buffers, nodes;
+};
+
+ArrowErrorCode ArrowIpcEncoderInit(struct ArrowIpcEncoder* encoder) {
+  NANOARROW_DCHECK(encoder);
+  memset(encoder, 0, sizeof(struct ArrowIpcEncoder));
+  encoder->encode_buffer = NULL;
+  encoder->encode_buffer_state = NULL;
+  encoder->codec = NANOARROW_IPC_COMPRESSION_TYPE_NONE;
+  encoder->private_data = ArrowMalloc(sizeof(struct ArrowIpcEncoderPrivate));
+  struct ArrowIpcEncoderPrivate* private =
+      (struct ArrowIpcEncoderPrivate*)encoder->private_data;
+  ArrowBufferInit(&private->buffers);
+  ArrowBufferInit(&private->nodes);
+  if (flatcc_builder_init(&private->builder) == -1) {
+    return ESPIPE;
+  }
+  return NANOARROW_OK;
+}
+
+void ArrowIpcEncoderReset(struct ArrowIpcEncoder* encoder) {
+  NANOARROW_DCHECK(encoder && encoder->private_data);

Review Comment:
   ```suggestion
     NANOARROW_DCHECK(encoder != NULL && encoder->private_data);
   ```



##########
src/nanoarrow/ipc/encoder_test.cc:
##########
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <arrow/array.h>
+#include <arrow/c/bridge.h>
+#include <arrow/compute/api.h>
+#include <arrow/io/memory.h>
+#include <arrow/ipc/api.h>
+#include <arrow/util/key_value_metadata.h>

Review Comment:
   We probably don't need these quite yet (and it would be helpful to keep the 
Arrow C++ involvement to the files test for the purposes of removing it later)



##########
src/nanoarrow/nanoarrow_ipc.h:
##########
@@ -379,6 +395,44 @@ ArrowErrorCode ArrowIpcArrayStreamReaderInit(
     struct ArrowArrayStream* out, struct ArrowIpcInputStream* input_stream,
     struct ArrowIpcArrayStreamReaderOptions* options);
 
+/// \brief Encoder for Arrow IPC messages
+///
+/// This structure is intended to be allocated by the caller,
+/// initialized using ArrowIpcEncoderInit(), and released with
+/// ArrowIpcEncoderReset().
+struct ArrowIpcEncoder {
+  /// \brief Compression to encode in the next RecordBatch message
+  enum ArrowIpcCompressionType codec;
+
+  /// \brief Finalized body length of the most recently encoded RecordBatch 
message
+  int64_t body_length;
+
+  /// \brief Callback invoked against each buffer to be encoded.
+  ///
+  /// Encoding of buffers is left as a callback to accommodate dissociated 
data storage.
+  /// One implementation of this callback might copy all buffers into a 
contiguous body
+  /// for use in an arrow IPC stream, another implementation might store 
offsets and
+  /// lengths relative to a known arena.
+  ArrowErrorCode (*encode_buffer)(struct ArrowBufferView buffer_view,
+                                  struct ArrowIpcEncoder* encoder, int64_t* 
offset,
+                                  int64_t* length, struct ArrowError* error);
+
+  /// \brief Pointer to arbitrary data used by encode_buffer()
+  void* encode_buffer_state;
+
+  /// \brief Private resources managed by this library
+  void* private_data;
+};
+
+/// \brief Initialize an encoder

Review Comment:
   ```suggestion
   /// \brief Initialize an encoder
   ///
   /// If NANOARROW_OK is returned, the caller must call `ArrowIpcEncoderReset()`
   /// to release resources allocated by this function.
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to