This is an automated email from the ASF dual-hosted git repository.

rnewson pushed a commit to branch pwrite
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 8671e8da7c7e77cb160b58f500b80e06ef889f2f
Author: Robert Newson <[email protected]>
AuthorDate: Sat May 23 16:32:06 2026 +0100

    use pwrite for safety
---
 src/couch/priv/couch_cfile/couch_cfile.c | 83 ++++++++++++++++++++++++++++++++
 src/couch/src/couch_cfile.erl            | 18 +++++++
 src/couch/src/couch_file.erl             | 15 ++----
 3 files changed, 106 insertions(+), 10 deletions(-)

diff --git a/src/couch/priv/couch_cfile/couch_cfile.c 
b/src/couch/priv/couch_cfile/couch_cfile.c
index c7e6f16f0..027af3cfd 100644
--- a/src/couch/priv/couch_cfile/couch_cfile.c
+++ b/src/couch/priv/couch_cfile/couch_cfile.c
@@ -199,6 +199,42 @@ static long efile_writev(int fd, SysIOVec *iov, int 
iovlen, posix_errno_t* res_e
     return result;
 }
 
+// Copied from OTP just like efile_preadv. Differences are:
+//  - Pass file descriptor as int and errno result as a separate arg
+//  - Assume pwritev exists
+// Returns total bytes written (0 if nothing was written) or the negative pwritev result on error.
+static long efile_pwritev(int fd, long offset, SysIOVec *iov, int iovlen, posix_errno_t* res_errno) {
+
+    long bytes_written;
+    ssize_t result;
+
+    bytes_written = 0;
+
+    do {
+        if(iovlen < 1) { // everything consumed (or nothing to write)
+            result = 0;
+            break;
+        }
+
+        result = pwritev(fd, (const struct iovec *)iov, MIN(IOV_MAX, iovlen), offset);
+
+        if(result > 0) { // partial or full write: advance the iov window and the file offset
+            shift_iov(&iov, &iovlen, result);
+            bytes_written += result;
+            offset += result;
+        }
+    } while(result > 0 || (result < 0 && errno == EINTR)); // retry when interrupted by a signal
+
+    *res_errno = errno; // only meaningful to the caller when the return value is negative
+
+    if(result == 0 && bytes_written > 0) {
+        return bytes_written;
+    }
+
+    return result;
+}
+
+
 // Copied from OTP. Differences are:
 //    - File descriptor and return error passed in as separate args
 //    - This is for datasync only so don't pass that extra argument in
@@ -459,6 +495,52 @@ static ERL_NIF_TERM write_nif(ErlNifEnv* env, int argc, 
const ERL_NIF_TERM argv[
 #endif
 }
 
+// Positional write NIF taking (Handle, Offset, IOVec); follows implementation from prim_file_nif.c
+// Returns ok, {continue, BytesWritten, Tail} when iovec entries remain, else err_tup()/badarg().
+static ERL_NIF_TERM pwrite_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
+#ifdef COUCH_CFILE_SUPPORTED
+    handle_t* hdl;
+    ErlNifIOVec vec, *input = &vec;
+    posix_errno_t  res_errno = 0;
+    long bytes_written, offset;
+    ERL_NIF_TERM tail;
+
+    if (argc != 3
+       || !get_handle(env, argv[0], &hdl)
+       || !enif_is_number(env, argv[1])
+       || !enif_inspect_iovec(env, 64, argv[2], &tail, &input)) { // at most 64 entries per call; the rest lands in tail
+      return badarg(env);
+    }
+
+    if(!enif_get_int64(env, argv[1], &offset) || offset < 0) { // non-integer or negative offset
+        return err_tup(env, EINVAL);
+    }
+
+    // ------ Critical section start ------
+    READ_LOCK;
+    if (hdl->fd < 0) {
+       READ_UNLOCK;
+       return err_tup(env, EINVAL);
+    }
+    bytes_written = efile_pwritev(hdl->fd, offset, input->iov, input->iovcnt, &res_errno);
+    READ_UNLOCK;
+    // ------- Critical section end ------
+
+    if(bytes_written < 0) {
+      return err_tup(env, res_errno);
+    }
+
+    if(!enif_is_empty_list(env, tail)) { // byte count must be a term: the caller adds it to the offset
+      return enif_make_tuple3(env, ATOM_CONTINUE, enif_make_long(env, bytes_written), tail);
+    }
+
+    return ATOM_OK;
+#else
+    return err_tup(env, EINVAL);
+#endif
+}
+
+
 static ERL_NIF_TERM seek_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM 
argv[]) {
 #ifdef COUCH_CFILE_SUPPORTED
     handle_t* hdl;
@@ -719,6 +801,7 @@ static ErlNifFunc funcs[] = {
     {"eof_nif",      1, eof_nif,      ERL_NIF_DIRTY_JOB_IO_BOUND},
     {"seek_nif",     3, seek_nif,     ERL_NIF_DIRTY_JOB_IO_BOUND},
     {"write_nif",    2, write_nif,    ERL_NIF_DIRTY_JOB_IO_BOUND},
+    {"pwrite_nif",   3, pwrite_nif,   ERL_NIF_DIRTY_JOB_IO_BOUND},
     {"datasync_nif", 1, datasync_nif, ERL_NIF_DIRTY_JOB_IO_BOUND},
     {"truncate_nif", 1, truncate_nif, ERL_NIF_DIRTY_JOB_IO_BOUND},
     {"info_nif",     1, info_nif}
diff --git a/src/couch/src/couch_cfile.erl b/src/couch/src/couch_cfile.erl
index 3511fc162..431783bf7 100644
--- a/src/couch/src/couch_cfile.erl
+++ b/src/couch/src/couch_cfile.erl
@@ -31,6 +31,7 @@
     position/2,
     datasync/1,
     write/2,
+    pwrite/3,
     truncate/1,
     fd/1,
     advise/4
@@ -53,6 +54,7 @@
     eof_nif/1,
     seek_nif/3,
     write_nif/2,
+    pwrite_nif/3,
     datasync_nif/1,
     truncate_nif/1
 ]).
@@ -119,6 +121,9 @@ datasync(_) ->
 write(#file_descriptor{module = ?MODULE} = Fd, IOData) ->
     write_1(owner_handle(Fd), erlang:iolist_to_iovec(IOData)).
 
+pwrite(#file_descriptor{module = ?MODULE} = Fd, Pos, IOData) ->
+    pwrite_1(owner_handle(Fd), Pos, erlang:iolist_to_iovec(IOData)).
+
 truncate(#file_descriptor{module = ?MODULE} = Fd) ->
     truncate_nif(owner_handle(Fd)).
 
@@ -215,6 +220,16 @@ write_1(Ref, IOVec) ->
             {error, Reason}
     end.
 
+pwrite_1(Ref, Pos, IOVec) ->
+    case pwrite_nif(Ref, Pos, IOVec) of
+        ok ->
+            ok;
+        {error, _} = Error ->
+            Error;
+        {continue, Written, Rest} ->
+            pwrite_1(Ref, Pos + Written, Rest)  % resume right after the bytes already written
+    end.
+
 init() ->
     case os:type() of
         {unix, _} ->
@@ -283,6 +298,9 @@ seek_nif(_, _, _) ->
 write_nif(_, _) ->
     {error, einval}.
 
+pwrite_nif(_, _, _) ->
+    {error, einval}.
+
 datasync_nif(_) ->
     {error, einval}.
 
diff --git a/src/couch/src/couch_file.erl b/src/couch/src/couch_file.erl
index 2d0920f7e..e8e31c3b2 100644
--- a/src/couch/src/couch_file.erl
+++ b/src/couch/src/couch_file.erl
@@ -514,7 +514,7 @@ file_open_options(Options) ->
             true ->
                 [];
             false ->
-                [append]
+                [write]
         end.
 
 maybe_track_open_os_files(Options) ->
@@ -635,9 +635,9 @@ append_bins(#file{fd = Fd, eof = Pos} = File, Bins) ->
         Bins
     ),
     {AllBlocks, Resps} = lists:unzip(BlockResps),
-    case file:write(Fd, AllBlocks) of
+    case file:pwrite(Fd, Pos, AllBlocks) of
         ok -> {{ok, Resps}, File#file{eof = FinalPos}};
-        Error -> {Error, reset_eof(File)}
+        Error -> {Error, File}
     end.
 
 pread(#file{} = File, PosL) ->
@@ -777,9 +777,9 @@ handle_write_header(Bin, #file{fd = Fd, eof = Pos} = File) 
->
         BlockOffset -> Padding = <<0:(8 * (?SIZE_BLOCK - BlockOffset))>>
     end,
     FinalBin = [Padding, <<1, BinSize:32/integer>> | make_blocks(5, [Bin])],
-    case file:write(Fd, FinalBin) of
+    case file:pwrite(Fd, Pos, FinalBin) of
         ok -> {ok, File#file{eof = Pos + iolist_size(FinalBin)}};
-        {error, Error} -> {{error, Error}, reset_eof(File)}
+        {error, Error} -> {{error, Error}, File}
     end.
 
 read_multi_raw_iolists_int(#file{fd = Fd, eof = Eof} = File, PosLens) ->
@@ -959,11 +959,6 @@ is_idle(#file{is_sys = false}) ->
 process_info(Pid) ->
     couch_util:process_dict_get(Pid, couch_file_fd).
 
-%% in event of a partially successful write.
-reset_eof(#file{} = File) ->
-    {ok, Eof} = file:position(File#file.fd, eof),
-    File#file{eof = Eof}.
-
 -spec generate_checksum(binary()) -> <<_:128>>.
 generate_checksum(Bin) when is_binary(Bin) ->
     case generate_xxhash_checksums() of

Reply via email to