nickva commented on a change in pull request #806: FEEDBACK ONLY: Compactor 
optimize emsort
URL: https://github.com/apache/couchdb/pull/806#discussion_r139162419
 
 

 ##########
 File path: src/couch/src/couch_file.erl
 ##########
 @@ -172,21 +175,55 @@ pread_binary(Fd, Pos) ->
 
 pread_iolist(Fd, Pos) ->
     case ioq:call(Fd, {pread_iolist, Pos}, erlang:get(io_priority)) of
-    {ok, IoList, <<>>} ->
-        {ok, IoList};
-    {ok, IoList, Md5} ->
-        case crypto:hash(md5, IoList) of
-        Md5 ->
-            {ok, IoList};
-        _ ->
-            couch_log:emergency("File corruption in ~p at position ~B",
-                     [Fd, Pos]),
-            exit({file_corruption, <<"file corruption">>})
-        end;
-    Error ->
-        Error
+        {ok, IoList, Md5} ->
+            {ok, verify_md5(Fd, Pos, IoList, Md5)};
+        Error ->
+            Error
     end.
 
+
+pread_terms(Fd, PosList) ->
+    {ok, Bins} = pread_binaries(Fd, PosList),
+    Terms = lists:map(fun(Bin) ->
+        couch_compress:decompress(Bin)
+    end, Bins),
+    {ok, Terms}.
+
+
+pread_binaries(Fd, PosList) ->
+    {ok, Data} = pread_iolists(Fd, PosList),
+    {ok, lists:map(fun erlang:iolist_to_binary/1, Data)}.
+
+
+pread_iolists(Fd, PosList) ->
+    case ioq:call(Fd, {pread_iolists, PosList}, erlang:get(io_priority)) of
+        {ok, DataMd5s} ->
+            Data = lists:zipwith(fun(Pos, {IoList, Md5}) ->
+                verify_md5(Fd, Pos, IoList, Md5)
+            end, PosList, DataMd5s),
+            {ok, Data};
+        Error ->
+            Error
+    end.
+
+
+append_terms(Fd, Terms) ->
+    append_terms(Fd, Terms, []).
+
+
+append_terms(Fd, Terms, Options) ->
+    Comp = couch_util:get_value(compression, Options, ?DEFAULT_COMPRESSION),
 
 Review comment:
   Mentioned it above as well, but I was wondering: if compression is not
specified, do we want to pick snappy as the default? Users might specify a
different default in the config (say deflate), but it will still not be picked
up here and snappy will be used anyway. Reading from config on each append term
might add up quickly, so I was wondering if it makes sense to default to not
compressing and just do a case here, using the ?term_to_bin macro directly if
there are no `Options` specified.
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to