This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch fix-couchjs-utf8-conversions-take2
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 2c1bcbf8a33e81be61ce2a158eb779d81b01d08a
Author: Paul J. Davis <[email protected]>
AuthorDate: Tue Apr 14 15:05:58 2020 -0500

    Add tests for couchjs UTF-8 support
---
 src/couch/test/eunit/couch_js_tests.erl | 99 +++++++++++++++++++++++++++++----
 1 file changed, 87 insertions(+), 12 deletions(-)

diff --git a/src/couch/test/eunit/couch_js_tests.erl b/src/couch/test/eunit/couch_js_tests.erl
index cd6452c..2a2f3bb 100644
--- a/src/couch/test/eunit/couch_js_tests.erl
+++ b/src/couch/test/eunit/couch_js_tests.erl
@@ -14,17 +14,6 @@
 -include_lib("eunit/include/eunit.hrl").
 
 
--define(FUNC, <<
-  "var state = [];\n"
-  "function(doc) {\n"
-  "  var val = \"0123456789ABCDEF\";\n"
-  "  for(var i = 0; i < 165535; i++) {\n"
-  "    state.push([val, val]);\n"
-  "  }\n"
-  "}\n"
->>).
-
-
 couch_js_test_() ->
     {
         "Test couchjs",
@@ -33,15 +22,101 @@ couch_js_test_() ->
             fun test_util:start_couch/0,
             fun test_util:stop_couch/1,
             [
+                fun should_create_sandbox/0,
+                fun should_roundtrip_utf8/0,
+                fun should_roundtrip_modified_utf8/0,
+                fun should_replace_broken_utf16/0,
                 {timeout, 60000, fun should_exit_on_oom/0}
             ]
         }
     }.
 
 
+should_create_sandbox() ->
+    % Try and detect whether we can see out of the
+    % sandbox or not.
+    Src = <<
+      "function(doc) {\n"
+      "  try {\n"
+      "    emit(false, typeof(Couch.compile_function));\n"
+      "  } catch (e) {\n"
+      "    emit(true, e.message);\n"
+      "  }\n"
+      "}\n"
+    >>,
+    Proc = couch_query_servers:get_os_process(<<"javascript">>),
+    true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, Src]),
+    Result = couch_query_servers:proc_prompt(Proc, [<<"map_doc">>, <<"{}">>]),
+    ?assertEqual([[[true, <<"Couch is not defined">>]]], Result).
+
+
+should_roundtrip_utf8() ->
+    % Try round tripping UTF-8 both directions through
+    % couchjs. These tests use hex encoded values of
+    % Ä (C384) and Ü (C39C) so as to avoid odd editor/Erlang encoding
+    % strangeness.
+    Src = <<
+      "function(doc) {\n"
+      "  emit(doc.value, \"", 16#C3, 16#9C, "\");\n"
+      "}\n"
+    >>,
+    Proc = couch_query_servers:get_os_process(<<"javascript">>),
+    true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, Src]),
+    Doc = {[
+        {<<"value">>, <<16#C3, 16#84>>}
+    ]},
+    Result = couch_query_servers:proc_prompt(Proc, [<<"map_doc">>, Doc]),
+    ?assertEqual([[[<<16#C3, 16#84>>, <<16#C3, 16#9C>>]]], Result).
+
+
+should_roundtrip_modified_utf8() ->
+    % Mimicking the test case from the mailing list
+    Src = <<
+      "function(doc) {\n"
+      "  emit(doc.value.toLowerCase(), \"", 16#C3, 16#9C, "\");\n"
+      "}\n"
+    >>,
+    Proc = couch_query_servers:get_os_process(<<"javascript">>),
+    true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, Src]),
+    Doc = {[
+        {<<"value">>, <<16#C3, 16#84>>}
+    ]},
+    Result = couch_query_servers:proc_prompt(Proc, [<<"map_doc">>, Doc]),
+    ?assertEqual([[[<<16#C3, 16#A4>>, <<16#C3, 16#9C>>]]], Result).
+
+
+should_replace_broken_utf16() ->
+    % This test reverses the surrogate pair of
+    % the Boom emoji U+1F4A5
+    Src = <<
+      "function(doc) {\n"
+      "  emit(doc.value.split(\"\").reverse().join(\"\"), 1);\n"
+      "}\n"
+    >>,
+    Proc = couch_query_servers:get_os_process(<<"javascript">>),
+    true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, Src]),
+    Doc = {[
+        {<<"value">>, list_to_binary(xmerl_ucs:to_utf8([16#1F4A5]))}
+    ]},
+    Result = couch_query_servers:proc_prompt(Proc, [<<"map_doc">>, Doc]),
+    % Each unpaired UTF-16 surrogate gets replaced with the
+    % 16#FFFD replacement marker
+    Markers = list_to_binary(xmerl_ucs:to_utf8([16#FFFD, 16#FFFD])),
+    ?assertEqual([[[Markers, 1]]], Result).
+
+
 should_exit_on_oom() ->
+    Src = <<
+      "var state = [];\n"
+      "function(doc) {\n"
+      "  var val = \"0123456789ABCDEF\";\n"
+      "  for(var i = 0; i < 165535; i++) {\n"
+      "    state.push([val, val]);\n"
+      "  }\n"
+      "}\n"
+    >>,
     Proc = couch_query_servers:get_os_process(<<"javascript">>),
-    true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, ?FUNC]),
+    true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, Src]),
     trigger_oom(Proc).
 
 trigger_oom(Proc) ->

Reply via email to