This is an automated email from the ASF dual-hosted git repository.

willholley pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/couchdb.git


The following commit(s) were added to refs/heads/main by this push:
     new 26cfe5369 mango: add $beginsWith operator (#4810)
26cfe5369 is described below

commit 26cfe5369f88ce3a7c1ef3a162fed0a636c87d75
Author: Will Holley <[email protected]>
AuthorDate: Mon Oct 30 18:21:09 2023 +0000

    mango: add $beginsWith operator (#4810)
    
    Adds a `$beginsWith` operator to selectors, with json and text index
    support. This is a complement / precursor to optimising `$regex`
    support as proposed in https://github.com/apache/couchdb/pull/4776.
    
    For `json` indexes, a $beginsWith operator translates into a key
    range query, as is common practice for _view queries. For example,
    to find all rows with a key beginning with "W", we can use a range
    `start_key="W", end_key="W\ufff0"`. Given Mango uses compound keys,
    this is slightly more complex in practice, but the idea is the same.
    As with other range operators (`$gt`, `$gte`, etc), `$beginsWith`
    can be used in combination with equality operators and result sorting
    but must result in a contiguous key range. That is, a range of
    `start_key=[10, "W"], end_key=[10, "W\ufff0", {}]` would be valid,
    but `start_key=["W", 10], end_key=["W\ufff0", 10, {}]` would not,
    because the second element of the key may result in a non-contiguous
    range.
    
    For text indexes, `$beginsWith` translates to a Lucene query on
    the specified field of `W*`.
    
    If a non-string operand is provided to `$beginsWith`, the request will
    fail with a 400 / `invalid_operator` error.
---
 src/docs/src/api/database/find.rst    | 144 ++++++++++++++++++----------------
 src/mango/src/mango_idx_view.erl      |   6 ++
 src/mango/src/mango_selector.erl      |  70 +++++++++++------
 src/mango/src/mango_selector_text.erl |  11 +++
 src/mango/test/03-operator-test.py    |  38 ++++++++-
 src/mango/test/25-beginswith-test.py  | 134 +++++++++++++++++++++++++++++++
 6 files changed, 312 insertions(+), 91 deletions(-)

diff --git a/src/docs/src/api/database/find.rst 
b/src/docs/src/api/database/find.rst
index d25350708..5380280fd 100644
--- a/src/docs/src/api/database/find.rst
+++ b/src/docs/src/api/database/find.rst
@@ -200,8 +200,9 @@ A simple selector, inspecting specific fields:
 
 You can create more complex selector expressions by combining operators.
 For best performance, it is best to combine 'combination' or
-'array logical' operators, such as ``$regex``, with an equality
-operators such as ``$eq``, ``$gt``, ``$gte``, ``$lt``, and ``$lte``
+'array logical' operators, such as ``$regex``, with an operator
+that defines a contiguous range of keys such as ``$eq``,
+``$gt``, ``$gte``, ``$lt``, ``$lte``, and ``$beginsWith``
 (but not ``$ne``). For more information about creating complex
 selector expressions, see :ref:`creating selector expressions
 <find/expressions>`.
@@ -673,68 +674,74 @@ In addition, some 'meta' condition operators are 
available. Some condition
 operators accept any valid JSON content as the argument.  Other condition
 operators require the argument to be in a specific JSON format.
 
-+---------------+-------------+------------+-----------------------------------+
-| Operator type | Operator    | Argument   | Purpose                           
|
-+===============+=============+============+===================================+
-| (In)equality  | ``$lt``     | Any JSON   | The field is less than the        
|
-|               |             |            | argument.                         
|
-+---------------+-------------+------------+-----------------------------------+
-|               | ``$lte``    | Any JSON   | The field is less than or equal 
to|
-|               |             |            | the argument.                     
|
-+---------------+-------------+------------+-----------------------------------+
-|               | ``$eq``     | Any JSON   | The field is equal to the 
argument|
-+---------------+-------------+------------+-----------------------------------+
-|               | ``$ne``     | Any JSON   | The field is not equal to the     
|
-|               |             |            | argument.                         
|
-+---------------+-------------+------------+-----------------------------------+
-|               | ``$gte``    | Any JSON   | The field is greater than or 
equal|
-|               |             |            | to the argument.                  
|
-+---------------+-------------+------------+-----------------------------------+
-|               | ``$gt``     | Any JSON   | The field is greater than the     
|
-|               |             |            | to the argument.                  
|
-+---------------+-------------+------------+-----------------------------------+
-| Object        | ``$exists`` | Boolean    | Check whether the field exists or 
|
-|               |             |            | not, regardless of its value.     
|
-+---------------+-------------+------------+-----------------------------------+
-|               | ``$type``   | String     | Check the document field's type.  
|
-|               |             |            | Valid values are ``"null"``,      
|
-|               |             |            | ``"boolean"``, ``"number"``,      
|
-|               |             |            | ``"string"``, ``"array"``, and    
|
-|               |             |            | ``"object"``.                     
|
-+---------------+-------------+------------+-----------------------------------+
-| Array         | ``$in``     | Array of   | The document field must exist in  
|
-|               |             | JSON values| the list provided.                
|
-+---------------+-------------+------------+-----------------------------------+
-|               | ``$nin``    | Array of   | The document field not must exist 
|
-|               |             | JSON values| in the list provided.             
|
-+---------------+-------------+------------+-----------------------------------+
-|               | ``$size``   | Integer    | Special condition to match the    
|
-|               |             |            | length of an array field in a     
|
-|               |             |            | document. Non-array fields cannot 
|
-|               |             |            | match this condition.             
|
-+---------------+-------------+------------+-----------------------------------+
-| Miscellaneous | ``$mod``    | [Divisor,  | Divisor is a non-zero integer,    
|
-|               |             | Remainder] | Remainder is any integer.         
|
-|               |             |            | Non-integer values result in a    
|
-|               |             |            | 404. Matches documents where      
|
-|               |             |            | ``field % Divisor == Remainder``  
|
-|               |             |            | is true, and only when the        
|
-|               |             |            | document field is an integer.     
|
-+---------------+-------------+------------+-----------------------------------+
-|               | ``$regex``  | String     | A regular expression pattern to   
|
-|               |             |            | match against the document field. 
|
-|               |             |            | Only matches when the field is a  
|
-|               |             |            | string value and matches the      
|
-|               |             |            | supplied regular expression. The  
|
-|               |             |            | matching algorithms are based on  
|
-|               |             |            | the Perl Compatible Regular       
|
-|               |             |            | Expression (PCRE) library. For    
|
-|               |             |            | more information about what is    
|
-|               |             |            | implemented, see the see the      
|
-|               |             |            | `Erlang Regular Expression        
|
-|               |             |            | <http://erlang.org/doc            
|
-|               |             |            | /man/re.html>`_.                  
|
-+---------------+-------------+------------+-----------------------------------+
++---------------+-----------------+-------------+------------------------------------+
+| Operator type |    Operator     |  Argument   |              Purpose         
      |
++===============+=================+=============+====================================+
+| (In)equality  | ``$lt``         | Any JSON    | The field is less than the   
      |
+|               |                 |             | argument.                    
      |
++---------------+-----------------+-------------+------------------------------------+
+|               | ``$lte``        | Any JSON    | The field is less than or 
equal to |
+|               |                 |             | the argument.                
      |
++---------------+-----------------+-------------+------------------------------------+
+|               | ``$eq``         | Any JSON    | The field is equal to the 
argument |
++---------------+-----------------+-------------+------------------------------------+
+|               | ``$ne``         | Any JSON    | The field is not equal to 
the      |
+|               |                 |             | argument.                    
      |
++---------------+-----------------+-------------+------------------------------------+
+|               | ``$gte``        | Any JSON    | The field is greater than or 
equal |
+|               |                 |             | to the argument.             
      |
++---------------+-----------------+-------------+------------------------------------+
+|               | ``$gt``         | Any JSON    | The field is greater than 
the      |
+|               |                 |             | argument.                    
      |
++---------------+-----------------+-------------+------------------------------------+
+| Object        | ``$exists``     | Boolean     | Check whether the field 
exists or  |
+|               |                 |             | not, regardless of its 
value.      |
++---------------+-----------------+-------------+------------------------------------+
+|               | ``$type``       | String      | Check the document field's 
type.   |
+|               |                 |             | Valid values are ``"null"``, 
      |
+|               |                 |             | ``"boolean"``, ``"number"``, 
      |
+|               |                 |             | ``"string"``, ``"array"``, 
and     |
+|               |                 |             | ``"object"``.                
      |
++---------------+-----------------+-------------+------------------------------------+
+| Array         | ``$in``         | Array of    | The document field must 
exist in   |
+|               |                 | JSON values | the list provided.           
      |
++---------------+-----------------+-------------+------------------------------------+
+|               | ``$nin``        | Array of    | The document field must not 
exist  |
+|               |                 | JSON values | in the list provided.        
      |
++---------------+-----------------+-------------+------------------------------------+
+|               | ``$size``       | Integer     | Special condition to match 
the     |
+|               |                 |             | length of an array field in 
a      |
+|               |                 |             | document. Non-array fields 
cannot  |
+|               |                 |             | match this condition.        
      |
++---------------+-----------------+-------------+------------------------------------+
+| Miscellaneous | ``$mod``        | [Divisor,   | Divisor is a non-zero 
integer,     |
+|               |                 | Remainder]  | Remainder is any integer.    
      |
+|               |                 |             | Non-integer values result in 
a     |
+|               |                 |             | 404. Matches documents where 
      |
+|               |                 |             | ``field % Divisor == 
Remainder``   |
+|               |                 |             | is true, and only when the   
      |
+|               |                 |             | document field is an 
integer.      |
++---------------+-----------------+-------------+------------------------------------+
+|               | ``$regex``      | String      | A regular expression pattern 
to    |
+|               |                 |             | match against the document 
field.  |
+|               |                 |             | Only matches when the field 
is a   |
+|               |                 |             | string value and matches the 
      |
+|               |                 |             | supplied regular expression. 
The   |
+|               |                 |             | matching algorithms are 
based on   |
+|               |                 |             | the Perl Compatible Regular  
      |
+|               |                 |             | Expression (PCRE) library. 
For     |
+|               |                 |             | more information about what 
is     |
+|               |                 |             | implemented, see the         
      |
+|               |                 |             | `Erlang Regular Expression   
      |
+|               |                 |             | <http://erlang.org/doc       
      |
+|               |                 |             | /man/re.html>`_.             
      |
++---------------+-----------------+-------------+------------------------------------+
+|               | ``$beginsWith`` | String      | Matches where the document 
field   |
+|               |                 |             | begins with the specified 
prefix   |
+|               |                 |             | (case-sensitive). If the 
document  |
+|               |                 |             | field contains a non-string 
value, |
+|               |                 |             | the document is not matched. 
      |
++---------------+-----------------+-------------+------------------------------------+
 
 .. warning::
     Regular expressions do not work with indexes, so they should not be used to
@@ -753,9 +760,12 @@ In general, whenever you have an operator that takes an 
argument, that argument
 can itself be another operator with arguments of its own. This enables us to
 build up more complex selector expressions.
 
-However, only equality operators such as ``$eq``, ``$gt``, ``$gte``, ``$lt``,
-and ``$lte`` (but not ``$ne``) can be used as the basis of a query. You should
-include at least one of these in a selector.
+However, only operators that define a contiguous range of values
+such as ``$eq``, ``$gt``, ``$gte``, ``$lt``, ``$lte``,
+and ``$beginsWith`` (but not ``$ne``) can be used as the basis
+of a query that can make efficient use of a ``json`` index. You should
+include at least one of these in a selector, or consider using
+a ``text`` index if greater flexibility is required.
 
 For example, if you try to perform a query that attempts to match all documents
 that have a field called `afieldname` containing a value that begins with the
diff --git a/src/mango/src/mango_idx_view.erl b/src/mango/src/mango_idx_view.erl
index 25d75d55d..d1650e987 100644
--- a/src/mango/src/mango_idx_view.erl
+++ b/src/mango/src/mango_idx_view.erl
@@ -306,6 +306,8 @@ indexable({[{<<"$gt">>, _}]}) ->
     true;
 indexable({[{<<"$gte">>, _}]}) ->
     true;
+indexable({[{<<"$beginsWith">>, _}]}) ->
+    true;
 % This is required to improve index selection for covering indexes.
 % Making `$exists` indexable should not cause problems in other cases.
 indexable({[{<<"$exists">>, _}]}) ->
@@ -412,6 +414,10 @@ range(_, _, LCmp, Low, HCmp, High) ->
 % operators but its all straight forward once you figure out how
 % we're basically just narrowing our logical ranges.
 
+% beginsWith requires both a high and low bound
+range({[{<<"$beginsWith">>, Arg}]}, LCmp, Low, HCmp, High) ->
+    {LCmp0, Low0, HCmp0, High0} = range({[{<<"$gte">>, Arg}]}, LCmp, Low, 
HCmp, High),
+    range({[{<<"$lte">>, <<Arg/binary, 16#10FFFF>>}]}, LCmp0, Low0, HCmp0, 
High0);
 range({[{<<"$lt">>, Arg}]}, LCmp, Low, HCmp, High) ->
     case range_pos(Low, Arg, High) of
         min ->
diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl
index 59be7a6eb..93d3b10ca 100644
--- a/src/mango/src/mango_selector.erl
+++ b/src/mango/src/mango_selector.erl
@@ -135,6 +135,8 @@ norm_ops({[{<<"$text">>, Arg}]}) when
     {[{<<"$default">>, {[{<<"$text">>, Arg}]}}]};
 norm_ops({[{<<"$text">>, Arg}]}) ->
     ?MANGO_ERROR({bad_arg, '$text', Arg});
+norm_ops({[{<<"$beginsWith">>, Arg}]} = Cond) when is_binary(Arg) ->
+    Cond;
 % Not technically an operator but we pass it through here
 % so that this function accepts its own output. This exists
 % so that $text can have a field name value which simplifies
@@ -514,6 +516,11 @@ match({[{<<"$mod">>, [D, R]}]}, Value, _Cmp) when 
is_integer(Value) ->
     Value rem D == R;
 match({[{<<"$mod">>, _}]}, _Value, _Cmp) ->
     false;
+match({[{<<"$beginsWith">>, Prefix}]}, Value, _Cmp) when is_binary(Prefix), 
is_binary(Value) ->
+    string:prefix(Value, Prefix) /= nomatch;
+% When Value is not a string, do not match
+match({[{<<"$beginsWith">>, Prefix}]}, _, _Cmp) when is_binary(Prefix) ->
+    false;
 match({[{<<"$regex">>, Regex}]}, Value, _Cmp) when is_binary(Value) ->
     try
         match == re:run(Value, Regex, [{capture, none}])
@@ -652,6 +659,14 @@ fields({[]}) ->
 -ifdef(TEST).
 -include_lib("eunit/include/eunit.hrl").
 
+-define(TEST_DOC,
+    {[
+        {<<"_id">>, <<"foo">>},
+        {<<"_rev">>, <<"bar">>},
+        {<<"user_id">>, 11}
+    ]}
+).
+
 is_constant_field_basic_test() ->
     Selector = normalize({[{<<"A">>, <<"foo">>}]}),
     Field = <<"A">>,
@@ -991,30 +1006,22 @@ has_required_fields_or_nested_or_false_test() ->
     Normalized = normalize(Selector),
     ?assertEqual(false, has_required_fields(Normalized, RequiredFields)).
 
+check_match(Selector) ->
+    % Call match_int/2 to avoid ERROR for missing metric; this is confusing
+    % in the middle of test output.
+    match_int(mango_selector:normalize(Selector), ?TEST_DOC).
+
 %% This test shows the shape match/2 expects for its arguments.
-match_demo_test_() ->
-    Doc =
-        {[
-            {<<"_id">>, <<"foo">>},
-            {<<"_rev">>, <<"bar">>},
-            {<<"user_id">>, 11}
-        ]},
-    Check = fun(Selector) ->
-        % Call match_int/2 to avoid ERROR for missing metric; this is confusing
-        % in the middle of test output.
-        match_int(mango_selector:normalize(Selector), Doc)
-    end,
-    [
-        % matching
-        ?_assertEqual(true, Check({[{<<"user_id">>, 11}]})),
-        ?_assertEqual(true, Check({[{<<"_id">>, <<"foo">>}]})),
-        ?_assertEqual(true, Check({[{<<"_id">>, <<"foo">>}, {<<"_rev">>, 
<<"bar">>}]})),
-        % non-matching
-        ?_assertEqual(false, Check({[{<<"user_id">>, 1234}]})),
-        % string 11 doesn't match number 11
-        ?_assertEqual(false, Check({[{<<"user_id">>, <<"11">>}]})),
-        ?_assertEqual(false, Check({[{<<"_id">>, <<"foo">>}, {<<"_rev">>, 
<<"quux">>}]}))
-    ].
+match_demo_test() ->
+    % matching
+    ?assertEqual(true, check_match({[{<<"user_id">>, 11}]})),
+    ?assertEqual(true, check_match({[{<<"_id">>, <<"foo">>}]})),
+    ?assertEqual(true, check_match({[{<<"_id">>, <<"foo">>}, {<<"_rev">>, 
<<"bar">>}]})),
+    % non-matching
+    ?assertEqual(false, check_match({[{<<"user_id">>, 1234}]})),
+    % string 11 doesn't match number 11
+    ?assertEqual(false, check_match({[{<<"user_id">>, <<"11">>}]})),
+    ?assertEqual(false, check_match({[{<<"_id">>, <<"foo">>}, {<<"_rev">>, 
<<"quux">>}]})).
 
 fields_of(Selector) ->
     fields(test_util:as_selector(Selector)).
@@ -1054,4 +1061,21 @@ fields_nor_test() ->
     },
     ?assertEqual([<<"field1">>, <<"field2">>], fields_of(Selector2)).
 
+check_beginswith(Field, Prefix) ->
+    Selector = {[{Field, {[{<<"$beginsWith">>, Prefix}]}}]},
+    % Call match_int/2 to avoid ERROR for missing metric; this is confusing
+    % in the middle of test output.
+    match_int(mango_selector:normalize(Selector), ?TEST_DOC).
+
+match_beginswith_test() ->
+    % matching
+    ?assertEqual(true, check_beginswith(<<"_id">>, <<"f">>)),
+    % no match (user_id is not a binary string)
+    ?assertEqual(false, check_beginswith(<<"user_id">>, <<"f">>)),
+    % invalid (prefix is not a binary string)
+    ?assertThrow(
+        {mango_error, mango_selector, {invalid_operator, <<"$beginsWith">>}},
+        check_beginswith(<<"user_id">>, 1)
+    ).
+
 -endif.
diff --git a/src/mango/src/mango_selector_text.erl 
b/src/mango/src/mango_selector_text.erl
index 1f8609ac2..e4f15d00d 100644
--- a/src/mango/src/mango_selector_text.erl
+++ b/src/mango/src/mango_selector_text.erl
@@ -142,6 +142,11 @@ convert(Path, {[{<<"$exists">>, ShouldExist}]}) ->
         true -> FieldExists;
         false -> {op_not, {FieldExists, false}}
     end;
+convert(Path, {[{<<"$beginsWith">>, Arg}]}) when is_binary(Arg) ->
+    Prefix = mango_util:lucene_escape_query_value(Arg),
+    Suffix = <<"*">>,
+    PrefixSearch = <<Prefix/binary, Suffix/binary>>,
+    {op_field, {make_field(Path, Arg), PrefixSearch}};
 % We're not checking the actual type here, just looking for
 % anything that has a possibility of matching by checking
 % for the field name. We use the same logic for $exists on
@@ -821,6 +826,12 @@ convert_nor_test() ->
         })
     ).
 
+convert_beginswith_test() ->
+    ?assertEqual(
+        {op_field, {[[<<"field">>], <<":">>, <<"string">>], <<"foo*">>}},
+        convert_selector(#{<<"field">> => #{<<"$beginsWith">> => <<"foo">>}})
+    ).
+
 to_query_test() ->
     F = fun(S) -> iolist_to_binary(to_query(S)) end,
     Input = {<<"name">>, <<"value">>},
diff --git a/src/mango/test/03-operator-test.py 
b/src/mango/test/03-operator-test.py
index 70e3fbc5f..1dfd1a725 100644
--- a/src/mango/test/03-operator-test.py
+++ b/src/mango/test/03-operator-test.py
@@ -15,7 +15,7 @@ import unittest
 
 
 class BaseOperatorTests:
-    class Common(object):
+    class Common(unittest.TestCase):
         def assertUserIds(self, user_ids, docs):
             user_ids_returned = list(d["user_id"] for d in docs)
             user_ids.sort()
@@ -141,6 +141,42 @@ class BaseOperatorTests:
             for d in docs:
                 self.assertNotIn("twitter", d)
 
+        def test_beginswith(self):
+            self.db.save_docs(
+                [
+                    {"user_id": 99, "location": {"state": ":Bar"}},
+                ]
+            )
+
+            cases = [
+                {"prefix": "New", "user_ids": [2, 10]},
+                # test characters that require escaping
+                {"prefix": "New ", "user_ids": [2, 10]},
+                {"prefix": ":", "user_ids": [99]},
+                {"prefix": "Foo", "user_ids": []},
+                {"prefix": '"Foo', "user_ids": []},
+                {"prefix": " New", "user_ids": []},
+            ]
+
+            for case in cases:
+                with self.subTest(prefix=case["prefix"]):
+                    selector = {"location.state": {"$beginsWith": 
case["prefix"]}}
+                    docs = self.db.find(selector)
+                    self.assertEqual(len(docs), len(case["user_ids"]))
+                    self.assertUserIds(case["user_ids"], docs)
+
+        # non-string prefixes should return an error
+        def test_beginswith_invalid_prefix(self):
+            cases = [123, True, [], {}]
+            for prefix in cases:
+                with self.subTest(prefix=prefix):
+                    try:
+                        self.db.find({"location.state": {"$beginsWith": 
prefix}})
+                    except Exception as e:
+                        self.assertEqual(e.response.status_code, 400)
+                    else:
+                        raise AssertionError("expected request to fail")
+
 
 class OperatorJSONTests(mango.UserDocsTests, BaseOperatorTests.Common):
     # START: text indexes do not support range queries across type boundaries 
so only
diff --git a/src/mango/test/25-beginswith-test.py 
b/src/mango/test/25-beginswith-test.py
new file mode 100644
index 000000000..3b5134b65
--- /dev/null
+++ b/src/mango/test/25-beginswith-test.py
@@ -0,0 +1,134 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+import copy
+import mango
+
+DOCS = [
+    {"_id": "aaa", "name": "Jimi", "location": "AUS", "age": 27},
+    {"_id": "abc", "name": "Eddie", "location": "AND", "age": 65},
+    {"_id": "bbb", "name": "Harry", "location": "CAN", "age": 21},
+    {"_id": "ccc", "name": "Eddie", "location": "DEN", "age": 37},
+    {"_id": "ddd", "name": "Jones", "location": "ETH", "age": 49},
+]
+
+
+def to_utf8_bytes(list):
+    return [x.encode() for x in list]
+
+
+class BeginsWithOperator(mango.DbPerClass):
+    def setUp(self):
+        self.db.recreate()
+        self.db.save_docs(copy.deepcopy(DOCS))
+        self.db.create_index(["location"])
+        self.db.create_index(["name", "location"])
+
+    def get_mrargs(self, selector, sort=None):
+        explain = self.db.find(selector, sort=sort, explain=True)
+        return explain["mrargs"]
+
+    def assertDocIds(self, user_ids, docs):
+        user_ids_returned = list(d["_id"] for d in docs)
+        user_ids.sort()
+        user_ids_returned.sort()
+        self.assertEqual(user_ids, user_ids_returned)
+
+    def test_basic(self):
+        docs = self.db.find({"location": {"$beginsWith": "A"}})
+
+        self.assertEqual(len(docs), 2)
+        self.assertDocIds(["aaa", "abc"], docs)
+
+    def test_json_range(self):
+        mrargs = self.get_mrargs({"location": {"$beginsWith": "A"}})
+
+        self.assertEqual(mrargs["start_key"], ["A"])
+        end_key_bytes = to_utf8_bytes(mrargs["end_key"])
+        self.assertEqual(end_key_bytes, [b"A\xef\xbf\xbd", b"<MAX>"])
+
+    def test_compound_key(self):
+        selector = {"name": "Eddie", "location": {"$beginsWith": "A"}}
+        mrargs = self.get_mrargs(selector)
+
+        self.assertEqual(mrargs["start_key"], ["Eddie", "A"])
+        end_key_bytes = to_utf8_bytes(mrargs["end_key"])
+        self.assertEqual(end_key_bytes, [b"Eddie", b"A\xef\xbf\xbd", b"<MAX>"])
+
+        docs = self.db.find(selector)
+        self.assertEqual(len(docs), 1)
+        self.assertDocIds(["abc"], docs)
+
+    def test_sort(self):
+        selector = {"location": {"$beginsWith": "A"}}
+        cases = [
+            {
+                "sort": ["location"],
+                "start_key": [b"A"],
+                "end_key": [b"A\xef\xbf\xbd", b"<MAX>"],
+                "direction": "fwd",
+            },
+            {
+                "sort": [{"location": "desc"}],
+                "start_key": [b"A\xef\xbf\xbd", b"<MAX>"],
+                "end_key": [b"A"],
+                "direction": "rev",
+            },
+        ]
+
+        for case in cases:
+            with self.subTest(sort=case["sort"]):
+                mrargs = self.get_mrargs(selector, sort=case["sort"])
+                self.assertEqual(to_utf8_bytes(mrargs["start_key"]), 
case["start_key"])
+                self.assertEqual(to_utf8_bytes(mrargs["end_key"]), 
case["end_key"])
+                self.assertEqual(mrargs["direction"], case["direction"])
+
+    def test_all_docs_range(self):
+        mrargs = self.get_mrargs({"_id": {"$beginsWith": "a"}})
+
+        self.assertEqual(mrargs["start_key"], "a")
+        end_key_bytes = to_utf8_bytes(mrargs["end_key"])
+        self.assertEqual(end_key_bytes, [b"a", b"\xef\xbf\xbd"])
+
+    def test_no_index(self):
+        selector = {"foo": {"$beginsWith": "a"}}
+        resp_explain = self.db.find(selector, explain=True)
+        mrargs = resp_explain["mrargs"]
+
+        self.assertEqual(resp_explain["index"]["type"], "special")
+        self.assertEqual(mrargs["start_key"], None)
+        self.assertEqual(mrargs["end_key"], "<MAX>")
+
+    def test_invalid_operand(self):
+        try:
+            self.db.find({"_id": {"$beginsWith": True}})
+        except Exception as e:
+            self.assertEqual(e.response.status_code, 400)
+            resp = e.response.json()
+            self.assertEqual(resp["error"], "invalid_operator")
+        else:
+            raise AssertionError("expected find error")
+
+    def test_does_not_match_non_string_value(self):
+        docs = self.db.find({"age": {"$beginsWith": "a"}})
+        self.assertEqual(len(docs), 0)
+
+    def test_no_matches(self):
+        docs = self.db.find({"name": {"$beginsWith": "Z"}})
+        self.assertEqual(len(docs), 0)
+
+    def test_case_sensitivity(self):
+        docs = self.db.find({"name": {"$beginsWith": "j"}})
+        self.assertEqual(len(docs), 0)
+
+        docs = self.db.find({"name": {"$beginsWith": "J"}})
+        self.assertEqual(len(docs), 2)

Reply via email to