This is an automated email from the ASF dual-hosted git repository.

willholley pushed a commit to branch mango-beginswith
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 3a94e00d9ccc38427859b391e16f998439109623
Author: Will Holley <[email protected]>
AuthorDate: Thu Oct 26 12:42:02 2023 +0000

    Fix lucene support
---
 src/docs/src/api/database/find.rst    | 136 ++++++++++++++++++----------------
 src/mango/src/mango_selector_text.erl |   3 +-
 src/mango/test/03-operator-test.py    |  41 +++++++---
 3 files changed, 103 insertions(+), 77 deletions(-)

diff --git a/src/docs/src/api/database/find.rst b/src/docs/src/api/database/find.rst
index d25350708..e94326b53 100644
--- a/src/docs/src/api/database/find.rst
+++ b/src/docs/src/api/database/find.rst
@@ -673,68 +673,74 @@ In addition, some 'meta' condition operators are available. Some condition
 operators accept any valid JSON content as the argument.  Other condition
 operators require the argument to be in a specific JSON format.
 
-+---------------+-------------+------------+-----------------------------------+
-| Operator type | Operator    | Argument   | Purpose                           |
-+===============+=============+============+===================================+
-| (In)equality  | ``$lt``     | Any JSON   | The field is less than the        |
-|               |             |            | argument.                         |
-+---------------+-------------+------------+-----------------------------------+
-|               | ``$lte``    | Any JSON   | The field is less than or equal to|
-|               |             |            | the argument.                     |
-+---------------+-------------+------------+-----------------------------------+
-|               | ``$eq``     | Any JSON   | The field is equal to the argument|
-+---------------+-------------+------------+-----------------------------------+
-|               | ``$ne``     | Any JSON   | The field is not equal to the     |
-|               |             |            | argument.                         |
-+---------------+-------------+------------+-----------------------------------+
-|               | ``$gte``    | Any JSON   | The field is greater than or equal|
-|               |             |            | to the argument.                  |
-+---------------+-------------+------------+-----------------------------------+
-|               | ``$gt``     | Any JSON   | The field is greater than the     |
-|               |             |            | to the argument.                  |
-+---------------+-------------+------------+-----------------------------------+
-| Object        | ``$exists`` | Boolean    | Check whether the field exists or |
-|               |             |            | not, regardless of its value.     |
-+---------------+-------------+------------+-----------------------------------+
-|               | ``$type``   | String     | Check the document field's type.  |
-|               |             |            | Valid values are ``"null"``,      |
-|               |             |            | ``"boolean"``, ``"number"``,      |
-|               |             |            | ``"string"``, ``"array"``, and    |
-|               |             |            | ``"object"``.                     |
-+---------------+-------------+------------+-----------------------------------+
-| Array         | ``$in``     | Array of   | The document field must exist in  |
-|               |             | JSON values| the list provided.                |
-+---------------+-------------+------------+-----------------------------------+
-|               | ``$nin``    | Array of   | The document field not must exist |
-|               |             | JSON values| in the list provided.             |
-+---------------+-------------+------------+-----------------------------------+
-|               | ``$size``   | Integer    | Special condition to match the    |
-|               |             |            | length of an array field in a     |
-|               |             |            | document. Non-array fields cannot |
-|               |             |            | match this condition.             |
-+---------------+-------------+------------+-----------------------------------+
-| Miscellaneous | ``$mod``    | [Divisor,  | Divisor is a non-zero integer,    |
-|               |             | Remainder] | Remainder is any integer.         |
-|               |             |            | Non-integer values result in a    |
-|               |             |            | 404. Matches documents where      |
-|               |             |            | ``field % Divisor == Remainder``  |
-|               |             |            | is true, and only when the        |
-|               |             |            | document field is an integer.     |
-+---------------+-------------+------------+-----------------------------------+
-|               | ``$regex``  | String     | A regular expression pattern to   |
-|               |             |            | match against the document field. |
-|               |             |            | Only matches when the field is a  |
-|               |             |            | string value and matches the      |
-|               |             |            | supplied regular expression. The  |
-|               |             |            | matching algorithms are based on  |
-|               |             |            | the Perl Compatible Regular       |
-|               |             |            | Expression (PCRE) library. For    |
-|               |             |            | more information about what is    |
-|               |             |            | implemented, see the see the      |
-|               |             |            | `Erlang Regular Expression        |
-|               |             |            | <http://erlang.org/doc            |
-|               |             |            | /man/re.html>`_.                  |
-+---------------+-------------+------------+-----------------------------------+
++---------------+-----------------+-------------+------------------------------------+
+| Operator type |    Operator     |  Argument   |              Purpose               |
++===============+=================+=============+====================================+
+| (In)equality  | ``$lt``         | Any JSON    | The field is less than the         |
+|               |                 |             | argument.                          |
++---------------+-----------------+-------------+------------------------------------+
+|               | ``$lte``        | Any JSON    | The field is less than or equal to |
+|               |                 |             | the argument.                      |
++---------------+-----------------+-------------+------------------------------------+
+|               | ``$eq``         | Any JSON    | The field is equal to the argument |
++---------------+-----------------+-------------+------------------------------------+
+|               | ``$ne``         | Any JSON    | The field is not equal to the      |
+|               |                 |             | argument.                          |
++---------------+-----------------+-------------+------------------------------------+
+|               | ``$gte``        | Any JSON    | The field is greater than or equal |
+|               |                 |             | to the argument.                   |
++---------------+-----------------+-------------+------------------------------------+
+|               | ``$gt``         | Any JSON    | The field is greater than the      |
+|               |                 |             | to the argument.                   |
++---------------+-----------------+-------------+------------------------------------+
+| Object        | ``$exists``     | Boolean     | Check whether the field exists or  |
+|               |                 |             | not, regardless of its value.      |
++---------------+-----------------+-------------+------------------------------------+
+|               | ``$type``       | String      | Check the document field's type.   |
+|               |                 |             | Valid values are ``"null"``,       |
+|               |                 |             | ``"boolean"``, ``"number"``,       |
+|               |                 |             | ``"string"``, ``"array"``, and     |
+|               |                 |             | ``"object"``.                      |
++---------------+-----------------+-------------+------------------------------------+
+| Array         | ``$in``         | Array of    | The document field must exist in   |
+|               |                 | JSON values | the list provided.                 |
++---------------+-----------------+-------------+------------------------------------+
+|               | ``$nin``        | Array of    | The document field not must exist  |
+|               |                 | JSON values | in the list provided.              |
++---------------+-----------------+-------------+------------------------------------+
+|               | ``$size``       | Integer     | Special condition to match the     |
+|               |                 |             | length of an array field in a      |
+|               |                 |             | document. Non-array fields cannot  |
+|               |                 |             | match this condition.              |
++---------------+-----------------+-------------+------------------------------------+
+| Miscellaneous | ``$mod``        | [Divisor,   | Divisor is a non-zero integer,     |
+|               |                 | Remainder]  | Remainder is any integer.          |
+|               |                 |             | Non-integer values result in a     |
+|               |                 |             | 404. Matches documents where       |
+|               |                 |             | ``field % Divisor == Remainder``   |
+|               |                 |             | is true, and only when the         |
+|               |                 |             | document field is an integer.      |
++---------------+-----------------+-------------+------------------------------------+
+|               | ``$regex``      | String      | A regular expression pattern to    |
+|               |                 |             | match against the document field.  |
+|               |                 |             | Only matches when the field is a   |
+|               |                 |             | string value and matches the       |
+|               |                 |             | supplied regular expression. The   |
+|               |                 |             | matching algorithms are based on   |
+|               |                 |             | the Perl Compatible Regular        |
+|               |                 |             | Expression (PCRE) library. For     |
+|               |                 |             | more information about what is     |
+|               |                 |             | implemented, see the see the       |
+|               |                 |             | `Erlang Regular Expression         |
+|               |                 |             | <http://erlang.org/doc             |
+|               |                 |             | /man/re.html>`_.                   |
++---------------+-----------------+-------------+------------------------------------+
+|               | ``$beginsWith`` | String      | Matches where the document field   |
+|               |                 |             | begins with the specified prefix   |
+|               |                 |             | (case-sensitive). If the document  |
+|               |                 |             | field contains a non-string value, |
+|               |                 |             | the document is not matched.       |
++---------------+-----------------+-------------+------------------------------------+
 
 .. warning::
     Regular expressions do not work with indexes, so they should not be used to
@@ -754,8 +760,10 @@ can itself be another operator with arguments of its own. This enables us to
 build up more complex selector expressions.
 
 However, only equality operators such as ``$eq``, ``$gt``, ``$gte``, ``$lt``,
-and ``$lte`` (but not ``$ne``) can be used as the basis of a query. You should
-include at least one of these in a selector.
+``$lte`` and ``$beginsWith`` (but not ``$ne``) can be used as the basis
+of a query that can make efficient use of a ``json`` index. You should
+include at least one of these in a selector, or consider using
+a ``text`` index if more flexibility is required.
 
 For example, if you try to perform a query that attempts to match all documents
 that have a field called `afieldname` containing a value that begins with the
diff --git a/src/mango/src/mango_selector_text.erl b/src/mango/src/mango_selector_text.erl
index 4a50ff9ba..7d8f73923 100644
--- a/src/mango/src/mango_selector_text.erl
+++ b/src/mango/src/mango_selector_text.erl
@@ -143,8 +143,9 @@ convert(Path, {[{<<"$exists">>, ShouldExist}]}) ->
         false -> {op_not, {FieldExists, false}}
     end;
 convert(Path, {[{<<"$beginsWith">>, Arg}]}) when is_binary(Arg) ->
+    Prefix = mango_util:lucene_escape_query_value(Arg),
     Suffix = <<"*">>,
-    PrefixSearch = value_str(<<Arg/binary, Suffix/binary>>),
+    PrefixSearch = <<Prefix/binary, Suffix/binary>>,
     {op_field, {make_field(Path, Arg), PrefixSearch}};
 % We're not checking the actual type here, just looking for
 % anything that has a possibility of matching by checking
diff --git a/src/mango/test/03-operator-test.py b/src/mango/test/03-operator-test.py
index b43aacf5f..3b1a46565 100644
--- a/src/mango/test/03-operator-test.py
+++ b/src/mango/test/03-operator-test.py
@@ -10,12 +10,13 @@
 # License for the specific language governing permissions and limitations under
 # the License.
 
+from requests.exceptions import HTTPError
 import mango
 import unittest
 
 
 class BaseOperatorTests:
-    class Common(object):
+    class Common(unittest.TestCase):
         def assertUserIds(self, user_ids, docs):
             user_ids_returned = list(d["user_id"] for d in docs)
             user_ids.sort()
@@ -142,20 +143,36 @@ class BaseOperatorTests:
                 self.assertNotIn("twitter", d)
 
         def test_beginswith(self):
-            docs = self.db.find({"location.state": {"$beginsWith": "New"}})
-            self.assertEqual(len(docs), 2)
-            self.assertUserIds([2, 10], docs)
+            cases = [
+                {"prefix": "New", "user_ids": [2, 10]},
+                {
+                    # test escaped characters - note the space in the test string
+                    "prefix": "New ",
+                    "user_ids": [2, 10],
+                },
+                {
+                    # non-string values in documents should not match the prefix,
+                    # but should not error
+                    "prefix": "Foo",
+                    "user_ids": [],
+                },
+                {"prefix": " New", "user_ids": []},
+            ]
 
-        # non-string prefixes should return an error
-        def test_beginswith_invalid_prefix(self):
-            docs = self.db.find({"location.state": {"$beginsWith": 123}})
-            self.assertEqual(len(docs), 2)
+            for case in cases:
+                with self.subTest(prefix=case["prefix"]):
+                    selector = {"location.state": {"$beginsWith": case["prefix"]}}
+                    docs = self.db.find(selector)
+                    self.assertEqual(len(docs), len(case["user_ids"]))
+                    self.assertUserIds(case["user_ids"], docs)
 
-        # non-string values in documents should not match the prefix,
-        # but should not error
+        # non-string prefixes should return an error
         def test_beginswith_invalid_prefix(self):
-            docs = self.db.find({"user_id": {"$beginsWith": "Foo"}})
-            self.assertEqual(len(docs), 0)
+            cases = [123, True, [], {}]
+            for prefix in cases:
+                with self.subTest(prefix=prefix):
+                    with self.assertRaises(HTTPError):
+                        self.db.find({"location.state": {"$beginsWith": prefix}})
 
 
 class OperatorJSONTests(mango.UserDocsTests, BaseOperatorTests.Common):

Reply via email to