This is an automated email from the ASF dual-hosted git repository.

kentontaylor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/allura.git

commit ba1c0508bb847613fc7b9f5d117a2e955d260b2e
Author: Dave Brondsema <[email protected]>
AuthorDate: Thu Mar 6 11:58:11 2025 -0500

    ensure_index: deduping on a sparse index only needs to look at records where the field exists
---
 Allura/allura/command/show_models.py | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/Allura/allura/command/show_models.py b/Allura/allura/command/show_models.py
index e39677225..8d479bb8e 100644
--- a/Allura/allura/command/show_models.py
+++ b/Allura/allura/command/show_models.py
@@ -296,20 +296,20 @@ def _update_indexes(self, collection, indexes):
         # Ensure all indexes
         for keys, idx in uindexes.items():
             base.log.info('...... ensure %s:%s', collection.name, idx)
-            while True:
+            while True:  # loop in case de-duping takes multiple attempts
+                index_options = idx.index_options.copy()
+                if idx.fields == ('_id',):
+                    # as of mongo 3.4 _id fields can't have these options set
+                    # _id is always non-sparse and unique anyway
+                    del index_options['sparse']
+                    del index_options['unique']
                 try:
-                    index_options = idx.index_options.copy()
-                    if idx.fields == ('_id',):
-                        # as of mongo 3.4 _id fields can't have these options set
-                        # _id is always non-sparse and unique anyway
-                        del index_options['sparse']
-                        del index_options['unique']
                     collection.create_index(idx.index_spec, **index_options)
                     break
                 except DuplicateKeyError as err:
                     if self.options.delete_dupes:
                         base.log.warning('Found dupe key(%s), eliminating dupes', err)
-                        self._remove_dupes(collection, idx.index_spec)
+                        self._remove_dupes(collection, idx.index_spec, index_options)
                     else:
                         print('Error creating unique index.  Run with --delete-duplicate-key-records if you want to delete records that violate this index', file=sys.stderr)
                         raise
@@ -350,10 +350,16 @@ def _recreate_index(self, collection, iname, keys, **creation_options):
                       collection.name, superset_index)
         collection.drop_index(superset_index)
 
-    def _remove_dupes(self, collection, spec):
-        iname = collection.create_index(spec)
+    def _remove_dupes(self, collection, spec, index_options: dict):
+        iname = collection.create_index(spec)  # create it but not unique
         fields = [f[0] for f in spec]
-        q = collection.find({}, projection=fields).sort(spec)
+        query = {}
+        if index_options.get('sparse'):
+            # sparse indexes only apply if the field(s) exist
+            query = {
+                '$or': [{f: {'$exists': True}} for f in fields]
+            }
+        q = collection.find(query, projection=fields).sort(spec)
 
         def keyfunc(doc):
             return tuple(doc.get(f, None) for f in fields)

Reply via email to