y1chi commented on a change in pull request #14460:
URL: https://github.com/apache/beam/pull/14460#discussion_r633686604



##########
File path: sdks/python/apache_beam/io/mongodbio.py
##########
@@ -391,61 +532,55 @@ def int_to_id(cls, number):
 
     Returns: The ObjectId that has the 12 bytes binary converted from the
       integer value.
-
     """
     # converts integer value to object id. Int value should be less than
     # (2 ^ 96) so it can be convert to 12 bytes required by object id.
     if number < 0 or number >= (1 << 96):
-      raise ValueError('number value must be within [0, %s)' % (1 << 96))
-    ints = [(number & 0xffffffff0000000000000000) >> 64,
-            (number & 0x00000000ffffffff00000000) >> 32,
-            number & 0x0000000000000000ffffffff]
+      raise ValueError("number value must be within [0, %s)" % (1 << 96))
+    ints = [
+        (number & 0xFFFFFFFF0000000000000000) >> 64,
+        (number & 0x00000000FFFFFFFF00000000) >> 32,
+        number & 0x0000000000000000FFFFFFFF,
+    ]
 
-    bytes = struct.pack('>III', *ints)
-    return objectid.ObjectId(bytes)
+    number_bytes = struct.pack(">III", *ints)
+    return ObjectId(number_bytes)
 
   @classmethod
-  def increment_id(cls, object_id, inc):
+  def increment_id(
+      cls,
+      object_id: Union[int, str, ObjectId],
+      inc: int,
+  ) -> Union[int, str, ObjectId]:
     """
     Args:
-      object_id: The ObjectId to change.
-      inc(int): The incremental int value to be added to ObjectId.
+      object_id: The `_id` to change.
+      inc(int): The incremental int value to be added to `_id`.
 
     Returns:
-
+        `_id` incremented by `inc` value
     """
+    # Handle integer `_id`
+    if isinstance(object_id, int):
+      return object_id + inc
+
+    # Handle string `_id`
+    if isinstance(object_id, str):
+      object_id = object_id or chr(31)  # handle empty string ('')
+      # incrementing the latest symbol of the string
+      return object_id[:-1] + chr(ord(object_id[-1:]) + inc)

Review comment:
       This doesn't look right, the value of `ord(object_id[-1:]) + inc` can 
get out of range.

##########
File path: sdks/python/apache_beam/io/mongodbio_test.py
##########
@@ -538,34 +542,55 @@ def test_conversion(self):
         (objectid.ObjectId('00000000ffffffffffffffff'), 2**64 - 1),
         (objectid.ObjectId('ffffffffffffffffffffffff'), 2**96 - 1),
     ]
-    for (id, number) in test_cases:
-      self.assertEqual(id, _ObjectIdHelper.int_to_id(number))
-      self.assertEqual(number, _ObjectIdHelper.id_to_int(id))
+    for (_id, number) in test_cases:
+      self.assertEqual(_id, _ObjectIdHelper.int_to_id(number))
+      self.assertEqual(number, _ObjectIdHelper.id_to_int(_id))
 
     # random tests
     for _ in range(100):
-      id = objectid.ObjectId()
-      number = int(id.binary.hex(), 16)
-      self.assertEqual(id, _ObjectIdHelper.int_to_id(number))
-      self.assertEqual(number, _ObjectIdHelper.id_to_int(id))
+      _id = objectid.ObjectId()
+      number = int(_id.binary.hex(), 16)
+      self.assertEqual(_id, _ObjectIdHelper.int_to_id(number))
+      self.assertEqual(number, _ObjectIdHelper.id_to_int(_id))
 
   def test_increment_id(self):
     test_cases = [
+        (0, -1),
+        (1, 0),
+        (100, 99),
+        (1000000000, 999999999),
+        (chr(31), chr(30)),
+        (" ", chr(31)),
+        ("!", " "),
+        ("\x1a", "\x19"),
+        ("AAB", "AAA"),
+        ("000000000000000000001", "000000000000000000000"),
         (
-            objectid.ObjectId('000000000000000100000000'),
-            objectid.ObjectId('0000000000000000ffffffff')),
+            objectid.ObjectId("000000000000000100000000"),
+            objectid.ObjectId("0000000000000000ffffffff"),
+        ),
         (
-            objectid.ObjectId('000000010000000000000000'),
-            objectid.ObjectId('00000000ffffffffffffffff')),
+            objectid.ObjectId("000000010000000000000000"),
+            objectid.ObjectId("00000000ffffffffffffffff"),
+        ),
     ]
-    for (first, second) in test_cases:
+    for first, second in test_cases:
       self.assertEqual(second, _ObjectIdHelper.increment_id(first, -1))
       self.assertEqual(first, _ObjectIdHelper.increment_id(second, 1))
 
     for _ in range(100):
-      id = objectid.ObjectId()
-      self.assertLess(id, _ObjectIdHelper.increment_id(id, 1))
-      self.assertGreater(id, _ObjectIdHelper.increment_id(id, -1))
+      _id = objectid.ObjectId()
+      self.assertLess(_id, _ObjectIdHelper.increment_id(_id, 1))
+      self.assertGreater(_id, _ObjectIdHelper.increment_id(_id, -1))
+
+  @staticmethod
+  def test_increment_id_empty_string():

Review comment:
       could you add cases to cover the unicode character with largest and 
smallest code points?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to