Copilot commented on code in PR #1273:
URL:
https://github.com/apache/cassandra-python-driver/pull/1273#discussion_r2869201701
##########
cassandra/util.py:
##########
@@ -1692,54 +1692,43 @@ def __repr__(self):
self.lower_bound, self.upper_bound, self.value
)
+VERSION_REGEX =
re.compile("(\\d+)\\.(\\d+)(\\.\\d+)?(\\.\\d+)?([~\\-]\\w[.\\w]*(?:-\\w[.\\w]*)*)?(\\+[.\\w]+)?")
@total_ordering
class Version(object):
- """
- Internal minimalist class to compare versions.
- A valid version is: <int>.<int>.<int>.<int or str>.
-
- TODO: when python2 support is removed, use packaging.version.
- """
-
- _version = None
- major = None
- minor = 0
- patch = 0
- build = 0
- prerelease = 0
def __init__(self, version):
self._version = version
Review Comment:
The new `Version` implementation removed the class docstring entirely. Since
`cassandra.util.Version` is used outside this module (e.g. in
`cassandra.metadata` and unit tests), it should keep an up-to-date docstring
describing the supported version formats and comparison semantics.
##########
cassandra/util.py:
##########
@@ -1692,54 +1692,43 @@ def __repr__(self):
self.lower_bound, self.upper_bound, self.value
)
+VERSION_REGEX =
re.compile("(\\d+)\\.(\\d+)(\\.\\d+)?(\\.\\d+)?([~\\-]\\w[.\\w]*(?:-\\w[.\\w]*)*)?(\\+[.\\w]+)?")
@total_ordering
class Version(object):
- """
- Internal minimalist class to compare versions.
- A valid version is: <int>.<int>.<int>.<int or str>.
-
- TODO: when python2 support is removed, use packaging.version.
- """
-
- _version = None
- major = None
- minor = 0
- patch = 0
- build = 0
- prerelease = 0
def __init__(self, version):
self._version = version
- if '-' in version:
- version_without_prerelease, self.prerelease = version.split('-', 1)
- else:
- version_without_prerelease = version
- parts = list(reversed(version_without_prerelease.split('.')))
- if len(parts) > 4:
- prerelease_string = "-{}".format(self.prerelease) if
self.prerelease else ""
- log.warning("Unrecognized version: {}. Only 4 components plus
prerelease are supported. "
- "Assuming version as {}{}".format(version,
'.'.join(parts[:-5:-1]), prerelease_string))
+
+ match = VERSION_REGEX.match(version)
+ if not match:
+ raise ValueError("Version string did not match expected format")
Review Comment:
The `ValueError` raised on parse failure is generic (`"Version string did
not match expected format"`) and omits the offending input. Including the
actual version string in the message would make troubleshooting much easier.
```suggestion
raise ValueError("Version string did not match expected format:
{!r}".format(version))
```
##########
tests/unit/test_util_types.py:
##########
@@ -209,21 +209,25 @@ class VersionTests(unittest.TestCase):
def test_version_parsing(self):
versions = [
- ('2.0.0', (2, 0, 0, 0, 0)),
- ('3.1.0', (3, 1, 0, 0, 0)),
- ('2.4.54', (2, 4, 54, 0, 0)),
- ('3.1.1.12', (3, 1, 1, 12, 0)),
- ('3.55.1.build12', (3, 55, 1, 'build12', 0)),
- ('3.55.1.20190429-TEST', (3, 55, 1, 20190429, 'TEST')),
- ('4.0-SNAPSHOT', (4, 0, 0, 0, 'SNAPSHOT')),
+ # Test cases here adapted from the Java driver cases
+ #
(https://github.com/apache/cassandra-java-driver/blob/4.19.2/core/src/test/java/com/datastax/oss/driver/api/core/VersionTest.java)
+ ('1.2.19', (1, 2, 19, 0, 0)),
+ ('1.2', (1, 2, 0, 0, 0)),
+ ('1.2-beta1-SNAPSHOT', (1, 2, 0, 0, 'beta1-SNAPSHOT')),
+ ('1.2~beta1-SNAPSHOT', (1, 2, 0, 0, 'beta1-SNAPSHOT')),
+ ('1.2.19.2-SNAPSHOT', (1, 2, 19, 2, 'SNAPSHOT')),
+
+ # We also include a few test cases from the former impl of this
class, mainly to note differences in behaviours
+
+ # Note that prerelease tags are expected to start with a hyphen or
tilde so the expected tag is
+ # lost in all cases below
+ ('3.55.1.build12', (3, 55, 1, 0, 0)),
('1.0.5.4.3', (1, 0, 5, 4, 0)),
- ('1-SNAPSHOT', (1, 0, 0, 0, 'SNAPSHOT')),
- ('4.0.1.2.3.4.5-ABC-123-SNAP-TEST.blah', (4, 0, 1, 2,
'ABC-123-SNAP-TEST.blah')),
- ('2.1.hello', (2, 1, 0, 0, 0)),
- ('2.test.1', (2, 0, 0, 0, 0)),
+ ('2.1.hello', (2, 1, 0, 0, 0))
]
for str_version, expected_result in versions:
+ print(str_version)
v = Version(str_version)
Review Comment:
Remove the `print(...)` statements from this unit test. They add noise to
test output and can slow down/obscure CI logs; the asserts already provide
sufficient failure context.
##########
cassandra/util.py:
##########
@@ -1692,54 +1692,43 @@ def __repr__(self):
self.lower_bound, self.upper_bound, self.value
)
+VERSION_REGEX =
re.compile("(\\d+)\\.(\\d+)(\\.\\d+)?(\\.\\d+)?([~\\-]\\w[.\\w]*(?:-\\w[.\\w]*)*)?(\\+[.\\w]+)?")
@total_ordering
class Version(object):
- """
- Internal minimalist class to compare versions.
- A valid version is: <int>.<int>.<int>.<int or str>.
-
- TODO: when python2 support is removed, use packaging.version.
- """
-
- _version = None
- major = None
- minor = 0
- patch = 0
- build = 0
- prerelease = 0
def __init__(self, version):
self._version = version
- if '-' in version:
- version_without_prerelease, self.prerelease = version.split('-', 1)
- else:
- version_without_prerelease = version
- parts = list(reversed(version_without_prerelease.split('.')))
- if len(parts) > 4:
- prerelease_string = "-{}".format(self.prerelease) if
self.prerelease else ""
- log.warning("Unrecognized version: {}. Only 4 components plus
prerelease are supported. "
- "Assuming version as {}{}".format(version,
'.'.join(parts[:-5:-1]), prerelease_string))
+
+ match = VERSION_REGEX.match(version)
+ if not match:
+ raise ValueError("Version string did not match expected format")
+
+ self.major = int(match[1])
+ self.minor = int(match[2])
try:
- self.major = int(parts.pop())
- except ValueError as e:
- raise ValueError(
- "Couldn't parse version {}. Version should start with a
number".format(version))\
- .with_traceback(e.__traceback__)
+ self.patch = self._cleanup_int(match[3])
+ except:
+ self.patch = 0
+
try:
- self.minor = int(parts.pop()) if parts else 0
- self.patch = int(parts.pop()) if parts else 0
+ self.build = self._cleanup_int(match[4])
+ except:
+ self.build = 0
- if parts: # we have a build version
- build = parts.pop()
- try:
- self.build = int(build)
- except ValueError:
- self.build = build
- except ValueError:
- assumed_version = "{}.{}.{}.{}-{}".format(self.major, self.minor,
self.patch, self.build, self.prerelease)
- log.warning("Unrecognized version {}. Assuming version as
{}".format(version, assumed_version))
+ try:
+ self.prerelease = self._cleanup_str(match[5])
+ except:
+ self.prerelease = 0
Review Comment:
Avoid bare `except:` here. These blocks will also swallow unexpected
exceptions and make debugging harder. Since `_cleanup_int/_cleanup_str` already
handle `None`, the `try/except` may be unnecessary; otherwise, catch specific
exceptions (e.g. `ValueError`, `TypeError`) and consider whether an invalid
numeric component should raise vs. default to 0.
##########
cassandra/util.py:
##########
@@ -1692,54 +1692,43 @@ def __repr__(self):
self.lower_bound, self.upper_bound, self.value
)
+VERSION_REGEX =
re.compile("(\\d+)\\.(\\d+)(\\.\\d+)?(\\.\\d+)?([~\\-]\\w[.\\w]*(?:-\\w[.\\w]*)*)?(\\+[.\\w]+)?")
@total_ordering
class Version(object):
- """
- Internal minimalist class to compare versions.
- A valid version is: <int>.<int>.<int>.<int or str>.
-
- TODO: when python2 support is removed, use packaging.version.
- """
-
- _version = None
- major = None
- minor = 0
- patch = 0
- build = 0
- prerelease = 0
def __init__(self, version):
self._version = version
- if '-' in version:
- version_without_prerelease, self.prerelease = version.split('-', 1)
- else:
- version_without_prerelease = version
- parts = list(reversed(version_without_prerelease.split('.')))
- if len(parts) > 4:
- prerelease_string = "-{}".format(self.prerelease) if
self.prerelease else ""
- log.warning("Unrecognized version: {}. Only 4 components plus
prerelease are supported. "
- "Assuming version as {}{}".format(version,
'.'.join(parts[:-5:-1]), prerelease_string))
+
+ match = VERSION_REGEX.match(version)
+ if not match:
+ raise ValueError("Version string did not match expected format")
Review Comment:
`VERSION_REGEX.match(version)` is not anchored, so it can accept only a
prefix of the string and silently ignore trailing characters (e.g.
`3.55.1.build12` will match `3.55.1`). If the intent is strict validation, use
`re.fullmatch` (or `^...$`) so malformed versions raise. If some legacy/lenient
parsing is desired, it would be safer to make that behavior explicit (e.g., a
separate fallback parse with a warning) rather than relying on partial regex
matches.
##########
cassandra/util.py:
##########
@@ -1692,54 +1692,43 @@ def __repr__(self):
self.lower_bound, self.upper_bound, self.value
)
+VERSION_REGEX =
re.compile("(\\d+)\\.(\\d+)(\\.\\d+)?(\\.\\d+)?([~\\-]\\w[.\\w]*(?:-\\w[.\\w]*)*)?(\\+[.\\w]+)?")
@total_ordering
class Version(object):
- """
- Internal minimalist class to compare versions.
- A valid version is: <int>.<int>.<int>.<int or str>.
-
- TODO: when python2 support is removed, use packaging.version.
- """
-
- _version = None
- major = None
- minor = 0
- patch = 0
- build = 0
- prerelease = 0
def __init__(self, version):
self._version = version
- if '-' in version:
- version_without_prerelease, self.prerelease = version.split('-', 1)
- else:
- version_without_prerelease = version
- parts = list(reversed(version_without_prerelease.split('.')))
- if len(parts) > 4:
- prerelease_string = "-{}".format(self.prerelease) if
self.prerelease else ""
- log.warning("Unrecognized version: {}. Only 4 components plus
prerelease are supported. "
- "Assuming version as {}{}".format(version,
'.'.join(parts[:-5:-1]), prerelease_string))
+
+ match = VERSION_REGEX.match(version)
+ if not match:
+ raise ValueError("Version string did not match expected format")
+
+ self.major = int(match[1])
+ self.minor = int(match[2])
try:
- self.major = int(parts.pop())
- except ValueError as e:
- raise ValueError(
- "Couldn't parse version {}. Version should start with a
number".format(version))\
- .with_traceback(e.__traceback__)
+ self.patch = self._cleanup_int(match[3])
+ except:
+ self.patch = 0
+
try:
- self.minor = int(parts.pop()) if parts else 0
- self.patch = int(parts.pop()) if parts else 0
+ self.build = self._cleanup_int(match[4])
+ except:
+ self.build = 0
- if parts: # we have a build version
- build = parts.pop()
- try:
- self.build = int(build)
- except ValueError:
- self.build = build
- except ValueError:
- assumed_version = "{}.{}.{}.{}-{}".format(self.major, self.minor,
self.patch, self.build, self.prerelease)
- log.warning("Unrecognized version {}. Assuming version as
{}".format(version, assumed_version))
+ try:
+ self.prerelease = self._cleanup_str(match[5])
+ except:
+ self.prerelease = 0
+
+ # Trim off the leading '.' characters and convert the discovered value to
an integer
+ def _cleanup_int(self, s):
+ return int(s[1:]) if s else 0
+
+ # Trim off the leading '.' or '~' characters and just return the string
directly
+ def _cleanup_str(self, str):
+ return str[1:] if str else 0
Review Comment:
`_cleanup_str` uses a parameter named `str`, which shadows Python’s built-in
`str` type. Rename the parameter to avoid shadowing.
```suggestion
def _cleanup_str(self, s):
return s[1:] if s else 0
```
##########
tests/unit/test_util_types.py:
##########
@@ -232,9 +236,18 @@ def test_version_parsing(self):
self.assertEqual(v.build, expected_result[3])
self.assertEqual(v.prerelease, expected_result[4])
- # not supported version formats
- with self.assertRaises(ValueError):
- Version('test.1.0')
+ # Note that a few of these formats used to be supported when this
class was based on the Python versioning scheme.
+ # This has been updated to more directly correspond to the Cassandra
versioning scheme. See CASSPYTHON-10 for more
+ # detail.
+ unsupported_versions = [
+ "test.1.0",
+ '2.test.1'
+ ]
+
+ for v in unsupported_versions:
+ print(v)
Review Comment:
Remove the `print(v)` statement inside the unsupported-version loop; tests
should not emit output in normal operation.
```suggestion
```
##########
tests/unit/test_util_types.py:
##########
@@ -251,41 +264,43 @@ def test_version_compare(self):
# patch wins
self.assertTrue(Version('2.3.1') > Version('2.3.0'))
- self.assertTrue(Version('2.3.1') > Version('2.3.0.4post0'))
+ self.assertTrue(Version('2.3.1') > Version('2.3.0-4post0'))
self.assertTrue(Version('2.3.1') > Version('2.3.0.44'))
# various
self.assertTrue(Version('2.3.0.1') > Version('2.3.0.0'))
self.assertTrue(Version('2.3.0.680') > Version('2.3.0.670'))
self.assertTrue(Version('2.3.0.681') > Version('2.3.0.680'))
- self.assertTrue(Version('2.3.0.1build0') > Version('2.3.0.1')) # 4th
part fallback to str cmp
- self.assertTrue(Version('2.3.0.build0') > Version('2.3.0.1')) # 4th
part fallback to str cmp
- self.assertTrue(Version('2.3.0') < Version('2.3.0.build'))
-
- self.assertTrue(Version('4-a') <= Version('4.0.0'))
- self.assertTrue(Version('4-a') <= Version('4.0-alpha1'))
- self.assertTrue(Version('4-a') <= Version('4.0-beta1'))
- self.assertTrue(Version('4.0.0') >= Version('4.0.0'))
- self.assertTrue(Version('4.0.0.421') >= Version('4.0.0'))
- self.assertTrue(Version('4.0.1') >= Version('4.0.0'))
+
+ # If builds are equal then a prerelease always comes before
+ self.assertTrue(Version('2.3.0.1-SNAPSHOT') < Version('2.3.0.1'))
+
+ # If both have prereleases we fall back to a string compare
+ self.assertTrue(Version('2.3.0.1-SNAPSHOT') <
Version('2.3.0.1-ZNAPSHOT'))
+
self.assertTrue(Version('2.3.0') == Version('2.3.0'))
self.assertTrue(Version('2.3.32') == Version('2.3.32'))
self.assertTrue(Version('2.3.32') == Version('2.3.32.0'))
- self.assertTrue(Version('2.3.0.build') == Version('2.3.0.build'))
+ self.assertTrue(Version('2.3.0-SNAPSHOT') == Version('2.3.0-SNAPSHOT'))
- self.assertTrue(Version('4') == Version('4.0.0'))
self.assertTrue(Version('4.0') == Version('4.0.0.0'))
self.assertTrue(Version('4.0') > Version('3.9.3'))
self.assertTrue(Version('4.0') > Version('4.0-SNAPSHOT'))
self.assertTrue(Version('4.0-SNAPSHOT') == Version('4.0-SNAPSHOT'))
self.assertTrue(Version('4.0.0-SNAPSHOT') == Version('4.0-SNAPSHOT'))
self.assertTrue(Version('4.0.0-SNAPSHOT') == Version('4.0.0-SNAPSHOT'))
- self.assertTrue(Version('4.0.0.build5-SNAPSHOT') ==
Version('4.0.0.build5-SNAPSHOT'))
+ self.assertTrue(Version('4.0.0.5-SNAPSHOT') ==
Version('4.0.0.5-SNAPSHOT'))
Review Comment:
Given the new normalization behavior (e.g. `Version('4.0.0-SNAPSHOT') ==
Version('4.0-SNAPSHOT')`), add a unit test that `hash()` works and is
consistent with equality for equivalent versions. This would catch regressions
where `__hash__` raises or returns inconsistent values.
##########
cassandra/util.py:
##########
@@ -1692,54 +1692,43 @@ def __repr__(self):
self.lower_bound, self.upper_bound, self.value
)
+VERSION_REGEX =
re.compile("(\\d+)\\.(\\d+)(\\.\\d+)?(\\.\\d+)?([~\\-]\\w[.\\w]*(?:-\\w[.\\w]*)*)?(\\+[.\\w]+)?")
@total_ordering
class Version(object):
- """
- Internal minimalist class to compare versions.
- A valid version is: <int>.<int>.<int>.<int or str>.
-
- TODO: when python2 support is removed, use packaging.version.
- """
-
- _version = None
- major = None
- minor = 0
- patch = 0
- build = 0
- prerelease = 0
def __init__(self, version):
self._version = version
- if '-' in version:
- version_without_prerelease, self.prerelease = version.split('-', 1)
- else:
- version_without_prerelease = version
- parts = list(reversed(version_without_prerelease.split('.')))
- if len(parts) > 4:
- prerelease_string = "-{}".format(self.prerelease) if
self.prerelease else ""
- log.warning("Unrecognized version: {}. Only 4 components plus
prerelease are supported. "
- "Assuming version as {}{}".format(version,
'.'.join(parts[:-5:-1]), prerelease_string))
+
+ match = VERSION_REGEX.match(version)
+ if not match:
+ raise ValueError("Version string did not match expected format")
+
+ self.major = int(match[1])
+ self.minor = int(match[2])
try:
- self.major = int(parts.pop())
- except ValueError as e:
- raise ValueError(
- "Couldn't parse version {}. Version should start with a
number".format(version))\
- .with_traceback(e.__traceback__)
+ self.patch = self._cleanup_int(match[3])
+ except:
+ self.patch = 0
+
try:
- self.minor = int(parts.pop()) if parts else 0
- self.patch = int(parts.pop()) if parts else 0
+ self.build = self._cleanup_int(match[4])
+ except:
+ self.build = 0
- if parts: # we have a build version
- build = parts.pop()
- try:
- self.build = int(build)
- except ValueError:
- self.build = build
- except ValueError:
- assumed_version = "{}.{}.{}.{}-{}".format(self.major, self.minor,
self.patch, self.build, self.prerelease)
- log.warning("Unrecognized version {}. Assuming version as
{}".format(version, assumed_version))
+ try:
+ self.prerelease = self._cleanup_str(match[5])
+ except:
+ self.prerelease = 0
+
+ # Trim off the leading '.' characters and convert the discovered value to
an integer
+ def _cleanup_int(self, s):
+ return int(s[1:]) if s else 0
+
+ # Trim off the leading '.' or '~' characters and just return the string
directly
+ def _cleanup_str(self, str):
+ return str[1:] if str else 0
def __hash__(self):
return self._version
Review Comment:
`Version.__hash__` returns `self._version` (a `str`). In Python 3,
`__hash__` must return an `int`, and it also needs to be consistent with
`__eq__`. As written, calling `hash(Version(...))` will raise `TypeError`, and
even if fixed to return a string hash, it would still violate the hash/eq
contract for versions that compare equal but have different original strings
(e.g. `4.0-SNAPSHOT` vs `4.0.0-SNAPSHOT`).
```suggestion
# Hash based on the same components used for equality, to satisfy
the hash/eq contract.
return hash((self.major, self.minor, self.patch, self.build,
self.prerelease))
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]