This is an automated email from the ASF dual-hosted git repository.
chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fury.git
The following commit(s) were added to refs/heads/main by this push:
new 8d2d1240 feat(python): Hardcoding metastring into passable parameters
(#1987)
8d2d1240 is described below
commit 8d2d1240825cbfa32548ecb8afc978cea533ec23
Author: PAN <[email protected]>
AuthorDate: Mon Dec 23 10:29:46 2024 +0800
feat(python): Hardcoding metastring into passable parameters (#1987)
<!--
**Thanks for contributing to Fury.**
**If this is your first time opening a PR on fury, you can refer to
[CONTRIBUTING.md](https://github.com/apache/fury/blob/main/CONTRIBUTING.md).**
Contribution Checklist
- The **Apache Fury (incubating)** community has restrictions on the
naming of PR titles. You can also find instructions in
[CONTRIBUTING.md](https://github.com/apache/fury/blob/main/CONTRIBUTING.md).
- Fury has a strong focus on performance. If the PR you submit will have
an impact on performance, please benchmark it first and provide the
benchmark result here.
-->
## What does this PR do?
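Previously, MetaStringEncoder and MetaStringDecoder hard-coded the two
special characters (`.` and `_`) used by the LOWER_UPPER_DIGIT_SPECIAL
encoding, which was not the best choice. This PR turns them into
constructor parameters (`special_char1` and `special_char2`) so that
callers can choose which special characters are used; MetaString also
records the special characters it was encoded with.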
<!-- Describe the purpose of this PR. -->
## Related issues
Close #1983
<!--
Is there any related issue? Please attach here.
- #xxxx0
- #xxxx1
- #xxxx2
-->
## Does this PR introduce any user-facing change?
<!--
If any user-facing interface changes, please [open an
issue](https://github.com/apache/fury/issues/new/choose) describing the
need to do so and update the document if necessary.
-->
- [ ] Does this PR introduce any public API change?
- [ ] Does this PR introduce any binary protocol compatibility change?
## Benchmark
<!--
When the PR has an impact on performance (if you don't know whether the
PR will have an impact on performance, you can submit the PR first, and
if it does have an impact on performance, the code reviewer will explain
it), be sure to attach benchmark data here.
-->
---
python/pyfury/meta/metastring.py | 108 ++++++++++++++++++++++++++++-----
python/pyfury/tests/test_metastring.py | 51 +++++++++-------
2 files changed, 122 insertions(+), 37 deletions(-)
diff --git a/python/pyfury/meta/metastring.py b/python/pyfury/meta/metastring.py
index 63232b56..4ff06510 100644
--- a/python/pyfury/meta/metastring.py
+++ b/python/pyfury/meta/metastring.py
@@ -48,12 +48,20 @@ _METASTRING_NUM_CHARS_LIMIT = 32767
class MetaString:
def __init__(
- self, original: str, encoding: Encoding, encoded_data: bytes, length:
int
+ self,
+ original: str,
+ encoding: Encoding,
+ encoded_data: bytes,
+ length: int,
+ special_char1: str = ".",
+ special_char2: str = "|",
):
self.original = original
self.encoding = encoding
self.encoded_data = encoded_data
self.length = length
+ self.special_char1 = special_char1
+ self.special_char2 = special_char2
if self.encoding != Encoding.UTF_8:
self.strip_last_char = (encoded_data[0] & 0x80) != 0
else:
@@ -65,6 +73,17 @@ class MetaStringDecoder:
Decodes MetaString objects back into their original plain text form.
"""
+ def __init__(self, special_char1: str, special_char2: str):
+ """
+ Creates a MetaStringDecoder with specified special characters used for
decoding.
+
+ Args:
+ special_char1 (str): The first special character used for encoding.
+ special_char2 (str): The second special character used for
encoding.
+ """
+ self.special_char1 = special_char1
+ self.special_char2 = special_char2
+
def decode(self, encoded_data: bytes, encoding: Encoding) -> str:
"""
Decodes the encoded data using the specified encoding.
@@ -203,9 +222,9 @@ class MetaStringDecoder:
elif 52 <= char_value <= 61:
return chr(ord("0") + (char_value - 52))
elif char_value == 62:
- return "."
+ return self.special_char1 # Use special_char1 for the encoding
elif char_value == 63:
- return "_"
+ return self.special_char2 # Use special_char2 for the encoding
else:
raise ValueError(
f"Invalid character value for LOWER_UPPER_DIGIT_SPECIAL:
{char_value}"
@@ -250,9 +269,16 @@ class MetaStringDecoder:
class MetaStringEncoder:
- """
- Encodes plain text strings into MetaString objects with specified encoding
mechanisms.
- """
+ def __init__(self, special_char1: str, special_char2: str):
+ """
+ Creates a MetaStringEncoder with specified special characters used for
encoding.
+
+ Args:
+ special_char1 (str): The first special character used in custom
encoding.
+ special_char2 (str): The second special character used in custom
encoding.
+ """
+ self.special_char1 = special_char1
+ self.special_char2 = special_char2
def encode(self, input_string: str) -> MetaString:
"""
@@ -270,7 +296,14 @@ class MetaStringEncoder:
), "Long meta string than _METASTRING_NUM_CHARS_LIMIT is not allowed."
if not input_string:
- return MetaString(input_string, Encoding.UTF_8, bytes(), 0)
+ return MetaString(
+ input_string,
+ Encoding.UTF_8,
+ bytes(),
+ 0,
+ self.special_char1,
+ self.special_char2,
+ )
encoding = self.compute_encoding(input_string)
return self.encode_with_encoding(input_string, encoding)
@@ -292,29 +325,67 @@ class MetaStringEncoder:
), "Long meta string than _METASTRING_NUM_CHARS_LIMIT is not allowed."
if not input_string:
- return MetaString(input_string, Encoding.UTF_8, bytes(), 0)
+ return MetaString(
+ input_string,
+ Encoding.UTF_8,
+ bytes(),
+ 0,
+ self.special_char1,
+ self.special_char2,
+ )
length = len(input_string)
if encoding == Encoding.LOWER_SPECIAL:
encoded_data = self._encode_lower_special(input_string)
- return MetaString(input_string, encoding, encoded_data, length * 5)
+ return MetaString(
+ input_string,
+ encoding,
+ encoded_data,
+ length * 5,
+ self.special_char1,
+ self.special_char2,
+ )
elif encoding == Encoding.LOWER_UPPER_DIGIT_SPECIAL:
encoded_data = self._encode_lower_upper_digit_special(input_string)
- return MetaString(input_string, encoding, encoded_data, length * 6)
+ return MetaString(
+ input_string,
+ encoding,
+ encoded_data,
+ length * 6,
+ self.special_char1,
+ self.special_char2,
+ )
elif encoding == Encoding.FIRST_TO_LOWER_SPECIAL:
encoded_data = self._encode_first_to_lower_special(input_string)
- return MetaString(input_string, encoding, encoded_data, length * 5)
+ return MetaString(
+ input_string,
+ encoding,
+ encoded_data,
+ length * 5,
+ self.special_char1,
+ self.special_char2,
+ )
elif encoding == Encoding.ALL_TO_LOWER_SPECIAL:
chars = list(input_string)
upper_count = sum(1 for c in chars if c.isupper())
encoded_data = self._encode_all_to_lower_special(chars)
return MetaString(
- input_string, encoding, encoded_data, (upper_count + length) *
5
+ input_string,
+ encoding,
+ encoded_data,
+ (upper_count + length) * 5,
+ self.special_char1,
+ self.special_char2,
)
else:
encoded_data = bytes(input_string, "utf-8")
return MetaString(
- input_string, Encoding.UTF_8, encoded_data, len(encoded_data)
* 8
+ input_string,
+ Encoding.UTF_8,
+ encoded_data,
+ len(encoded_data) * 8,
+ self.special_char1,
+ self.special_char2,
)
def compute_encoding(self, input_string: str) -> Encoding:
@@ -363,7 +434,12 @@ class MetaStringEncoder:
upper_count = 0
for c in chars:
if can_lower_upper_digit_special_encoded:
- if not (c.islower() or c.isupper() or c.isdigit() or c in
{".", "_"}):
+ if not (
+ c.islower()
+ or c.isupper()
+ or c.isdigit()
+ or c in {self.special_char1, self.special_char2}
+ ):
can_lower_upper_digit_special_encoded = False
if can_lower_special_encoded:
if not (c.islower() or c in {".", "_", "$", "|"}):
@@ -500,9 +576,9 @@ class MetaStringEncoder:
return 26 + (ord(c) - ord("A"))
elif "0" <= c <= "9":
return 52 + (ord(c) - ord("0"))
- elif c == ".":
+ elif c == self.special_char1:
return 62
- elif c == "_":
+ elif c == self.special_char2:
return 63
else:
raise ValueError(
diff --git a/python/pyfury/tests/test_metastring.py
b/python/pyfury/tests/test_metastring.py
index 7dd98ff7..95596edf 100644
--- a/python/pyfury/tests/test_metastring.py
+++ b/python/pyfury/tests/test_metastring.py
@@ -24,8 +24,10 @@ from pyfury.meta.metastring import (
def test_encode_metastring_lower_special():
- encoder = MetaStringEncoder()
- decoder = MetaStringDecoder()
+ encoder = MetaStringEncoder(special_char1=".", special_char2="_")
+ decoder = MetaStringDecoder(special_char1=".", special_char2="_")
+
+ # Test for encoding and decoding
encoded = encoder._encode_lower_special("abc_def")
assert len(encoded) == 5
assert len(encoder.encode("org.apache.fury.benchmark.data").encoded_data)
== 19
@@ -41,10 +43,12 @@ def test_encode_metastring_lower_special():
def test_encode_metastring_lower_upper_digit_special():
- encoder = MetaStringEncoder()
+ encoder = MetaStringEncoder(special_char1=".", special_char2="_")
+ decoder = MetaStringDecoder(special_char1=".", special_char2="_")
+
+ # Test for encoding and decoding
encoded = encoder._encode_lower_upper_digit_special("ExampleInput123")
assert len(encoded) == 12
- decoder = MetaStringDecoder()
decoded = decoder.decode(encoded, Encoding.LOWER_UPPER_DIGIT_SPECIAL)
assert decoded == "ExampleInput123"
@@ -73,8 +77,9 @@ def create_string(length):
def test_metastring():
+ encoder = MetaStringEncoder(special_char1=".", special_char2="_")
+ decoder = MetaStringDecoder(special_char1=".", special_char2="_")
- encoder = MetaStringEncoder()
for i in range(1, 128):
try:
string = create_string(i)
@@ -82,7 +87,6 @@ def test_metastring():
assert metastring.encoding != Encoding.UTF_8
assert metastring.original == string
- decoder = MetaStringDecoder()
new_string = decoder.decode(metastring.encoded_data,
metastring.encoding)
assert new_string == string
except Exception as e:
@@ -90,8 +94,9 @@ def test_metastring():
def test_encode_empty_string():
- encoder = MetaStringEncoder()
- decoder = MetaStringDecoder()
+ encoder = MetaStringEncoder(special_char1=".", special_char2="_")
+ decoder = MetaStringDecoder(special_char1=".", special_char2="_")
+
for encoding in [
Encoding.LOWER_SPECIAL,
Encoding.LOWER_UPPER_DIGIT_SPECIAL,
@@ -106,7 +111,7 @@ def test_encode_empty_string():
def test_encode_characters_outside_of_lower_special():
- encoder = MetaStringEncoder()
+ encoder = MetaStringEncoder(special_char1=".", special_char2="_")
test_string = "abcdefABCDEF1234!@#"
metastring = encoder.encode(test_string)
@@ -114,8 +119,9 @@ def test_encode_characters_outside_of_lower_special():
def test_all_to_upper_special_encoding():
- encoder = MetaStringEncoder()
- decoder = MetaStringDecoder()
+ encoder = MetaStringEncoder(special_char1=".", special_char2="_")
+ decoder = MetaStringDecoder(special_char1=".", special_char2="_")
+
test_string = "ABC_DEF"
metastring = encoder.encode(test_string)
assert metastring.encoding == Encoding.LOWER_UPPER_DIGIT_SPECIAL
@@ -124,8 +130,9 @@ def test_all_to_upper_special_encoding():
def test_first_to_lower_special_encoding():
- encoder = MetaStringEncoder()
- decoder = MetaStringDecoder()
+ encoder = MetaStringEncoder(special_char1=".", special_char2="_")
+ decoder = MetaStringDecoder(special_char1=".", special_char2="_")
+
test_string = "Aabcdef"
metastring = encoder.encode(test_string)
assert metastring.encoding == Encoding.FIRST_TO_LOWER_SPECIAL
@@ -134,8 +141,9 @@ def test_first_to_lower_special_encoding():
def test_utf8_encoding():
- encoder = MetaStringEncoder()
- decoder = MetaStringDecoder()
+ encoder = MetaStringEncoder(special_char1=".", special_char2="_")
+ decoder = MetaStringDecoder(special_char1=".", special_char2="_")
+
test_string = "你好,世界" # Non-Latin characters
metastring = encoder.encode(test_string)
assert metastring.encoding == Encoding.UTF_8
@@ -144,7 +152,7 @@ def test_utf8_encoding():
def test_strip_last_char():
- encoder = MetaStringEncoder()
+ encoder = MetaStringEncoder(special_char1=".", special_char2="_")
test_string = "abc" # encoded as 1|00000|00, 001|00010, exactly two bytes
encoded_metastring = encoder.encode(test_string)
@@ -156,8 +164,9 @@ def test_strip_last_char():
def test_empty_string():
- encoder = MetaStringEncoder()
- decoder = MetaStringDecoder()
+ encoder = MetaStringEncoder(special_char1=".", special_char2="_")
+ decoder = MetaStringDecoder(special_char1=".", special_char2="_")
+
metastring = encoder.encode("")
assert metastring.encoded_data == bytes()
@@ -166,7 +175,7 @@ def test_empty_string():
def test_ascii_encoding():
- encoder = MetaStringEncoder()
+ encoder = MetaStringEncoder(special_char1=".", special_char2="_")
test_string = "asciiOnly"
encoded_metastring = encoder.encode(test_string)
@@ -175,7 +184,7 @@ def test_ascii_encoding():
def test_non_ascii_encoding():
- encoder = MetaStringEncoder()
+ encoder = MetaStringEncoder(special_char1=".", special_char2="_")
test_string = "こんにちは" # Non-ASCII string
encoded_metastring = encoder.encode(test_string)
@@ -183,7 +192,7 @@ def test_non_ascii_encoding():
def test_non_ascii_encoding_and_non_utf8():
- encoder = MetaStringEncoder()
+ encoder = MetaStringEncoder(special_char1=".", special_char2="_")
non_ascii_string = "こんにちは" # Non-ASCII string
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]