This is an automated email from the ASF dual-hosted git repository.
chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-fury.git
The following commit(s) were added to refs/heads/main by this push:
new 911f0536 feat(go/java): Add ASCII check before meta string encoding
(#1620)
911f0536 is described below
commit 911f0536f5fd5355a77f47cbfcc166a9c9fcc319
Author: Jason Mok <[email protected]>
AuthorDate: Fri May 10 02:43:36 2024 -0500
feat(go/java): Add ASCII check before meta string encoding (#1620)
## What does this PR do?
<!-- Describe the purpose of this PR. -->
This PR adds a validation step to the `MetaString` encoder: input strings
must be ASCII unless the `UTF_8` encoding is requested. Non-ASCII input
combined with any other encoding is now rejected with an error.
## Related issues
<!--
Is there any related issue? Please attach here.
- #1619
- #xxxx1
- #xxxx2
-->
## Does this PR introduce any user-facing change?
<!--
If any user-facing interface changes, please [open an
issue](https://github.com/apache/incubator-fury/issues/new/choose)
describing the need to do so and update the document if necessary.
-->
- [ ] Does this PR introduce any public API change?
- [ ] Does this PR introduce any binary protocol compatibility change?
## Benchmark
<!--
When the PR has an impact on performance (if you don't know whether the
PR will have an impact on performance, you can submit the PR first, and
if it will have impact on performance, the code reviewer will explain
it), be sure to attach a benchmark data here.
-->
---------
Signed-off-by: Jason Mok <[email protected]>
---
go/fury/meta/meta_string_encoder.go | 12 ++++++++++++
.../main/java/org/apache/fury/meta/MetaStringEncoder.java | 4 ++++
2 files changed, 16 insertions(+)
diff --git a/go/fury/meta/meta_string_encoder.go
b/go/fury/meta/meta_string_encoder.go
index f88e2c1b..6d3b89bf 100644
--- a/go/fury/meta/meta_string_encoder.go
+++ b/go/fury/meta/meta_string_encoder.go
@@ -42,6 +42,9 @@ func (e *Encoder) Encode(input string) (MetaString, error) {
// EncodeWithEncoding Encodes the input string to MetaString using specified
encoding.
func (e *Encoder) EncodeWithEncoding(input string, encoding Encoding)
(MetaString, error) {
+ if encoding != UTF_8 && !isASCII(input) {
+ return MetaString{}, errors.New("non-ASCII characters in meta
string are not allowed")
+ }
if len(input) > 32767 {
return MetaString{}, errors.New("long meta string than 32767 is
not allowed")
}
@@ -167,6 +170,15 @@ func (e *Encoder) ComputeEncoding(input string) Encoding {
return UTF_8
}
+func isASCII(input string) bool { // reports whether input holds only ASCII (an empty string counts as ASCII)
+	for i := 0; i < len(input); i++ { // scan raw bytes; no rune decoding is needed for this check
+		if input[i] >= 0x80 { // a set high bit marks a multi-byte or invalid UTF-8 byte
+			return false
+		}
+	}
+	return true
+}
+
type stringStatistics struct {
digitCount int
upperCount int
diff --git
a/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java
b/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java
index 43b48329..619a441c 100644
--- a/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java
+++ b/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java
@@ -23,6 +23,7 @@ import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import org.apache.fury.collection.Collections;
import org.apache.fury.meta.MetaString.Encoding;
+import org.apache.fury.serializer.StringSerializer;
import org.apache.fury.util.Preconditions;
/** Encodes plain text strings into MetaString objects with specified encoding
mechanisms. */
@@ -70,6 +71,9 @@ public class MetaStringEncoder {
public MetaString encode(String input, Encoding encoding) {
Preconditions.checkArgument(
input.length() < Short.MAX_VALUE, "Long meta string than 32767 is not
allowed");
+ if (encoding != Encoding.UTF_8 &&
!StringSerializer.isLatin(input.toCharArray())) {
+ throw new IllegalArgumentException("Non-ASCII characters in meta string
are not allowed");
+ }
if (input.isEmpty()) {
return new MetaString(input, Encoding.UTF_8, specialChar1, specialChar2,
new byte[0]);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]