This is an automated email from the ASF dual-hosted git repository.

chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-fury.git


The following commit(s) were added to refs/heads/main by this push:
     new 911f0536 feat(go/java): Add ASCII check before meta string encoding 
(#1620)
911f0536 is described below

commit 911f0536f5fd5355a77f47cbfcc166a9c9fcc319
Author: Jason Mok <[email protected]>
AuthorDate: Fri May 10 02:43:36 2024 -0500

    feat(go/java): Add ASCII check before meta string encoding (#1620)
    
    
    
    ## What does this PR do?
    
    <!-- Describe the purpose of this PR. -->
    This PR introduces a validation check in the `MetaString` encoder that
    rejects input strings containing non-ASCII characters whenever an
    encoding other than UTF-8 is requested.
    
    ## Related issues
    
    <!--
    Is there any related issue? Please attach here.
    
    - #1619
    - #xxxx1
    - #xxxx2
    -->
    
    
    ## Does this PR introduce any user-facing change?
    
    <!--
    If any user-facing interface changes, please [open an
    issue](https://github.com/apache/incubator-fury/issues/new/choose)
    describing the need to do so and update the document if necessary.
    -->
    
    - [ ] Does this PR introduce any public API change?
    - [ ] Does this PR introduce any binary protocol compatibility change?
    
    
    ## Benchmark
    
    <!--
    When the PR has an impact on performance (if you don't know whether the
    PR will have an impact on performance, you can submit the PR first, and
    if it will have impact on performance, the code reviewer will explain
    it), be sure to attach a benchmark data here.
    -->
    
    ---------
    
    Signed-off-by: Jason Mok <[email protected]>
---
 go/fury/meta/meta_string_encoder.go                          | 12 ++++++++++++
 .../main/java/org/apache/fury/meta/MetaStringEncoder.java    |  4 ++++
 2 files changed, 16 insertions(+)

diff --git a/go/fury/meta/meta_string_encoder.go 
b/go/fury/meta/meta_string_encoder.go
index f88e2c1b..6d3b89bf 100644
--- a/go/fury/meta/meta_string_encoder.go
+++ b/go/fury/meta/meta_string_encoder.go
@@ -42,6 +42,9 @@ func (e *Encoder) Encode(input string) (MetaString, error) {
 
 // EncodeWithEncoding Encodes the input string to MetaString using specified 
encoding.
 func (e *Encoder) EncodeWithEncoding(input string, encoding Encoding) 
(MetaString, error) {
+       if encoding != UTF_8 && !isASCII(input) {
+           return MetaString{}, errors.New("non-ASCII characters in meta 
string are not allowed")
+       }
        if len(input) > 32767 {
                return MetaString{}, errors.New("long meta string than 32767 is 
not allowed")
        }
@@ -167,6 +170,15 @@ func (e *Encoder) ComputeEncoding(input string) Encoding {
        return UTF_8
 }
 
+func isASCII(input string) bool {
+    for _, r := range input {
+        if r > 127 {
+                       return false
+        }
+    }
+    return true
+}
+
 type stringStatistics struct {
        digitCount                       int
        upperCount                       int
diff --git 
a/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java 
b/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java
index 43b48329..619a441c 100644
--- a/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java
+++ b/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java
@@ -23,6 +23,7 @@ import java.nio.charset.StandardCharsets;
 import java.util.HashSet;
 import org.apache.fury.collection.Collections;
 import org.apache.fury.meta.MetaString.Encoding;
+import org.apache.fury.serializer.StringSerializer;
 import org.apache.fury.util.Preconditions;
 
 /** Encodes plain text strings into MetaString objects with specified encoding 
mechanisms. */
@@ -70,6 +71,9 @@ public class MetaStringEncoder {
   public MetaString encode(String input, Encoding encoding) {
     Preconditions.checkArgument(
         input.length() < Short.MAX_VALUE, "Long meta string than 32767 is not 
allowed");
+    if (encoding != Encoding.UTF_8 && 
!StringSerializer.isLatin(input.toCharArray())) {
+      throw new IllegalArgumentException("Non-ASCII characters in meta string 
are not allowed");
+    }
     if (input.isEmpty()) {
       return new MetaString(input, Encoding.UTF_8, specialChar1, specialChar2, 
new byte[0]);
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to