This is an automated email from the ASF dual-hosted git repository.

xiangfu0 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 5526d5b8601 Add OVERLAY scalar function for SQL-standard substring 
replacement (#18790)
5526d5b8601 is described below

commit 5526d5b86018bc8d22e10ece356d107dff5e280f
Author: Akanksha kedia <[email protected]>
AuthorDate: Fri Jun 19 08:58:05 2026 +0530

    Add OVERLAY scalar function for SQL-standard substring replacement (#18790)
    
    * Add TRANSLATE scalar function for character-level string substitution
    
    Implements the SQL standard TRANSLATE(string, from, to) function, which
    replaces each character in 'from' with the corresponding character in
    'to'. Characters beyond the length of 'to' are deleted from the output.
    This matches the behavior in PostgreSQL, Oracle, and Trino.
    
      translate('hello', 'aeiou', 'AEIOU')  → 'hEllO'
      translate('abc',   'abc',   'xy')     → 'xy'    (c deleted)
      translate('abc',   'abc',   '')       → ''      (all deleted)
    
    Adds unit tests covering basic replacement, deletion, no-op (no match),
    empty inputs, and duplicate characters in 'from'.
    
    Co-Authored-By: Claude Sonnet 4.6 <[email protected]>
    
    * Add OVERLAY scalar function for SQL-standard substring replacement
    
    Implements OVERLAY(string PLACING replacement FROM start [FOR length]),
    which replaces `length` characters of `string` starting at 1-based
    position `start` with `replacement`.
    
    When `length` is omitted it defaults to the length of `replacement`,
    so replaced and inserted substrings are the same width — matching the
    SQL standard and the behaviour in PostgreSQL, Trino, and DuckDB.
    
      overlay('hello world', 'there', 7)        → 'hello there'
      overlay('abcdef', 'XY', 3, 0)             → 'abXYcdef'  (insert)
      overlay('abcdef', 'XY', 3, 4)             → 'abXYf'     (delete more)
      overlay('abcdef', '', 3, 2)               → 'abef'       (delete only)
    
    Two overloads are registered:
      overlay(str, replacement, start)             -- length defaults to 
len(replacement)
      overlay(str, replacement, start, length)     -- explicit length
    
    Adds unit tests covering basic replacement, insertion (length=0),
    out-of-range start/length clamping, empty inputs, and full-string 
replacement.
    
    Co-Authored-By: Claude Sonnet 4.6 <[email protected]>
    
    * Fix wrong test expectation in testOverlay
    
    overlay("abcdef", "XY", 3, 4) deletes positions 3-6 (cdef), leaving
    nothing after position 6, so the result is "abXY" not "abXYf".
    Also correct the same example in the Javadoc.
    
    Co-Authored-By: Claude Sonnet 4.6 <[email protected]>
    
    ---------
    
    Co-authored-by: Claude Sonnet 4.6 <[email protected]>
---
 .../common/function/scalar/StringFunctions.java    | 83 ++++++++++++++++++++++
 .../function/scalar/StringFunctionsTest.java       | 71 ++++++++++++++++++
 2 files changed, 154 insertions(+)

diff --git 
a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
 
b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
index c195efeb855..d3a4c626357 100644
--- 
a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
+++ 
b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
@@ -1120,4 +1120,87 @@ public class StringFunctions {
     Matcher matcher = Pattern.compile(regexp).matcher(input);
     return matcher.find() ? matcher.group() : null;
   }
+
+  /**
+   * Returns a string with every occurrence of a character in {@code from} 
replaced by the corresponding character in
+   * {@code to}. Characters in {@code input} that do not appear in {@code 
from} are left unchanged. If {@code from} is
+   * longer than {@code to}, characters beyond the length of {@code to} are 
deleted from the result. Equivalent to the
+   * SQL standard {@code TRANSLATE(string, from, to)} function supported by 
PostgreSQL, Oracle, and Trino.
+   *
+   * <p>Examples:
+   * <pre>
+   *   translate("hello", "aeiou", "AEIOU") → "hEllO"
+   *   translate("abc", "abc", "xy")        → "xy"   (c has no target → 
deleted)
+   *   translate("abcdef", "ace", "XY")     → "XbYdf" (e has no target → 
deleted)
+   * </pre>
+   *
+   * @param input the source string
+   * @param from  characters to search for (must not be null)
+   * @param to    replacement characters (must not be null; may be shorter 
than {@code from})
+   * @return the translated string
+   */
+  @ScalarFunction
+  public static String translate(String input, String from, String to) {
+    if (input.isEmpty() || from.isEmpty()) {
+      return input;
+    }
+    StringBuilder sb = new StringBuilder(input.length());
+    for (int i = 0; i < input.length(); i++) {
+      char c = input.charAt(i);
+      int idx = from.indexOf(c);
+      if (idx < 0) {
+        sb.append(c);
+      } else if (idx < to.length()) {
+        sb.append(to.charAt(idx));
+      }
+      // idx >= to.length(): character is deleted (not appended)
+    }
+    return sb.toString();
+  }
+
+  /**
+   * Replaces a substring of {@code input} with {@code replacement}, starting 
at the 1-based position {@code start}
+   * and replacing {@code length} characters. Equivalent to the SQL standard
+   * {@code OVERLAY(string PLACING replacement FROM start FOR length)} 
function supported by PostgreSQL and Trino.
+   *
+   * <p>When {@code length} is omitted (i.e. {@code OVERLAY(string PLACING 
new_str FROM start)}), it defaults to the
+   * length of {@code replacement}, so the replaced and inserted substrings 
are the same width.
+   *
+   * <p>Examples:
+   * <pre>
+   *   overlay("hello world", "there", 7)     → "hello there"   (length 
defaults to len("there") = 5)
+   *   overlay("hello world", "there", 7, 5)  → "hello there"
+   *   overlay("abcdef", "XY", 3, 0)          → "abXYcdef"      (insertion 
without deletion)
+   *   overlay("abcdef", "XY", 3, 4)          → "abXY"          (delete 4 
chars cdef, insert 2)
+   * </pre>
+   *
+   * @param input       the source string
+   * @param replacement the string to insert
+   * @param start       1-based position in {@code input} at which replacement 
begins (clamped to [1, len+1])
+   * @return the result string
+   */
+  @ScalarFunction
+  public static String overlay(String input, String replacement, int start) {
+    return overlay(input, replacement, start, replacement.length());
+  }
+
+  /**
+   * Replaces {@code length} characters of {@code input} with {@code 
replacement}, starting at the 1-based position
+   * {@code start}. Equivalent to the SQL standard
+   * {@code OVERLAY(string PLACING replacement FROM start FOR length)} 
function.
+   *
+   * @param input       the source string
+   * @param replacement the string to insert
+   * @param start       1-based start position (clamped to [1, len+1])
+   * @param length      number of characters to delete from {@code input} 
(clamped to [0, remaining])
+   * @return the result string
+   */
+  @ScalarFunction
+  public static String overlay(String input, String replacement, int start, 
int length) {
+    int len = input.length();
+    // Convert to 0-based, clamping to valid range
+    int s = Math.max(0, Math.min(start - 1, len));
+    int e = Math.min(s + Math.max(0, length), len);
+    return input.substring(0, s) + replacement + input.substring(e);
+  }
 }
diff --git 
a/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java
 
b/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java
index 60b35629f68..8befc66b45f 100644
--- 
a/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java
+++ 
b/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java
@@ -568,4 +568,75 @@ public class StringFunctionsTest {
     assertEquals(StringFunctions.regexpSubstr("", "a"), null);
     assertEquals(StringFunctions.regexpSubstr("Hello World", "[A-Z][a-z]+"), 
"Hello");
   }
+
+  // ==================== Tests for translate ====================
+
+  @Test
+  public void testTranslate() {
+    // Basic character replacement
+    assertEquals(StringFunctions.translate("hello", "aeiou", "AEIOU"), 
"hEllO");
+
+    // Replacement and deletion: 'c' has no target in 'to' → deleted
+    assertEquals(StringFunctions.translate("abc", "abc", "xy"), "xy");
+
+    // Mixed: replacement and deletion
+    assertEquals(StringFunctions.translate("abcdef", "ace", "XY"), "XbYdf");
+
+    // No characters in 'from' match → input unchanged
+    assertEquals(StringFunctions.translate("hello", "xyz", "123"), "hello");
+
+    // Empty 'from' → input unchanged
+    assertEquals(StringFunctions.translate("hello", "", "abc"), "hello");
+
+    // Empty input → empty result
+    assertEquals(StringFunctions.translate("", "abc", "xyz"), "");
+
+    // All characters deleted (to is empty)
+    assertEquals(StringFunctions.translate("abc", "abc", ""), "");
+
+    // SQL standard example: partial substitution — digits 0-3 map to z/e/r/o; 
digits 4-9 are deleted
+    assertEquals(StringFunctions.translate("12300", "0123456789", "zero"), 
"erozz");
+
+    // Duplicate characters in 'from': first occurrence wins
+    assertEquals(StringFunctions.translate("aaa", "aa", "XY"), "XXX");
+  }
+
+  // ==================== Tests for overlay ====================
+
+  @Test
+  public void testOverlay() {
+    // Basic replacement: length defaults to length of replacement
+    assertEquals(StringFunctions.overlay("hello world", "there", 7), "hello 
there");
+
+    // Explicit length equal to length of replacement — same result
+    assertEquals(StringFunctions.overlay("hello world", "there", 7, 5), "hello 
there");
+
+    // Zero-length deletion: pure insertion
+    assertEquals(StringFunctions.overlay("abcdef", "XY", 3, 0), "abXYcdef");
+
+    // Delete more than replacement length: replacement is shorter than 
deleted span
+    // FROM 3 FOR 4 removes positions 3-6 (cdef), nothing remains after 
position 6
+    assertEquals(StringFunctions.overlay("abcdef", "XY", 3, 4), "abXY");
+
+    // Replace at start (position 1)
+    assertEquals(StringFunctions.overlay("abcdef", "Z", 1, 1), "Zbcdef");
+
+    // Replace at end
+    assertEquals(StringFunctions.overlay("abcdef", "Z", 6, 1), "abcdeZ");
+
+    // Replace entire string
+    assertEquals(StringFunctions.overlay("abcdef", "XY", 1, 6), "XY");
+
+    // Empty replacement (deletion only)
+    assertEquals(StringFunctions.overlay("abcdef", "", 3, 2), "abef");
+
+    // Empty input: result is the replacement
+    assertEquals(StringFunctions.overlay("", "abc", 1), "abc");
+
+    // start beyond end: appends replacement
+    assertEquals(StringFunctions.overlay("abc", "XY", 10), "abcXY");
+
+    // length clamped: cannot delete past end of string
+    assertEquals(StringFunctions.overlay("abc", "Z", 2, 100), "aZ");
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to