hilaryRope commented on code in PR #38099:
URL: https://github.com/apache/beam/pull/38099#discussion_r3054447828


##########
sdks/go/pkg/beam/io/filesystem/gcs/gcs.go:
##########
@@ -38,6 +39,91 @@ const (
        projectBillingHook = "beam:go:hook:filesystem:billingproject"
 )
 
+// globToRegex translates a glob pattern to a regular expression.
+// It differs from filepath.Match in that:
+//   - / is treated as a regular character (not a separator), since GCS object
+//     names are flat with / being just another character
+//   - ** matches any sequence of characters including / (zero or more)
+//   - **/ matches zero or more path segments (e.g., "" or "dir/" or "dir/subdir/")
+//   - * matches any sequence of characters except / (zero or more)
+//   - ? matches a single character (any character including /)
+//
+// This matches the behavior of the Python and Java SDKs.
+func globToRegex(pattern string) (*regexp.Regexp, error) {
+       var result strings.Builder
+       result.WriteString("^")
+
+       for i := 0; i < len(pattern); i++ {
+               c := pattern[i]
+               switch c {
+               case '*':
+                       // Check for ** (double asterisk)
+                       if i+1 < len(pattern) && pattern[i+1] == '*' {
+                               // Check if followed by / (e.g., "**/" matches zero or more path segments)
+                               if i+2 < len(pattern) && pattern[i+2] == '/' {
+                                       // **/ matches "" or "something/" or "a/b/c/"
+                                       result.WriteString("(.*/)?")
+                                       i += 2 // Skip the second * and the /
+                               } else {
+                                       // ** at end or before non-slash matches any characters
+                                       result.WriteString(".*")
+                                       i++ // Skip the second *
+                               }
+                       } else {
+                               result.WriteString("[^/]*")
+                       }
+               case '?':
+                       result.WriteString(".")
+               case '[':
+                       // Character class - find the closing bracket
+                       j := i + 1
+                       if j < len(pattern) && pattern[j] == '!' {

Review Comment:
   Done, resolved the inconsistency and updated the comment



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to