JoshRosen commented on code in PR #46894:
URL: https://github.com/apache/spark/pull/46894#discussion_r1630217904


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala:
##########
@@ -63,22 +63,26 @@ object ExternalCatalogUtils {
     bitSet
   }
 
-  def needsEscaping(c: Char): Boolean = {
+  @inline private final def needsEscaping(c: Char): Boolean = {
     c < charToEscape.size() && charToEscape.get(c)
   }
 
   def escapePathName(path: String): String = {
-    val builder = new StringBuilder()
-    path.foreach { c =>
-      if (needsEscaping(c)) {
-        builder.append('%')
-        builder.append(f"${c.asInstanceOf[Int]}%02X")
-      } else {
-        builder.append(c)
+    val firstIndex = path.indexWhere(needsEscaping)
+    if (firstIndex == -1) {
+      path
+    } else {
+      val builder = new StringBuilder(path.substring(0, firstIndex))
+      path.substring(firstIndex).foreach { c =>
+        if (needsEscaping(c)) {
+          builder.append('%')
+          builder.append(f"${c.asInstanceOf[Int]}%02X")
+        } else {
+          builder.append(c)
+        }
       }
+      builder.toString()
     }
-
-    builder.toString()
   }
 
 

Review Comment:
   We can probably apply similar performance optimizations to 
`unescapePathName`, which is defined just below this method.



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala:
##########
@@ -63,22 +63,26 @@ object ExternalCatalogUtils {
     bitSet
   }
 
-  def needsEscaping(c: Char): Boolean = {
+  @inline private final def needsEscaping(c: Char): Boolean = {

Review Comment:
   I wonder whether something like
   
   ```scala
     /**
      * Escapes every character of `path` that is flagged in `charToEscape` as '%' followed by
      * the character's code formatted with `%02X`.
      *
      * If no character needs escaping, the input string itself is returned and no builder is
      * allocated.
      *
      * @param path the path name to escape
      * @return the escaped path name, or `path` itself when nothing had to be escaped
      */
     def escapePathName(path: String): String = {
       // Stays null until the first character that needs escaping is seen, so the common
       // "nothing to escape" case performs no allocation at all.
       var builder: StringBuilder = null
       var i = 0
       while (i < path.length) {
         val c = path.charAt(i)
         if (c < charToEscape.size() && charToEscape.get(c)) {
           // Character needs escaping:
           if (builder eq null) {
             // First escaped character: seed the builder with the untouched prefix, exactly once.
             builder = new StringBuilder(path.substring(0, i))
           }
           builder.append('%')
           builder.append(f"${c.asInstanceOf[Int]}%02X")
         } else if (builder ne null) {
           // Character does not need escaping. It only has to be copied once a builder exists;
           // before that, it is already covered by the prefix that will seed the builder.
           builder.append(c)
         }
         i += 1
       }
       if (builder eq null) {
         path
       } else {
         builder.toString()
       }
     }
   ```
   
   could be faster since it would avoid StringOps function calls for 
`indexWhere` plus function calls in `.foreach`. I think the cost of the builder 
null checks in the loop should be pretty cheap, although I can also appreciate 
how those checks might potentially impact loop unrolling. I think my suggestion 
should wind up being faster than what you have, even if it's not the 
theoretically fastest possible solution.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to