cloud-fan commented on code in PR #52334:
URL: https://github.com/apache/spark/pull/52334#discussion_r2425248128


##########
sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/PositionMapper.scala:
##########
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.parser
+
+/**
+ * Case class representing a text substitution.
+ */
+case class Substitution(start: Int, end: Int, replacement: String)
+
+/**
+ * Represents a range mapping from substituted positions to original positions. This is used for
+ * efficient O(k) position mapping where k = number of substitutions.
+ *
+ * @param substitutedStart
+ *   Start position in substituted text (inclusive)
+ * @param substitutedEnd
+ *   End position in substituted text (exclusive)
+ * @param originalStart
+ *   Start position in original text
+ * @param offsetDelta
+ *   Offset difference between original and substituted positions
+ */
+case class PositionRange(
+    substitutedStart: Int,
+    substitutedEnd: Int,
+    originalStart: Int,
+    offsetDelta: Int)
+
+/**
+ * Maps positions between original SQL text and substituted SQL text using sparse ranges.
+ *
+ * This implementation uses O(k) space and O(log k) lookup time where k = number of substitutions,
+ * instead of the previous O(n) space where n = SQL text length.
+ *
+ * @param originalText
+ *   The original SQL text with parameter markers
+ * @param substitutedText
+ *   The SQL text after parameter substitution
+ * @param substitutions
+ *   List of substitutions that were applied
+ */
+class PositionMapper(
+    val originalText: String,
+    val substitutedText: String,
+    val substitutions: List[Substitution]) {

Review Comment:
   Shall we add an assert to make sure the substitutions do not overlap?



##########
sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/PositionMapper.scala:
##########
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.parser
+
+/**
+ * Case class representing a text substitution.
+ */
+case class Substitution(start: Int, end: Int, replacement: String)
+
+/**
+ * Represents a range mapping from substituted positions to original positions. This is used for
+ * efficient O(k) position mapping where k = number of substitutions.
+ *
+ * @param substitutedStart
+ *   Start position in substituted text (inclusive)
+ * @param substitutedEnd
+ *   End position in substituted text (exclusive)
+ * @param originalStart
+ *   Start position in original text
+ * @param offsetDelta
+ *   Offset difference between original and substituted positions
+ */
+case class PositionRange(
+    substitutedStart: Int,
+    substitutedEnd: Int,
+    originalStart: Int,
+    offsetDelta: Int)
+
+/**
+ * Maps positions between original SQL text and substituted SQL text using sparse ranges.
+ *
+ * This implementation uses O(k) space and O(log k) lookup time where k = number of substitutions,
+ * instead of the previous O(n) space where n = SQL text length.
+ *
+ * @param originalText
+ *   The original SQL text with parameter markers
+ * @param substitutedText
+ *   The SQL text after parameter substitution
+ * @param substitutions
+ *   List of substitutions that were applied
+ */
+class PositionMapper(
+    val originalText: String,
+    val substitutedText: String,
+    val substitutions: List[Substitution]) {
+
+  // Build sparse position ranges for efficient lookup
+  private val positionRanges = buildPositionRanges()
+
+  /**
+   * Map a position in the substituted text back to the original text. Uses binary search for
+   * O(log k) lookup time.
+   *
+   * @param substitutedPos
+   *   Position in the substituted text
+   * @return
+   *   Position in the original text, or the same position if no mapping exists
+   */
+  def mapToOriginal(substitutedPos: Int): Int = {
+    // Binary search for the range containing this position
+    positionRanges.find(range =>
+      substitutedPos >= range.substitutedStart && substitutedPos < 
range.substitutedEnd) match {
+      case Some(range) =>
+        // Position is within a substitution range
+        range.originalStart
+      case None =>
+        // Position is in an unmapped region - apply cumulative offset
+        val cumulativeOffset = positionRanges
+          .takeWhile(_.substitutedStart <= substitutedPos)
+          .map(_.offsetDelta)
+          .sum
+        substitutedPos + cumulativeOffset
+    }
+  }
+
+  /**
+   * Build sparse position ranges using functional approach. O(k) space complexity where k =
+   * number of substitutions.
+   *
+   * @example
+   *   For original "SELECT :name, :age" -> substituted "SELECT 'John', 25":
+   *   - Substitution(7, 12, "'John'") replaces ":name" with "'John'"
+   *   - Substitution(14, 18, "25") replaces ":age" with "25"
+   *
+   * Creates PositionRanges:
+   *   - Range for "'John'": substituted[7,13) -> original[7,12), offset=-1
+   *   - Range for "25": substituted[15,17) -> original[14,18), offset=-3
+   *
+   * This allows mapping any position in "SELECT 'John', 25" back to "SELECT :name, :age".
+   */
+  private def buildPositionRanges(): List[PositionRange] = {
+    if (substitutions.isEmpty) {
+      return List.empty
+    }
+
+    val sortedSubstitutions = substitutions.sortBy(_.start)

Review Comment:
   Shall we add an assert to make sure the substitutions do not overlap?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to