Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/21246#discussion_r187837123
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala
---
@@ -0,0 +1,569 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions
+
+import org.apache.commons.codec.digest.DigestUtils
+
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.expressions.MaskExpressionsUtils._
+import org.apache.spark.sql.catalyst.expressions.MaskLike._
+import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext,
CodeGenerator, ExprCode}
+import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.UTF8String
+
+/**
+ * Shared building blocks for mask expressions. The trait offers two families of helpers:
+ * methods ending in `Code` return Java source fragments as strings, while the
+ * `StringBuffer` methods run the same per-code-point loop directly.
+ *
+ * Implementors supply `upper`, `lower` and `digit`. Each one is handed to
+ * `getReplacementChar` together with the matching default from the `MaskLike` object.
+ *
+ * NOTE(review): `getReplacementChar` and `transformChar` are imported from
+ * `MaskExpressionsUtils`, which is not visible here. Presumably the former picks the
+ * replacement code point (falling back to the default) and the latter maps one code
+ * point to its masked form -- confirm against that class.
+ */
+trait MaskLike {
+ def upper: String
+ def lower: String
+ def digit: String
+ /**
+ * Values passed to `transformChar` as the upper, lower and digit replacements.
+ * They are lazy, so `getReplacementChar` only runs on first access.
+ */
+ protected lazy val upperReplacement: Int = getReplacementChar(upper,
defaultMaskedUppercase)
+ protected lazy val lowerReplacement: Int = getReplacementChar(lower,
defaultMaskedLowercase)
+ protected lazy val digitReplacement: Int = getReplacementChar(digit,
defaultMaskedDigit)
+ /**
+ * Name of the `MaskExpressionsUtils` class as reported by `getName`. The generated
+ * code uses it to qualify its call to `transformChar`.
+ */
+ protected val maskUtilsClassName: String =
classOf[MaskExpressionsUtils].getName
+ /**
+ * Returns a Java statement that declares a `CodeGenerator.JAVA_INT` variable and
+ * assigns it the number of code points in a string variable, computed with
+ * `codePointCount(0, length())`.
+ *
+ * @param inputString name of the string variable in the generated code
+ * @param length name of the variable the statement declares
+ */
+ def inputStringLengthCode(inputString: String, length: String): String =
{
+ s"${CodeGenerator.JAVA_INT} $length = $inputString.codePointCount(0,
$inputString.length());"
+ }
+ /**
+ * Returns Java source for a loop that runs `numChars` times. Each iteration reads the
+ * code point at `offset`, appends the result of `transformChar` to `sb`, and advances
+ * `offset` by `Character.charCount` of the code point that was read.
+ *
+ * The emitted loop assigns to `offset`, so it must name a mutable int variable in the
+ * generated code. The loop performs no bounds check of its own.
+ *
+ * @param ctx codegen context, used only to obtain fresh names for the loop locals
+ * @param sb name of the builder variable that receives the masked code points
+ * @param inputString name of the string variable being read
+ * @param offset name of the int variable holding the current char index
+ * @param numChars name of, or expression for, the number of code points to process
+ */
+ def appendMaskedToStringBuilderCode(
+ ctx: CodegenContext,
+ sb: String,
+ inputString: String,
+ offset: String,
+ numChars: String): String = {
+ val i = ctx.freshName("i")
+ val codePoint = ctx.freshName("codePoint")
+ s"""
+ |for (${CodeGenerator.JAVA_INT} $i = 0; $i < $numChars; $i++) {
+ | ${CodeGenerator.JAVA_INT} $codePoint =
$inputString.codePointAt($offset);
+ | $sb.appendCodePoint($maskUtilsClassName.transformChar($codePoint,
+ | $upperReplacement, $lowerReplacement,
+ | $digitReplacement, $defaultMaskedOther));
+ | $offset += Character.charCount($codePoint);
+ |}
+ """.stripMargin
+ }
+ /**
+ * Returns Java source for a loop that copies `numChars` code points from `inputString`
+ * to `sb` without transforming them, advancing `offset` past each one.
+ *
+ * The parameters have the same meaning as in `appendMaskedToStringBuilderCode`. The
+ * only difference in the emitted code is that the code point is appended as read.
+ */
+ def appendUnchangedToStringBuilderCode(
+ ctx: CodegenContext,
+ sb: String,
+ inputString: String,
+ offset: String,
+ numChars: String): String = {
+ val i = ctx.freshName("i")
+ val codePoint = ctx.freshName("codePoint")
+ s"""
+ |for (${CodeGenerator.JAVA_INT} $i = 0; $i < $numChars; $i++) {
+ | ${CodeGenerator.JAVA_INT} $codePoint =
$inputString.codePointAt($offset);
+ | $sb.appendCodePoint($codePoint);
+ | $offset += Character.charCount($codePoint);
+ |}
+ """.stripMargin
+ }
+ /**
+ * Appends `numChars` code points of `inputString`, starting at char index
+ * `startOffset`, to `sb` after passing each one through `transformChar`.
+ *
+ * No bounds check is made here, so the caller must ensure that at least `numChars`
+ * code points remain from `startOffset`. A `numChars` of zero or less appends nothing.
+ *
+ * @param sb buffer that receives the transformed code points
+ * @param inputString string to read from
+ * @param startOffset char index of the first code point to process
+ * @param numChars number of code points to process
+ * @return the char index just past the last code point processed
+ */
+ def appendMaskedToStringBuffer(
+ sb: StringBuffer,
+ inputString: String,
+ startOffset: Int,
+ numChars: Int): Int = {
+ var offset = startOffset
+ (1 to numChars) foreach { _ =>
+ val codePoint = inputString.codePointAt(offset)
+ sb.appendCodePoint(transformChar(
+ codePoint,
+ upperReplacement,
+ lowerReplacement,
+ digitReplacement,
+ defaultMaskedOther))
+ offset += Character.charCount(codePoint)
+ }
+ offset
+ }
+ /**
+ * Appends `numChars` code points of `inputString`, starting at char index
+ * `startOffset`, to `sb` exactly as read.
+ *
+ * No bounds check is made here, so the caller must ensure that at least `numChars`
+ * code points remain from `startOffset`. A `numChars` of zero or less appends nothing.
+ *
+ * @param sb buffer that receives the copied code points
+ * @param inputString string to read from
+ * @param startOffset char index of the first code point to copy
+ * @param numChars number of code points to copy
+ * @return the char index just past the last code point copied
+ */
+ def appendUnchangedToStringBuffer(
+ sb: StringBuffer,
+ inputString: String,
+ startOffset: Int,
+ numChars: Int): Int = {
+ var offset = startOffset
+ (1 to numChars) foreach { _ =>
+ val codePoint = inputString.codePointAt(offset)
+ sb.appendCodePoint(codePoint)
+ offset += Character.charCount(codePoint)
+ }
+ offset
+ }
+}
+/**
+ * A `MaskLike` that additionally carries a character count `n`.
+ *
+ * `charCount` is `n` with any negative value replaced by 0. It is lazy, so `n` is
+ * only read on first access.
+ */
+trait MaskLikeWithN extends MaskLike {
+ def n: Int
+ protected lazy val charCount: Int = if (n < 0) 0 else n
+}
+
+/**
+ * Utils for mask operations.
+ */
+object MaskLike {
+ val defaultCharCount = 4
+ val defaultMaskedUppercase: Int = 'X'
+ val defaultMaskedLowercase: Int = 'x'
+ val defaultMaskedDigit: Int = 'n'
+ val defaultMaskedOther: Int = MaskExpressionsUtils.UNMASKED_VAL
+
+ def extractCharCount(e: Expression): Int = e match {
+ case Literal(i, IntegerType|NullType) =>
+ if (i == null) defaultCharCount else i.asInstanceOf[Int]
+ case Literal(_, dt) => throw new AnalysisException(s"Expected literal
expression of type " +
+ s"${IntegerType.simpleString}, but got literal of
${dt.simpleString}")
+ case _ => defaultCharCount
--- End diff --
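Could this cause unexpected behavior for users? With `case _ => defaultCharCount`, any argument that is not a literal silently falls back to the default character count instead of being rejected.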
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]