[GitHub] [spark] grundprinzip commented on a diff in pull request #40447: [SPARK-42816][CONNECT] Support Max Message size up to 128MB
grundprinzip commented on code in PR #40447: URL: https://github.com/apache/spark/pull/40447#discussion_r1138163227 ## python/pyspark/sql/connect/client.py: ## @@ -122,6 +122,8 @@ class ChannelBuilder: PARAM_TOKEN = "token" PARAM_USER_ID = "user_id" PARAM_USER_AGENT = "user_agent" +MAX_MESSAGE_LENGTH = 128 * 1024 * 1024 +MAX_METADATA_LENGTH = 16 * 1024 * 1024 Review Comment: No, this is a leftover. Removing. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] navinvishy commented on pull request #38947: [SPARK-41233][SQL][PYTHON] Add `array_prepend` function
navinvishy commented on PR #38947: URL: https://github.com/apache/spark/pull/38947#issuecomment-1471352117 > @navinvishy would you mind addressing wenchen's comments? we can merge it then. I've addressed them. Thanks for checking, @zhengruifeng ! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] navinvishy commented on a diff in pull request #38947: [SPARK-41233][SQL][PYTHON] Add `array_prepend` function
navinvishy commented on code in PR #38947: URL: https://github.com/apache/spark/pull/38947#discussion_r1138152130 ## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala: ## @@ -1399,6 +1399,151 @@ case class ArrayContains(left: Expression, right: Expression) copy(left = newLeft, right = newRight) } +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ + _FUNC_(array, element) - Add the element at the beginning of the array passed as first + argument. Type of element should be similar to type of the elements of the array. + Null element is also prepended to the array. But if the array passed is NULL + output is NULL +""", + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd'); + ["d","b","d","c","a"] + > SELECT _FUNC_(array(1, 2, 3, null), null); + [null,1,2,3,null] + > SELECT _FUNC_(CAST(null as Array), 2); + NULL + """, + group = "array_funcs", + since = "3.5.0") +case class ArrayPrepend(left: Expression, right: Expression) + extends BinaryExpression +with ImplicitCastInputTypes +with ComplexTypeMergingExpression +with QueryErrorsBase { + + override def nullable: Boolean = left.nullable + + @transient protected lazy val elementType: DataType = +inputTypes.head.asInstanceOf[ArrayType].elementType + + override def eval(input: InternalRow): Any = { +val value1 = left.eval(input) +if (value1 == null) { + null +} else { + val value2 = right.eval(input) + nullSafeEval(value1, value2) +} + } + override def nullSafeEval(arr: Any, elementData: Any): Any = { +val arrayData = arr.asInstanceOf[ArrayData] +val numberOfElements = arrayData.numElements() + 1 +if (numberOfElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) { + throw QueryExecutionErrors.concatArraysWithElementsExceedLimitError(numberOfElements) +} +val finalData = new Array[Any](numberOfElements) +finalData.update(0, elementData) +arrayData.foreach(elementType, (i: Int, v: Any) => finalData.update(i + 1, v)) +new 
GenericArrayData(finalData) + } + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { +val leftGen = left.genCode(ctx) +val rightGen = right.genCode(ctx) +val f = (arr: String, value: String) => { + val newArraySize = ctx.freshName("newArraySize") + val newArray = ctx.freshName("newArray") + val i = ctx.freshName("i") + val iPlus1 = s"$i+1" + val zero = "0" + val allocation = CodeGenerator.createArrayData( +newArray, +elementType, +newArraySize, +s" $prettyName failed.") + val assignment = +CodeGenerator.createArrayAssignment(newArray, elementType, arr, iPlus1, i, false) + val newElemAssignment = +CodeGenerator.setArrayElement(newArray, elementType, zero, value, Some(rightGen.isNull)) + s""" + |int $newArraySize = $arr.numElements() + 1; + |$allocation + |$newElemAssignment + |for (int $i = 0; $i < $arr.numElements(); $i ++) { + | $assignment + |} + |${ev.value} = $newArray; + |""".stripMargin +} +val resultCode = f(leftGen.value, rightGen.value) +if(nullable) { + val nullSafeEval = leftGen.code + rightGen.code + ctx.nullSafeExec(nullable, leftGen.isNull) { +s""" + |${ev.isNull} = false; + |${resultCode} + |""".stripMargin + } + ev.copy(code = +code""" +boolean ${ev.isNull} = true; +${CodeGenerator.javaType(dataType)} ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; +$nullSafeEval + """) Review Comment: Changed this, thanks! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] navinvishy commented on a diff in pull request #38947: [SPARK-41233][SQL][PYTHON] Add `array_prepend` function
navinvishy commented on code in PR #38947: URL: https://github.com/apache/spark/pull/38947#discussion_r1138150738 ## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala: ## @@ -1399,6 +1399,151 @@ case class ArrayContains(left: Expression, right: Expression) copy(left = newLeft, right = newRight) } +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ + _FUNC_(array, element) - Add the element at the beginning of the array passed as first + argument. Type of element should be similar to type of the elements of the array. + Null element is also prepended to the array. But if the array passed is NULL + output is NULL +""", + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd'); + ["d","b","d","c","a"] + > SELECT _FUNC_(array(1, 2, 3, null), null); + [null,1,2,3,null] + > SELECT _FUNC_(CAST(null as Array), 2); + NULL + """, + group = "array_funcs", + since = "3.5.0") +case class ArrayPrepend(left: Expression, right: Expression) + extends BinaryExpression +with ImplicitCastInputTypes +with ComplexTypeMergingExpression +with QueryErrorsBase { + + override def nullable: Boolean = left.nullable + + @transient protected lazy val elementType: DataType = +inputTypes.head.asInstanceOf[ArrayType].elementType + + override def eval(input: InternalRow): Any = { +val value1 = left.eval(input) +if (value1 == null) { + null +} else { + val value2 = right.eval(input) + nullSafeEval(value1, value2) +} + } + override def nullSafeEval(arr: Any, elementData: Any): Any = { +val arrayData = arr.asInstanceOf[ArrayData] +val numberOfElements = arrayData.numElements() + 1 +if (numberOfElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) { + throw QueryExecutionErrors.concatArraysWithElementsExceedLimitError(numberOfElements) +} +val finalData = new Array[Any](numberOfElements) +finalData.update(0, elementData) +arrayData.foreach(elementType, (i: Int, v: Any) => finalData.update(i + 1, v)) +new 
GenericArrayData(finalData) + } + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { +val leftGen = left.genCode(ctx) +val rightGen = right.genCode(ctx) +val f = (arr: String, value: String) => { + val newArraySize = ctx.freshName("newArraySize") + val newArray = ctx.freshName("newArray") + val i = ctx.freshName("i") + val iPlus1 = s"$i+1" + val zero = "0" + val allocation = CodeGenerator.createArrayData( +newArray, +elementType, +newArraySize, Review Comment: Yes, that wasn't really necessary. I've removed it, thanks! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] zhouyejoe commented on pull request #40412: [SPARK-42784] should still create subDir when the number of subDir in merge dir is less than conf
zhouyejoe commented on PR #40412: URL: https://github.com/apache/spark/pull/40412#issuecomment-1471342162 Thanks for creating the PR. Will review ASAP @Stove-hust -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] dongjoon-hyun closed pull request #40453: [SPARK-42820][BUILD] Update ORC to 1.8.3
dongjoon-hyun closed pull request #40453: [SPARK-42820][BUILD] Update ORC to 1.8.3 URL: https://github.com/apache/spark/pull/40453 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] dongjoon-hyun commented on pull request #40453: [SPARK-42820][BUILD] Update ORC to 1.8.3
dongjoon-hyun commented on PR #40453: URL: https://github.com/apache/spark/pull/40453#issuecomment-1471315962 Let me merge this~ Merged to master/3.4. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] dongjoon-hyun commented on pull request #40453: [SPARK-42820][BUILD] Update ORC to 1.8.3
dongjoon-hyun commented on PR #40453: URL: https://github.com/apache/spark/pull/40453#issuecomment-1471314521 It seems that there is some GitHub Action setting issue on William side. Actually, I was the release manager of Apache ORC 1.8.3 and tested this here in my repo. - https://github.com/dongjoon-hyun/spark/pull/13 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] ulysses-you commented on a diff in pull request #40446: [SPARK-42815][SQL] Subexpression elimination support shortcut expression
ulysses-you commented on code in PR #40446: URL: https://github.com/apache/spark/pull/40446#discussion_r1138120937 ## sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala: ## @@ -864,6 +864,14 @@ object SQLConf { .checkValue(_ >= 0, "The maximum must not be negative") .createWithDefault(100) + val SUBEXPRESSION_ELIMINATION_SKIP_FOR_SHORTCUT_EXPR = +buildConf("spark.sql.subexpressionElimination.skipForShortcutExpr") + .internal() + .doc("When true, shortcut eliminate subexpression with `AND`, `OR`.") Review Comment: sure, thank you @viirya -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] zhengruifeng commented on pull request #40432: [SPARK-42800][CONNECT][PYTHON][ML] Implement ml function `{array_to_vector, vector_to_array}`
zhengruifeng commented on PR #40432: URL: https://github.com/apache/spark/pull/40432#issuecomment-1471307277 @WeichenXu123 not ready. `sql slow` failed with message related to `mllib-common`: ``` [error] /home/runner/work/spark/spark/mllib/common/src/test/scala/org/apache/spark/ml/attribute/AttributeGroupSuite.scala:35:11: exception during macro expansion: [error] java.util.MissingResourceException: Can't find bundle for base name org.scalactic.ScalacticBundle, locale en [error] at java.util.ResourceBundle.throwMissingResourceException(ResourceBundle.java:1581) [error] at java.util.ResourceBundle.getBundleImpl(ResourceBundle.java:1396) [error] at java.util.ResourceBundle.getBundle(ResourceBundle.java:782) [error] at org.scalactic.Resources$.resourceBundle$lzycompute(Resources.scala:8) [error] at org.scalactic.Resources$.resourceBundle(Resources.scala:8) [error] at org.scalactic.Resources$.pleaseDefineScalacticFillFilePathnameEnvVar(Resources.scala:256) [error] at org.scalactic.source.PositionMacro$PositionMacroImpl.apply(PositionMacro.scala:65) [error] at org.scalactic.source.PositionMacro$.genPosition(PositionMacro.scala:85) [error] Caused by: java.io.IOException: Stream closed [error] at java.util.zip.InflaterInputStream.ensureOpen(InflaterInputStream.java:67) ``` the current test seems fine, but let's wait for the CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] ulysses-you commented on a diff in pull request #40446: [SPARK-42815][SQL] Subexpression elimination support shortcut expression
ulysses-you commented on code in PR #40446: URL: https://github.com/apache/spark/pull/40446#discussion_r1138102904 ## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala: ## @@ -128,10 +131,23 @@ class EquivalentExpressions { // There are some special expressions that we should not recurse into all of its children. // 1. CodegenFallback: it's children will not be used to generate code (call eval() instead) // 2. ConditionalExpression: use its children that will always be evaluated. - private def childrenToRecurse(expr: Expression): Seq[Expression] = expr match { -case _: CodegenFallback => Nil -case c: ConditionalExpression => c.alwaysEvaluatedInputs -case other => other.children + private def childrenToRecurse(expr: Expression): Seq[Expression] = { +val alwaysEvaluated = expr match { + case _: CodegenFallback => Nil + case c: ConditionalExpression => c.alwaysEvaluatedInputs + case other => other.children +} +if (shortcut) { + // The subexpression may not need to eval even if it appears more than once. + // e.g., `if(or(a, and(b, b)))`, the expression `b` would be skipped if `a` is true. + alwaysEvaluated.map { +case and: And => and.left +case or: Or => or.left +case other => other + } Review Comment: It is why we add a new config with disabled by default. We can not decide which subexpression would be evaluated before running. When enabling this config, it assumes that the left child is a shortcut, then the right child can be skipped whatever it contains common subexpression. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] WeichenXu123 commented on pull request #40432: [SPARK-42800][CONNECT][PYTHON][ML] Implement ml function `{array_to_vector, vector_to_array}`
WeichenXu123 commented on PR #40432: URL: https://github.com/apache/spark/pull/40432#issuecomment-1471290106 Is it ready to merge ? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] ulysses-you commented on a diff in pull request #40446: [SPARK-42815][SQL] Subexpression elimination support shortcut expression
ulysses-you commented on code in PR #40446: URL: https://github.com/apache/spark/pull/40446#discussion_r1138102904 ## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala: ## @@ -128,10 +131,23 @@ class EquivalentExpressions { // There are some special expressions that we should not recurse into all of its children. // 1. CodegenFallback: it's children will not be used to generate code (call eval() instead) // 2. ConditionalExpression: use its children that will always be evaluated. - private def childrenToRecurse(expr: Expression): Seq[Expression] = expr match { -case _: CodegenFallback => Nil -case c: ConditionalExpression => c.alwaysEvaluatedInputs -case other => other.children + private def childrenToRecurse(expr: Expression): Seq[Expression] = { +val alwaysEvaluated = expr match { + case _: CodegenFallback => Nil + case c: ConditionalExpression => c.alwaysEvaluatedInputs + case other => other.children +} +if (shortcut) { + // The subexpression may not need to eval even if it appears more than once. + // e.g., `if(or(a, and(b, b)))`, the expression `b` would be skipped if `a` is true. + alwaysEvaluated.map { +case and: And => and.left +case or: Or => or.left +case other => other + } Review Comment: It is why we add a new config with disabled by default. We can not decide which subexpression would be evaluated before running. When enabling this config, it assumes that the left child is a shortcut, then the right child can be skipped whatever it contains common subexpression. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] shuwang21 commented on pull request #40448: [SPARK-42817][CORE] Logging the shuffle service name once in ApplicationMaster
shuwang21 commented on PR #40448: URL: https://github.com/apache/spark/pull/40448#issuecomment-1471288187 LGTM. thanks! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] viirya commented on a diff in pull request #40446: [SPARK-42815][SQL] Subexpression elimination support shortcut expression
viirya commented on code in PR #40446: URL: https://github.com/apache/spark/pull/40446#discussion_r1138097640 ## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala: ## @@ -128,10 +131,23 @@ class EquivalentExpressions { // There are some special expressions that we should not recurse into all of its children. // 1. CodegenFallback: it's children will not be used to generate code (call eval() instead) // 2. ConditionalExpression: use its children that will always be evaluated. - private def childrenToRecurse(expr: Expression): Seq[Expression] = expr match { -case _: CodegenFallback => Nil -case c: ConditionalExpression => c.alwaysEvaluatedInputs -case other => other.children + private def childrenToRecurse(expr: Expression): Seq[Expression] = { +val alwaysEvaluated = expr match { + case _: CodegenFallback => Nil + case c: ConditionalExpression => c.alwaysEvaluatedInputs + case other => other.children +} +if (shortcut) { + // The subexpression may not need to eval even if it appears more than once. + // e.g., `if(or(a, and(b, b)))`, the expression `b` would be skipped if `a` is true. + alwaysEvaluated.map { +case and: And => and.left +case or: Or => or.left +case other => other + } Review Comment: Hmm, if other expression (e.g. `or.right`) is not added into recursing list, how can we look into if it needs to be eliminated? If `or.left` is false, it will be evaluated, isn't it? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] viirya commented on a diff in pull request #40446: [SPARK-42815][SQL] Subexpression elimination support shortcut expression
viirya commented on code in PR #40446: URL: https://github.com/apache/spark/pull/40446#discussion_r1138095296 ## sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala: ## @@ -864,6 +864,14 @@ object SQLConf { .checkValue(_ >= 0, "The maximum must not be negative") .createWithDefault(100) + val SUBEXPRESSION_ELIMINATION_SKIP_FOR_SHORTCUT_EXPR = +buildConf("spark.sql.subexpressionElimination.skipForShortcutExpr") + .internal() + .doc("When true, shortcut eliminate subexpression with `AND`, `OR`.") Review Comment: The description is very unclear. Can you add some more? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] LuciferYang commented on pull request #40452: [MINOR] Add comments of `xercesImpl` upgrade precautions in `pom.xml`
LuciferYang commented on PR #40452: URL: https://github.com/apache/spark/pull/40452#issuecomment-1471278961 Thanks @dongjoon-hyun @yaooqinn -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] dongjoon-hyun closed pull request #40452: [MINOR] Add comments of `xercesImpl` upgrade precautions in `pom.xml`
dongjoon-hyun closed pull request #40452: [MINOR] Add comments of `xercesImpl` upgrade precautions in `pom.xml` URL: https://github.com/apache/spark/pull/40452 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] LuciferYang commented on a diff in pull request #40452: [MINOR] Add comments of `xercesImpl` upgrade precautions in `pom.xml`
LuciferYang commented on code in PR #40452: URL: https://github.com/apache/spark/pull/40452#discussion_r1138092261 ## pom.xml: ## @@ -1426,6 +1426,7 @@ test + Review Comment: done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] LuciferYang commented on pull request #40389: [SPARK-42767][CONNECT][TESTS] Add a precondition to start connect server fallback with `in-memory` and auto ignored some tests strongly d
LuciferYang commented on PR #40389: URL: https://github.com/apache/spark/pull/40389#issuecomment-1471275097 Thanks @HyukjinKwon and @Hisoka-X -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] yaooqinn commented on pull request #40453: [SPARK-42820][BUILD] Update ORC to 1.8.3
yaooqinn commented on PR #40453: URL: https://github.com/apache/spark/pull/40453#issuecomment-1471274601 LGTM, thanks @williamhyun @dongjoon-hyun -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] HyukjinKwon closed pull request #40389: [SPARK-42767][CONNECT][TESTS] Add a precondition to start connect server fallback with `in-memory` and auto ignored some tests strongly depend
HyukjinKwon closed pull request #40389: [SPARK-42767][CONNECT][TESTS] Add a precondition to start connect server fallback with `in-memory` and auto ignored some tests strongly depend on hive URL: https://github.com/apache/spark/pull/40389 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] HyukjinKwon commented on pull request #40389: [SPARK-42767][CONNECT][TESTS] Add a precondition to start connect server fallback with `in-memory` and auto ignored some tests strongly d
HyukjinKwon commented on PR #40389: URL: https://github.com/apache/spark/pull/40389#issuecomment-1471270622 Merged to master and branch-3.4. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] dongjoon-hyun commented on pull request #40448: [SPARK-42817][CORE] Logging the shuffle service name once in ApplicationMaster
dongjoon-hyun commented on PR #40448: URL: https://github.com/apache/spark/pull/40448#issuecomment-1471270199 cc @mridulm -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] HyukjinKwon commented on pull request #39239: [SPARK-41730][PYTHON] Set tz to UTC while converting of timestamps to python's datetime
HyukjinKwon commented on PR #39239: URL: https://github.com/apache/spark/pull/39239#issuecomment-1471268820 Yup, I meant that most of cases work except few cases that can happen because of timezone. we're on the same page. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] dongjoon-hyun commented on a diff in pull request #40448: [SPARK-42817][CORE] Logging the shuffle service name once in ApplicationMaster
dongjoon-hyun commented on code in PR #40448: URL: https://github.com/apache/spark/pull/40448#discussion_r1138090342 ## resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala: ## @@ -498,7 +498,10 @@ private[spark] class ApplicationMaster( // that when the driver sends an initial executor request (e.g. after an AM restart), // the allocator is ready to service requests. rpcEnv.setupEndpoint("YarnAM", new AMEndpoint(rpcEnv, driverRef)) - +if (_sparkConf.get(SHUFFLE_SERVICE_ENABLED)) { + logInfo(s"Initializing service data for shuffle service using name '" + Review Comment: nit. `s"` -> `"`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] dongjoon-hyun commented on a diff in pull request #40452: [MINOR] Add comments of `xercesImpl` upgrade precautions in `pom.xml`
dongjoon-hyun commented on code in PR #40452: URL: https://github.com/apache/spark/pull/40452#discussion_r1138089863 ## pom.xml: ## @@ -1426,6 +1426,7 @@ test + Review Comment: Thank you, @LuciferYang . Could you split this into two lines? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] zhengruifeng commented on pull request #40432: [SPARK-42800][CONNECT][PYTHON][ML] Implement ml function `{array_to_vector, vector_to_array}`
zhengruifeng commented on PR #40432: URL: https://github.com/apache/spark/pull/40432#issuecomment-1471248695 `sql - slow` failed, not sure whether it is related, let me investigate it first -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] ulysses-you commented on a diff in pull request #40446: [SPARK-42815][SQL] Subexpression elimination support shortcut conditional expression
ulysses-you commented on code in PR #40446: URL: https://github.com/apache/spark/pull/40446#discussion_r1138066754 ## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala: ## @@ -130,7 +133,19 @@ class EquivalentExpressions { // 2. ConditionalExpression: use its children that will always be evaluated. private def childrenToRecurse(expr: Expression): Seq[Expression] = expr match { case _: CodegenFallback => Nil -case c: ConditionalExpression => c.alwaysEvaluatedInputs +case c: ConditionalExpression => + if (shortcut) { +// The subexpression in conditional expression may not need to eval even if it appears Review Comment: make sense, addressed ! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] cloud-fan commented on a diff in pull request #40116: [SPARK-41391][SQL] The output column name of groupBy.agg(count_distinct) is incorrect
cloud-fan commented on code in PR #40116: URL: https://github.com/apache/spark/pull/40116#discussion_r1138054456 ## sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala: ## @@ -40,12 +40,15 @@ abstract class SQLImplicits extends LowPrioritySQLImplicits { */ implicit class StringToColumn(val sc: StringContext) { def $(args: Any*): ColumnName = { - new ColumnName(sc.s(args: _*)) + if (sc.parts.length == 1 && sc.parts.contains("*")) { Review Comment: what does this change fix? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] cloud-fan commented on pull request #40116: [SPARK-41391][SQL] The output column name of groupBy.agg(count_distinct) is incorrect
cloud-fan commented on PR #40116: URL: https://github.com/apache/spark/pull/40116#issuecomment-1471235372 The single quote indicates that the expression is unresolved, I think it doesn't matter here. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] zhengruifeng commented on pull request #40451: [SPARK-42818][CONNECT][PYTHON][FOLLOWUP] Add versionchanged
zhengruifeng commented on PR #40451: URL: https://github.com/apache/spark/pull/40451#issuecomment-1471228184 merged to master/branch-3.4 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] zhengruifeng closed pull request #40451: [SPARK-42818][CONNECT][PYTHON][FOLLOWUP] Add versionchanged
zhengruifeng closed pull request #40451: [SPARK-42818][CONNECT][PYTHON][FOLLOWUP] Add versionchanged URL: https://github.com/apache/spark/pull/40451 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] beliefer commented on pull request #40396: [SPARK-42772][SQL] Change the default value of JDBC options about push down to true
beliefer commented on PR #40396: URL: https://github.com/apache/spark/pull/40396#issuecomment-1471226525 @dongjoon-hyun @cloud-fan @huaxingao @sadikovi Thank you. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] cloud-fan commented on a diff in pull request #40446: [SPARK-42815][SQL] Subexpression elimination support shortcut conditional expression
cloud-fan commented on code in PR #40446: URL: https://github.com/apache/spark/pull/40446#discussion_r1138040356 ## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala: ## @@ -130,7 +133,19 @@ class EquivalentExpressions { // 2. ConditionalExpression: use its children that will always be evaluated. private def childrenToRecurse(expr: Expression): Seq[Expression] = expr match { case _: CodegenFallback => Nil -case c: ConditionalExpression => c.alwaysEvaluatedInputs +case c: ConditionalExpression => + if (shortcut) { +// The subexpression in conditional expression may not need to eval even if it appears Review Comment: not likely, but possible, `select or(a, and(b, b))` is also valid. BTW, I think we should not enable this new change by default, as it may lead to perf regression. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] panbingkun opened a new pull request, #40454: [SPARK-42821][SQL] Remove unused parameters in splitFiles methods
panbingkun opened a new pull request, #40454: URL: https://github.com/apache/spark/pull/40454 ### What changes were proposed in this pull request? The pr aims to remove unused parameters in PartitionedFileUtil.splitFiles methods ### Why are the changes needed? ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass GA. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] zhengruifeng commented on pull request #38947: [SPARK-41233][SQL][PYTHON] Add `array_prepend` function
zhengruifeng commented on PR #38947: URL: https://github.com/apache/spark/pull/38947#issuecomment-1471205562 @navinvishy would you mind addressing wenchen's comments? we can merge it then. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] zhengruifeng commented on pull request #40450: [SPARK-42818][CONNECT][PYTHON] Implement DataFrameReader/Writer.jdbc
zhengruifeng commented on PR #40450: URL: https://github.com/apache/spark/pull/40450#issuecomment-1471199868 Late LGTM -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] williamhyun opened a new pull request, #40453: [SPARK-42820][BUILD] Update ORC to 1.8.3
williamhyun opened a new pull request, #40453: URL: https://github.com/apache/spark/pull/40453 ### What changes were proposed in this pull request? This PR aims to update ORC to 1.8.3. ### Why are the changes needed? This will bring the following bug fixes. - https://orc.apache.org/news/2023/03/15/ORC-1.8.3/ ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] ulysses-you commented on a diff in pull request #40446: [SPARK-42815][SQL] Subexpression elimination support shortcut conditional expression
ulysses-you commented on code in PR #40446: URL: https://github.com/apache/spark/pull/40446#discussion_r1138020573 ## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala: ## @@ -130,7 +133,19 @@ class EquivalentExpressions { // 2. ConditionalExpression: use its children that will always be evaluated. private def childrenToRecurse(expr: Expression): Seq[Expression] = expr match { case _: CodegenFallback => Nil -case c: ConditionalExpression => c.alwaysEvaluatedInputs +case c: ConditionalExpression => + if (shortcut) { +// The subexpression in conditional expression may not need to eval even if it appears Review Comment: IIUC, CSE only supports project and aggregate. `Predicate` seems not likely appear without conditional expr. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] zhengruifeng commented on a diff in pull request #40432: [SPARK-42800][CONNECT][PYTHON][ML] Implement ml function `{array_to_vector, vector_to_array}`
zhengruifeng commented on code in PR #40432: URL: https://github.com/apache/spark/pull/40432#discussion_r1138017400 ## dev/sparktestsupport/modules.py: ## @@ -781,6 +740,57 @@ def __hash__(self): ], ) + +pyspark_connect = Module( +name="pyspark-connect", +dependencies=[pyspark_sql, pyspark_ml, connect], +source_file_regexes=[ +"python/pyspark/sql/connect", +"python/pyspark/ml/connect", Review Comment: @HyukjinKwon I made an additional change, making `pyspark-connect` depend on `pyspark_ml` and then moving the ml tests here, since I think it may not be a good idea to make `pyspark_ml` depend on `connect` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] cloud-fan commented on a diff in pull request #40446: [SPARK-42815][SQL] Subexpression elimination support shortcut conditional expression
cloud-fan commented on code in PR #40446: URL: https://github.com/apache/spark/pull/40446#discussion_r1138016464 ## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala: ## @@ -130,7 +133,19 @@ class EquivalentExpressions { // 2. ConditionalExpression: use its children that will always be evaluated. private def childrenToRecurse(expr: Expression): Seq[Expression] = expr match { case _: CodegenFallback => Nil -case c: ConditionalExpression => c.alwaysEvaluatedInputs +case c: ConditionalExpression => + if (shortcut) { +// The subexpression in conditional expression may not need to eval even if it appears Review Comment: why only in conditional expression? `where or(a, and(b, b))` has the same problem, right? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] zhengruifeng commented on a diff in pull request #40432: [SPARK-42800][CONNECT][PYTHON][ML] Implement ml function `{array_to_vector, vector_to_array}`
zhengruifeng commented on code in PR #40432: URL: https://github.com/apache/spark/pull/40432#discussion_r1138015354 ## python/pyspark/ml/connect/functions.py: ## @@ -0,0 +1,76 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from pyspark.sql.connect.utils import check_dependencies + +check_dependencies(__name__) + +from pyspark.ml import functions as PyMLFunctions + +from pyspark.sql.connect.column import Column +from pyspark.sql.connect.functions import _invoke_function, _to_col, lit + + +def vector_to_array(col: Column, dtype: str = "float64") -> Column: +return _invoke_function("vector_to_array", _to_col(col), lit(dtype)) + + +vector_to_array.__doc__ = PyMLFunctions.vector_to_array.__doc__ + + +def array_to_vector(col: Column) -> Column: +return _invoke_function("array_to_vector", _to_col(col)) + + +array_to_vector.__doc__ = PyMLFunctions.array_to_vector.__doc__ + + +def _test() -> None: +import sys +import doctest +from pyspark.sql import SparkSession as PySparkSession +import pyspark.ml.connect.functions + +globs = pyspark.ml.connect.functions.__dict__.copy() + +# TODO: split vector_to_array doctest since it includes .mllib vectors Review Comment: @HyukjinKwon I guess `test_connect_function.py` is still needed, since we can 
not enable the doctests for now -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] LuciferYang commented on pull request #40431: [SPARK-42799][BUILD] Update SBT build `xercesImpl` version to match with `pom.xml`
LuciferYang commented on PR #40431: URL: https://github.com/apache/spark/pull/40431#issuecomment-1471187025 https://github.com/apache/spark/pull/40452/files : I added a comment in `pom.xml` to prevent us from forgetting this -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] zhengruifeng commented on a diff in pull request #40432: [SPARK-42800][CONNECT][PYTHON][ML] Implement ml function `{array_to_vector, vector_to_array}`
zhengruifeng commented on code in PR #40432: URL: https://github.com/apache/spark/pull/40432#discussion_r1138014700 ## dev/sparktestsupport/modules.py: ## @@ -655,6 +655,7 @@ def __hash__(self): "pyspark.ml.tests.test_wrapper", "pyspark.ml.torch.tests.test_distributor", "pyspark.ml.torch.tests.test_log_communication", +"pyspark.ml.tests.connect.test_connect_function", Review Comment: good catch! I am thinking whether we should put the tests into `pyspark-connect` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] LuciferYang opened a new pull request, #40452: [MINOR] Add comments of `xercesImpl` upgrade precautions in `pom.xml`
LuciferYang opened a new pull request, #40452: URL: https://github.com/apache/spark/pull/40452 ### What changes were proposed in this pull request? This pr just add comments of `xercesImpl` upgrade precautions in `pom.xml`. ### Why are the changes needed? Add comments to remind developers that `xercesImpl` should update versions in both `pom.xml` and `SparkBuild.scala` at the same time. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GitHub Actions -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] cloud-fan commented on a diff in pull request #40446: [SPARK-42815][SQL] Subexpression elimination support shortcut conditional expression
cloud-fan commented on code in PR #40446: URL: https://github.com/apache/spark/pull/40446#discussion_r1138011206 ## sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala: ## @@ -864,6 +864,15 @@ object SQLConf { .checkValue(_ >= 0, "The maximum must not be negative") .createWithDefault(100) + val SUBEXPRESSION_ELIMINATION_SHORTCUT_ENABLE = +buildConf("spark.sql.subexpressionElimination.shortcut.enabled") Review Comment: ```suggestion buildConf("spark.sql.subexpressionElimination.skipForShortcutExpr") ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] amaliujia commented on a diff in pull request #40447: [SPARK-42816][CONNECT] Support Max Message size up to 128MB
amaliujia commented on code in PR #40447: URL: https://github.com/apache/spark/pull/40447#discussion_r1138008293 ## connector/connect/server/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala: ## @@ -47,6 +47,13 @@ object Connect { .bytesConf(ByteUnit.MiB) .createWithDefaultString("4m") + val CONNECT_GRPC_MAX_INBOUND_MESSAGE_SIZE = +ConfigBuilder("spark.connect.grpc.maxInboundMessageSize") + .doc("Set the default inbound message size for the GRPC requests.") + .version("3.4.0") + .bytesConf(ByteUnit.MiB) Review Comment: Should be `ByteUnit.BYTE`? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] amaliujia commented on a diff in pull request #40447: [SPARK-42816][CONNECT] Support Max Message size up to 128MB
amaliujia commented on code in PR #40447: URL: https://github.com/apache/spark/pull/40447#discussion_r1138007108 ## connector/connect/server/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala: ## @@ -47,6 +47,13 @@ object Connect { .bytesConf(ByteUnit.MiB) .createWithDefaultString("4m") + val CONNECT_GRPC_MAX_INBOUND_MESSAGE_SIZE = +ConfigBuilder("spark.connect.grpc.maxInboundMessageSize") Review Comment: +1 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] ueshin opened a new pull request, #40451: [SPARK-42818][CONNECT][PYTHON][FOLLOWUP] Add versionchanged
ueshin opened a new pull request, #40451: URL: https://github.com/apache/spark/pull/40451 ### What changes were proposed in this pull request? Follow-up of #40450. Adds `versionchanged` to the docstring. ### Why are the changes needed? The `versionchanged` is missing in the API newly supported in Spark Connect. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] HyukjinKwon closed pull request #40450: [SPARK-42818][CONNECT][PYTHON] Implement DataFrameReader/Writer.jdbc
HyukjinKwon closed pull request #40450: [SPARK-42818][CONNECT][PYTHON] Implement DataFrameReader/Writer.jdbc URL: https://github.com/apache/spark/pull/40450 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] HyukjinKwon commented on pull request #40450: [SPARK-42818][CONNECT][PYTHON] Implement DataFrameReader/Writer.jdbc
HyukjinKwon commented on PR #40450: URL: https://github.com/apache/spark/pull/40450#issuecomment-1471147451 Merged to master and branch-3.4. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] ulysses-you commented on pull request #40446: [SPARK-42815][SQL] Subexpression elimination support shortcut conditional expression
ulysses-you commented on PR #40446: URL: https://github.com/apache/spark/pull/40446#issuecomment-1471123749 cc @viirya @cloud-fan thank you -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] cloud-fan closed pull request #40396: [SPARK-42772][SQL] Change the default value of JDBC options about push down to true
cloud-fan closed pull request #40396: [SPARK-42772][SQL] Change the default value of JDBC options about push down to true URL: https://github.com/apache/spark/pull/40396 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] cloud-fan commented on pull request #40396: [SPARK-42772][SQL] Change the default value of JDBC options about push down to true
cloud-fan commented on PR #40396: URL: https://github.com/apache/spark/pull/40396#issuecomment-1471116456 thanks, merging to master! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] dongjoon-hyun commented on a diff in pull request #40447: [SPARK-42816][CONNECT] Support Max Message size up to 128MB
dongjoon-hyun commented on code in PR #40447: URL: https://github.com/apache/spark/pull/40447#discussion_r1137915076 ## python/pyspark/sql/connect/client.py: ## @@ -122,6 +122,8 @@ class ChannelBuilder: PARAM_TOKEN = "token" PARAM_USER_ID = "user_id" PARAM_USER_AGENT = "user_agent" +MAX_MESSAGE_LENGTH = 128 * 1024 * 1024 +MAX_METADATA_LENGTH = 16 * 1024 * 1024 Review Comment: This seems to be unused in this PR. Do we need to add this in this `Support Max Message size up to 128MB` PR? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] dongjoon-hyun commented on a diff in pull request #40447: [SPARK-42816][CONNECT] Support Max Message size up to 128MB
dongjoon-hyun commented on code in PR #40447: URL: https://github.com/apache/spark/pull/40447#discussion_r1137915076 ## python/pyspark/sql/connect/client.py: ## @@ -122,6 +122,8 @@ class ChannelBuilder: PARAM_TOKEN = "token" PARAM_USER_ID = "user_id" PARAM_USER_AGENT = "user_agent" +MAX_MESSAGE_LENGTH = 128 * 1024 * 1024 +MAX_METADATA_LENGTH = 16 * 1024 * 1024 Review Comment: This seems to be unused in this PR. Do we need to add this in this PR? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] dongjoon-hyun commented on a diff in pull request #40447: [SPARK-42816][CONNECT] Support Max Message size up to 128MB
dongjoon-hyun commented on code in PR #40447: URL: https://github.com/apache/spark/pull/40447#discussion_r1137914674 ## connector/connect/server/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala: ## @@ -47,6 +47,13 @@ object Connect { .bytesConf(ByteUnit.MiB) .createWithDefaultString("4m") + val CONNECT_GRPC_MAX_INBOUND_MESSAGE_SIZE = +ConfigBuilder("spark.connect.grpc.maxInboundMessageSize") Review Comment: +1 for documenting. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] github-actions[bot] commented on pull request #37725: [DO-NOT-MERGE] Exceptions without error classes in SQL golden files
github-actions[bot] commented on PR #37725: URL: https://github.com/apache/spark/pull/37725#issuecomment-1471019944 We're closing this PR because it hasn't been updated in a while. This isn't a judgement on the merit of the PR in any way. It's just a way of keeping the PR queue manageable. If you'd like to revive this PR, please reopen it and ask a committer to remove the Stale tag! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] HyukjinKwon commented on pull request #40448: [SPARK-42817][CORE] Logging the shuffle service name once in ApplicationMaster
HyukjinKwon commented on PR #40448: URL: https://github.com/apache/spark/pull/40448#issuecomment-1471014495 AppVeyor failure (`continuous-integration/appveyor/pr`) should be fine to ignore for now. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] dongjoon-hyun commented on a diff in pull request #40444: [SPARK-42813][K8S] Print application info when waitAppCompletion is false
dongjoon-hyun commented on code in PR #40444: URL: https://github.com/apache/spark/pull/40444#discussion_r1137906960 ## resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/LoggingPodStatusWatcher.scala: ## @@ -95,8 +95,8 @@ private[k8s] class LoggingPodStatusWatcherImpl(conf: KubernetesDriverConf) this.notifyAll() } - override def watchOrStop(sId: String): Boolean = if (conf.get(WAIT_FOR_APP_COMPLETION)) { -logInfo(s"Waiting for application ${conf.appName} with submission ID $sId to finish...") + override def watchOrStop(sId: String): Boolean = { +logInfo(s"Waiting for application ${conf.appId} with submission ID $sId to finish...") Review Comment: Got it. Now, I understand what you meant. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] HyukjinKwon commented on a diff in pull request #40432: [SPARK-42800][CONNECT][PYTHON][ML] Implement ml function `{array_to_vector, vector_to_array}`
HyukjinKwon commented on code in PR #40432: URL: https://github.com/apache/spark/pull/40432#discussion_r1137906670 ## python/pyspark/ml/tests/connect/test_connect_function.py: ## @@ -0,0 +1,113 @@ +# Review Comment: I think you can remove this test - I believe the doctests in `pyspark.ml.connect.functions` cover the same cases. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] HyukjinKwon commented on a diff in pull request #40432: [SPARK-42800][CONNECT][PYTHON][ML] Implement ml function `{array_to_vector, vector_to_array}`
HyukjinKwon commented on code in PR #40432: URL: https://github.com/apache/spark/pull/40432#discussion_r1137906453 ## dev/sparktestsupport/modules.py: ## @@ -655,6 +655,7 @@ def __hash__(self): "pyspark.ml.tests.test_wrapper", "pyspark.ml.torch.tests.test_distributor", "pyspark.ml.torch.tests.test_log_communication", +"pyspark.ml.tests.connect.test_connect_function", Review Comment: Should also add `pyspark.ml.connect.functions` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] HyukjinKwon commented on a diff in pull request #40447: [SPARK-42816] Support Max Message size up to 128MB
HyukjinKwon commented on code in PR #40447: URL: https://github.com/apache/spark/pull/40447#discussion_r1137904888 ## connector/connect/server/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala: ## @@ -47,6 +47,13 @@ object Connect { .bytesConf(ByteUnit.MiB) .createWithDefaultString("4m") + val CONNECT_GRPC_MAX_INBOUND_MESSAGE_SIZE = +ConfigBuilder("spark.connect.grpc.maxInboundMessageSize") Review Comment: If this is a user-facing conf, should probably document at https://github.com/apache/spark/blob/master/docs/configuration.md#spark-connect -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] HyukjinKwon closed pull request #40442: [SPARK-42809][BUILD] Upgrade scala-maven-plugin from 4.8.0 to 4.8.1
HyukjinKwon closed pull request #40442: [SPARK-42809][BUILD] Upgrade scala-maven-plugin from 4.8.0 to 4.8.1 URL: https://github.com/apache/spark/pull/40442 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] HyukjinKwon commented on pull request #40442: [SPARK-42809][BUILD] Upgrade scala-maven-plugin from 4.8.0 to 4.8.1
HyukjinKwon commented on PR #40442: URL: https://github.com/apache/spark/pull/40442#issuecomment-1471000337 Merged to master. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] dtenedor commented on pull request #40449: [SPARK-42791][SQL] Create a new golden file test framework for analysis
dtenedor commented on PR #40449: URL: https://github.com/apache/spark/pull/40449#issuecomment-1470971482 @gengliangwang alright I made this change, please look again when you are ready. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] ueshin opened a new pull request, #40450: [SPARK-42818][CONNECT][PYTHON] Implement DataFrameReader/Writer.jdbc
ueshin opened a new pull request, #40450: URL: https://github.com/apache/spark/pull/40450 ### What changes were proposed in this pull request? Implements `DataFrameReader/Writer.jdbc`. ### Why are the changes needed? Missing API. ### Does this PR introduce _any_ user-facing change? Yes, `DataFrameReader/Writer.jdbc` will be available. ### How was this patch tested? Added related tests. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] gengliangwang commented on pull request #40449: [SPARK-42791][SQL] Create a new golden file test framework for analysis
gengliangwang commented on PR #40449: URL: https://github.com/apache/spark/pull/40449#issuecomment-1470835260 > I will put the analyzer results in separate files. Sounds great! Thanks for the work! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] dtenedor commented on pull request #40449: [SPARK-42791][SQL] Create a new golden file test framework for analysis
dtenedor commented on PR #40449: URL: https://github.com/apache/spark/pull/40449#issuecomment-1470824639 @gengliangwang from past experience we will want to keep the query plans separate from the SQL results, otherwise the SQL results become hard to read. I will put the analyzer results in separate files. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] dtenedor commented on pull request #40449: [SPARK-42791][SQL] Create a new golden file test framework for analysis
dtenedor commented on PR #40449: URL: https://github.com/apache/spark/pull/40449#issuecomment-1470823193 @gengliangwang Sure, I was thinking about this too. We can reuse the same input SQL query files if we want, and just generate and test against different analyzer test output files. Let me update the PR to do that and I can ping the thread again. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] rithwik-db closed pull request #40423: [SPARK-41775][PYTHON][FOLLOW-UP] Torch distributor multiple gpus per task
rithwik-db closed pull request #40423: [SPARK-41775][PYTHON][FOLLOW-UP] Torch distributor multiple gpus per task URL: https://github.com/apache/spark/pull/40423 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] rithwik-db commented on pull request #40423: [SPARK-41775][PYTHON][FOLLOW-UP] Torch distributor multiple gpus per task
rithwik-db commented on PR #40423: URL: https://github.com/apache/spark/pull/40423#issuecomment-1470815851 This ticket will be closed for now; related changes may be in a V2 of the TorchDistributor -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] steveloughran commented on pull request #39124: [DON'T MERGE] Test build and test with hadoop 3.3.5-RC2
steveloughran commented on PR #39124: URL: https://github.com/apache/spark/pull/39124#issuecomment-1470813750 got a new RC up to play with...hopefully RC3 will ship. main changes are fixes to some HDFS cases which can trigger NPEs -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] bjornjorgensen commented on pull request #40431: [SPARK-42799][BUILD] Update SBT build `xercesImpl` version to match with `pom.xml`
bjornjorgensen commented on PR #40431: URL: https://github.com/apache/spark/pull/40431#issuecomment-1470803805 CC @panbingkun so you too are aware of this and hopefully don't make the same mistake I did. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] gengliangwang commented on pull request #40449: [SPARK-42791][SQL] Create a new golden file test framework for analysis
gengliangwang commented on PR #40449: URL: https://github.com/apache/spark/pull/40449#issuecomment-1470801546 @dtenedor Since we already have `SQLQueryTestSuite` which has good basic Spark SQL features coverage, shall we combine both? E.g. let `SQLQueryTestSuite` show analyzed plan/optimized plan/execution results for comparison, and port more tests from [zetasql](https://github.com/google/zetasql/tree/master/zetasql/analyzer/testdata) What is the reason for having a new analysis test only? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] dtenedor commented on pull request #40449: [SPARK-42791][SQL] Create a new golden file test framework for analysis
dtenedor commented on PR #40449: URL: https://github.com/apache/spark/pull/40449#issuecomment-1470712114 Hi @gengliangwang this should be ready for a first look! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] MaxGekk commented on pull request #39239: [SPARK-41730][PYTHON] Set tz to UTC while converting of timestamps to python's datetime
MaxGekk commented on PR #39239: URL: https://github.com/apache/spark/pull/39239#issuecomment-1470569694 > So the problem here would be implementation detail. @HyukjinKwon I think this is not impl details but a fundamental problem of `datetime`, especially in the corner case of `0001-01-01 00:00:00+00:00`. Since the last one is a physical timestamp in the `+00:00` zone offset, it can be mapped to **local** timestamps >= `0001-01-01 00:00:00` as well as to timestamps < `0001-01-01 00:00:00`, for instance `0001-01-01 16:00:00 B.C.` ( the local date/time before the common (or current) era). And the last one is not acceptable by python's `datetime`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] NarekDW commented on pull request #40422: [SPARK-42803][CORE][SQL][ML] Use getParameterCount function instead of getParameterTypes.length
NarekDW commented on PR #40422: URL: https://github.com/apache/spark/pull/40422#issuecomment-1470557942 > @NarekDW Are there any more similar cases? > > cc @srowen FYI @LuciferYang no, these are all -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] MaxGekk commented on a diff in pull request #39239: [SPARK-41730][PYTHON] Set tz to UTC while converting of timestamps to python's datetime
MaxGekk commented on code in PR #39239: URL: https://github.com/apache/spark/pull/39239#discussion_r1137571293 ## python/pyspark/sql/types.py: ## @@ -276,7 +276,18 @@ def toInternal(self, dt: datetime.datetime) -> int: def fromInternal(self, ts: int) -> datetime.datetime: if ts is not None: # using int to avoid precision loss in float -return datetime.datetime.fromtimestamp(ts // 1000000).replace(microsecond=ts % 1000000) +return ( +datetime.datetime +# Set the time zone to UTC because the TIMESTAMP type stores timestamps +# as the number of microseconds from the epoch of 1970-01-01T00:00:00.000000Z +# in the UTC time zone. +.fromtimestamp(ts // 1000000, tz=datetime.timezone.utc) +.replace(microsecond=ts % 1000000) +# Convert to the local time zone and remove the time zone info to +# to have the result type `datetime64[us]` for compatibility. +.astimezone(None) +.replace(tzinfo=None) Review Comment: Let me check this, and add a roundtrip test. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] dtenedor opened a new pull request, #40449: [SPARK-42791][SQL] Create a new golden file test framework for analysis
dtenedor opened a new pull request, #40449: URL: https://github.com/apache/spark/pull/40449 ### What changes were proposed in this pull request? This PR creates a new `SQLAnalyzerTestSuite` that consumes input SQL queries from files and then performs analysis and generates the string representation of the analyzed plans. It works much the same way as the existing `SQLQueryTestSuite` except that it produces analyzed plan string instead of query results in the output golden files. ### Why are the changes needed? This framework will help us guard against bugs in future development by showing a clear signal when analyzer updates result in changes to the query plans for a body of test queries. PR authors and reviewers will be able to see the diffs for all changed plans during the review, and any unintentional plan changes will act as a signal for PR authors to adjust their code to prevent the changes from happening. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? This test adds a new test suite and initial golden file. The new test suite passes as of this PR. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] otterc commented on pull request #40393: [SPARK-40082] Schedule mergeFinalize when push merge shuffleMapStage retry but no running tasks
otterc commented on PR #40393: URL: https://github.com/apache/spark/pull/40393#issuecomment-1470501033 @akpatnam25 @shuwang21 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] otterc commented on pull request #40448: Logging the shuffle service name once in ApplicationMaster
otterc commented on PR #40448: URL: https://github.com/apache/spark/pull/40448#issuecomment-1470498081 @mridulm @xkrogen @akpatnam25 @shuwang21 Please help review. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] otterc opened a new pull request, #40448: Logging the shuffle service name once in ApplicationMaster
otterc opened a new pull request, #40448: URL: https://github.com/apache/spark/pull/40448 ### What changes were proposed in this pull request? Removed the logging of shuffle service name multiple times in the driver log. It gets logged everytime a new executor is allocated. ### Why are the changes needed? This is needed because currently the driver logs gets polluted by these logs: ``` 22/08/03 20:42:07 INFO ExecutorRunnable: Initializing service data for shuffle service using name 'spark_shuffle_311' 22/08/03 20:42:07 INFO ExecutorRunnable: Initializing service data for shuffle service using name 'spark_shuffle_311' 22/08/03 20:42:07 INFO ExecutorRunnable: Initializing service data for shuffle service using name 'spark_shuffle_311' 22/08/03 20:42:07 INFO ExecutorRunnable: Initializing service data for shuffle service using name 'spark_shuffle_311' 22/08/03 20:42:07 INFO ExecutorRunnable: Initializing service data for shuffle service using name 'spark_shuffle_311' 22/08/03 20:42:07 INFO ExecutorRunnable: Initializing service data for shuffle service using name 'spark_shuffle_311' 22/08/03 20:42:07 INFO ExecutorRunnable: Initializing service data for shuffle service using name 'spark_shuffle_311' 22/08/03 20:42:07 INFO ExecutorRunnable: Initializing service data for shuffle service using name 'spark_shuffle_311' 22/08/03 20:42:07 INFO ExecutorRunnable: Initializing service data for shuffle service using name 'spark_shuffle_311' 22/08/03 20:42:07 INFO ExecutorRunnable: Initializing service data for shuffle service using name 'spark_shuffle_311' 22/08/03 20:42:07 INFO ExecutorRunnable: Initializing service data for shuffle service using name 'spark_shuffle_311' 22/08/03 20:42:07 INFO ExecutorRunnable: Initializing service data for shuffle service using name 'spark_shuffle_311' 22/08/03 20:42:07 INFO ExecutorRunnable: Initializing service data for shuffle service using name 'spark_shuffle_311' ``` ### Does this PR introduce _any_ user-facing change? 
Yes, the shuffle service name will be just logged once in the driver. ### How was this patch tested? Tested manually since it just changes the logging. With this change, the following is logged in the driver logs: `23/03/15 16:50:54 INFO ApplicationMaster: Initializing service data for shuffle service using name 'spark_shuffle_311'` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] LuciferYang commented on pull request #40395: [SPARK-42770][CONNECT] Add `truncatedTo(ChronoUnit.MICROS)` to make `SQLImplicitsTestSuite` in Java 17 daily test GA task pass
LuciferYang commented on PR #40395: URL: https://github.com/apache/spark/pull/40395#issuecomment-1470433831 https://github.com/apache/spark/actions/runs/4420600519 https://user-images.githubusercontent.com/1475305/225388240-9f85593f-f6d6-47dd-be07-9ab906bf53a8.png The latest Java 17 daily test passed. Thanks all ~ -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] pan3793 commented on a diff in pull request #40447: [SPARK-42816] Support Max Message size up to 128MB
pan3793 commented on code in PR #40447: URL: https://github.com/apache/spark/pull/40447#discussion_r1137446054 ## connector/connect/server/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala: ## @@ -47,6 +47,13 @@ object Connect { .bytesConf(ByteUnit.MiB) .createWithDefaultString("4m") + val CONNECT_GRPC_MAX_INBOUND_MESSAGE_SIZE = +ConfigBuilder("spark.connect.grpc.maxInboundMessageSize") + .doc("Set the default inbound message size for the GRPC requests.") Review Comment: nit: "GRPC" -> "gRPC" The "default" is misleading, because it's actually a "boundary" or "limitation" -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] jdferreira commented on pull request #40398: [MINOR][DOCS] Update `translate` docblock
jdferreira commented on PR #40398: URL: https://github.com/apache/spark/pull/40398#issuecomment-1470420003 @srowen I have enabled it, but now I don't know how to progress. Is there a "re-run" button to re-trigger the build? Or do I push an empty commit into this branch? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] pan3793 commented on a diff in pull request #40447: [SPARK-42816] Support Max Message size up to 128MB
pan3793 commented on code in PR #40447: URL: https://github.com/apache/spark/pull/40447#discussion_r1137446054 ## connector/connect/server/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala: ## @@ -47,6 +47,13 @@ object Connect { .bytesConf(ByteUnit.MiB) .createWithDefaultString("4m") + val CONNECT_GRPC_MAX_INBOUND_MESSAGE_SIZE = +ConfigBuilder("spark.connect.grpc.maxInboundMessageSize") + .doc("Set the default inbound message size for the GRPC requests.") Review Comment: nit: "GRPC" -> "gRPC" -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] pan3793 commented on a diff in pull request #40444: [SPARK-42813][K8S] Print application info when waitAppCompletion is false
pan3793 commented on code in PR #40444: URL: https://github.com/apache/spark/pull/40444#discussion_r1137429300 ## resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/LoggingPodStatusWatcher.scala: ## @@ -95,8 +95,8 @@ private[k8s] class LoggingPodStatusWatcherImpl(conf: KubernetesDriverConf) this.notifyAll() } - override def watchOrStop(sId: String): Boolean = if (conf.get(WAIT_FOR_APP_COMPLETION)) { -logInfo(s"Waiting for application ${conf.appName} with submission ID $sId to finish...") + override def watchOrStop(sId: String): Boolean = { +logInfo(s"Waiting for application ${conf.appId} with submission ID $sId to finish...") Review Comment: Sorry, I don't get your point, would you please elaborate more about the risk? IMO, this does not break [SPARK-24266](https://issues.apache.org/jira/browse/SPARK-24266). In the whole codebase except for the testing code, only one place invokes `watchOrStop` https://github.com/apache/spark/blob/8860f69455e5a722626194c4797b4b42cccd4510/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientApplication.scala#L183-L205 I claim it related to [SPARK-35174](https://issues.apache.org/jira/browse/SPARK-35174) because after [SPARK-35174](https://issues.apache.org/jira/browse/SPARK-35174), `conf.get(WAIT_FOR_APP_COMPLETION)` is always `true`, then the change just like ```patch - if (true) { - logic1 - } else { - logic2 - } + logic1 ``` Please let me know if I misunderstand your comment. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] grundprinzip opened a new pull request, #40447: [SPARK-42816] Support Max Message size up to 128MB
grundprinzip opened a new pull request, #40447: URL: https://github.com/apache/spark/pull/40447 ### What changes were proposed in this pull request? This change lifts the default message size of 4MB to 128MB and makes it configurable. While 128MB is a "random number" it supports creating DataFrames from reasonably sized local data without failing. ### Why are the changes needed? Usability ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manual -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] ritikam2 commented on pull request #40116: [SPARK-41391][SQL] The output column name of groupBy.agg(count_distinct) is incorrect
ritikam2 commented on PR #40116: URL: https://github.com/apache/spark/pull/40116#issuecomment-1470374550 Can anyone tell me how I am getting this single quote in the count expression? Attaching the picture. This can potentially cause problems down the line where tree nodes are compared in the transformDownWithPruning, where the two nodes are not the same because of this single quote https://user-images.githubusercontent.com/13139216/225378952-74ba895b-2c36-407a-ab1c-7ad46b469ae7.png -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] dongjoon-hyun commented on a diff in pull request #40396: [SPARK-42772][SQL] Change the default value of JDBC options about push down to true
dongjoon-hyun commented on code in PR #40396: URL: https://github.com/apache/spark/pull/40396#discussion_r1137367307 ## sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala: ## @@ -583,12 +583,16 @@ abstract class JdbcDialect extends Serializable with Logging { * {@link OracleDialect.OracleSQLQueryBuilder} and * {@link MsSqlServerDialect.MsSqlServerSQLQueryBuilder}. */ - def supportsLimit: Boolean = true + def supportsLimit: Boolean = false Review Comment: Oh, got it~ -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] dongjoon-hyun commented on pull request #40410: [SPARK-42783][SQL] Infer window group limit should run as late as possible
dongjoon-hyun commented on PR #40410: URL: https://github.com/apache/spark/pull/40410#issuecomment-1470311818 Thank you, @beliefer and @cloud-fan . -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] dongjoon-hyun commented on a diff in pull request #40358: [SPARK-42733][CONNECT][Followup] Write without path or table
dongjoon-hyun commented on code in PR #40358: URL: https://github.com/apache/spark/pull/40358#discussion_r1137339742 ## connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala: ## @@ -175,6 +176,26 @@ class ClientE2ETestSuite extends RemoteSparkSession with SQLHelper { } } + test("write without table or path") { +// Should receive no error to write noop +spark.range(10).write.format("noop").mode("append").save() + } + + test("write jdbc") { Review Comment: Yes, it's resolved for now. Thank you for checking, @zhenlineo ! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] dongjoon-hyun commented on a diff in pull request #40358: [SPARK-42733][CONNECT][Followup] Write without path or table
dongjoon-hyun commented on code in PR #40358: URL: https://github.com/apache/spark/pull/40358#discussion_r1137339742 ## connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala: ## @@ -175,6 +176,26 @@ class ClientE2ETestSuite extends RemoteSparkSession with SQLHelper { } } + test("write without table or path") { +// Should receive no error to write noop +spark.range(10).write.format("noop").mode("append").save() + } + + test("write jdbc") { Review Comment: Yes, it's triaged for now. Thank you for checking, @zhenlineo ! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] dongjoon-hyun commented on a diff in pull request #40444: [SPARK-42813][K8S] Print application info when waitAppCompletion is false
dongjoon-hyun commented on code in PR #40444: URL: https://github.com/apache/spark/pull/40444#discussion_r1137331510 ## resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/LoggingPodStatusWatcher.scala: ## @@ -95,8 +95,8 @@ private[k8s] class LoggingPodStatusWatcherImpl(conf: KubernetesDriverConf) this.notifyAll() } - override def watchOrStop(sId: String): Boolean = if (conf.get(WAIT_FOR_APP_COMPLETION)) { -logInfo(s"Waiting for application ${conf.appName} with submission ID $sId to finish...") + override def watchOrStop(sId: String): Boolean = { +logInfo(s"Waiting for application ${conf.appId} with submission ID $sId to finish...") Review Comment: This part is SPARK-24266 instead of SPARK-35174. This change looks risky to me. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] dongjoon-hyun commented on a diff in pull request #40444: [SPARK-42813][K8S] Print application info when waitAppCompletion is false
dongjoon-hyun commented on code in PR #40444: URL: https://github.com/apache/spark/pull/40444#discussion_r1137331510 ## resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/LoggingPodStatusWatcher.scala: ## @@ -95,8 +95,8 @@ private[k8s] class LoggingPodStatusWatcherImpl(conf: KubernetesDriverConf) this.notifyAll() } - override def watchOrStop(sId: String): Boolean = if (conf.get(WAIT_FOR_APP_COMPLETION)) { -logInfo(s"Waiting for application ${conf.appName} with submission ID $sId to finish...") + override def watchOrStop(sId: String): Boolean = { +logInfo(s"Waiting for application ${conf.appId} with submission ID $sId to finish...") Review Comment: This part is SPARK-24266 instead of `SPARK-35174`. This change looks risky to me. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] ulysses-you opened a new pull request, #40446: [SPARK-42815][SQL] Subexpression elimination support shortcut conditional expression
ulysses-you opened a new pull request, #40446: URL: https://github.com/apache/spark/pull/40446 ### What changes were proposed in this pull request? Add a new config to shortcut subexpression elimination for conditional expression. The subexpression in conditional expression may not need to eval even if it appears more than once. e.g., `if(or(a, and(b, b)))`, the expression `b` would be skipped if `a` is true. ### Why are the changes needed? avoid eval unnecessary expression. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? add test -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] LuciferYang opened a new pull request, #40445: [SPARK-42814][BUILD] Upgrade some maven plugins
LuciferYang opened a new pull request, #40445: URL: https://github.com/apache/spark/pull/40445 ### What changes were proposed in this pull request? This pr aims to upgrade the following maven plugins - maven-enforcer-plugin 3.0.0-M2 -> 3.2.1 - build-helper-maven-plugin 3.2.0 -> 3.3.0 - maven-compiler-plugin 3.10.1 -> 3.11.0 - maven-surefire-plugin 3.0.0-M9 -> 3.0.0 - maven-javadoc-plugin 3.4.1 -> 3.5.0 - maven-deploy-plugin 3.0.0 -> 3.1.0 ### Why are the changes needed? The release notes as follows: - maven-enforcer-plugin - https://github.com/apache/maven-enforcer/releases/tag/enforcer-3.2.1 - https://github.com/apache/maven-enforcer/releases/tag/enforcer-3.1.0 - build-helper-maven-plugin - https://github.com/mojohaus/build-helper-maven-plugin/releases/tag/build-helper-maven-plugin-3.3.0 - maven-compiler-plugin - https://github.com/apache/maven-compiler-plugin/releases/tag/maven-compiler-plugin-3.11.0 - maven-surefire-plugin - https://github.com/apache/maven-surefire/releases/tag/surefire-3.0.0 - maven-javadoc-plugin - https://github.com/apache/maven-javadoc-plugin/releases/tag/maven-javadoc-plugin-3.5.0 - maven-deploy-plugin - https://github.com/apache/maven-deploy-plugin/releases/tag/maven-deploy-plugin-3.1.0 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Pass GitHub Actions -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] pan3793 commented on pull request #40444: [SPARK-42813][K8S] Print application info when waitAppCompletion is false
pan3793 commented on PR #40444: URL: https://github.com/apache/spark/pull/40444#issuecomment-1470098995 cc @slothspot @dongjoon-hyun @yaooqinn, please take a look when you get time, thanks. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org