taiyang-li commented on code in PR #6725:
URL: https://github.com/apache/incubator-gluten/pull/6725#discussion_r1709133968
##########
backends-clickhouse/src/main/scala/org/apache/gluten/vectorized/CHColumnarBatchSerializer.scala:
##########
@@ -54,8 +54,14 @@ private class CHColumnarBatchSerializerInstance(
extends SerializerInstance
with Logging {
- private lazy val compressionCodec =
-
GlutenShuffleUtils.getCompressionCodec(SparkEnv.get.conf).toUpperCase(Locale.ROOT)
+ private lazy val conf = SparkEnv.get.conf
+ private lazy val compressionCodec =
GlutenShuffleUtils.getCompressionCodec(conf)
+ private lazy val customizedCompressCodec =
compressionCodec.toUpperCase(Locale.ROOT)
Review Comment:
`capitalizedCompressionCodec` would be a better name
##########
backends-clickhouse/src/main/scala/org/apache/spark/shuffle/CHColumnarShuffleWriter.scala:
##########
@@ -51,8 +51,13 @@ class CHColumnarShuffleWriter[K, V](
.mkString(",")
private val subDirsPerLocalDir =
blockManager.diskBlockManager.subDirsPerLocalDir
private val splitSize = GlutenConfig.getConf.maxBatchSize
- private val customizedCompressCodec =
- GlutenShuffleUtils.getCompressionCodec(conf).toUpperCase(Locale.ROOT)
+ private val compressionCodec = GlutenShuffleUtils.getCompressionCodec(conf)
+ private val customizedCompressCodec =
compressionCodec.toUpperCase(Locale.ROOT)
Review Comment:
Same as above.
##########
cpp-ch/local-engine/Shuffle/ShuffleWriter.cpp:
##########
@@ -25,13 +25,14 @@ using namespace DB;
namespace local_engine
{
ShuffleWriter::ShuffleWriter(
- jobject output_stream, jbyteArray buffer, const std::string & codecStr,
bool enable_compression, size_t customize_buffer_size)
+ jobject output_stream, jbyteArray buffer, const std::string & codecStr,
jint level, bool enable_compression, size_t customize_buffer_size)
{
compression_enable = enable_compression;
write_buffer =
std::make_unique<WriteBufferFromJavaOutputStream>(output_stream, buffer,
customize_buffer_size);
if (compression_enable)
{
- auto codec =
DB::CompressionCodecFactory::instance().get(boost::to_upper_copy(codecStr), {});
+ auto family = boost::to_upper_copy(codecStr);
+ auto codec = DB::CompressionCodecFactory::instance().get(family,
family == "ZSTD" ? std::optional<int>(level) : std::nullopt);
Review Comment:
The default value of `level` is `INT_MIN`; is it OK to pass it directly to the
C++ code?
##########
cpp-ch/local-engine/Shuffle/ShuffleWriter.cpp:
##########
@@ -25,13 +25,14 @@ using namespace DB;
namespace local_engine
{
ShuffleWriter::ShuffleWriter(
- jobject output_stream, jbyteArray buffer, const std::string & codecStr,
bool enable_compression, size_t customize_buffer_size)
+ jobject output_stream, jbyteArray buffer, const std::string & codecStr,
jint level, bool enable_compression, size_t customize_buffer_size)
{
compression_enable = enable_compression;
write_buffer =
std::make_unique<WriteBufferFromJavaOutputStream>(output_stream, buffer,
customize_buffer_size);
if (compression_enable)
{
- auto codec =
DB::CompressionCodecFactory::instance().get(boost::to_upper_copy(codecStr), {});
+ auto family = boost::to_upper_copy(codecStr);
+ auto codec = DB::CompressionCodecFactory::instance().get(family,
family == "ZSTD" ? std::optional<int>(level) : std::nullopt);
Review Comment:
Is `level` only useful for the ZSTD codec?
Maybe we can use `xxx` instead of `std::optional<int>(xxx)`.
##########
backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/utils/CHExecUtil.scala:
##########
@@ -59,14 +59,16 @@ object CHExecUtil extends Logging {
dataSize: SQLMetric,
iter: Iterator[ColumnarBatch],
compressionCodec: Option[String] = Some("lz4"),
+ compressionLevel: Option[Int] = Some(Int.MinValue),
Review Comment:
Why not leave its default value as `None`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]