morningman commented on code in PR #57821:
URL: https://github.com/apache/doris/pull/57821#discussion_r2544440165


##########
be/src/vec/data_types/serde/data_type_varbinary_serde.h:
##########
@@ -39,18 +41,22 @@ class DataTypeVarbinarySerDe : public DataTypeSerDe {
     std::string get_name() const override { return "Varbinary"; }
 
     Status serialize_one_cell_to_json(const IColumn& column, int64_t row_num, 
BufferWritable& bw,
-                                      FormatOptions& options) const override {
-        return Status::NotSupported("serialize_one_cell_to_json with type " + 
column.get_name());
-    }
+                                      FormatOptions& options) const override;
 
     Status serialize_column_to_json(const IColumn& column, int64_t start_idx, 
int64_t end_idx,
                                     BufferWritable& bw, FormatOptions& 
options) const override {
         return Status::NotSupported("serialize_column_to_json with type " + 
column.get_name());
     }
     Status deserialize_one_cell_from_json(IColumn& column, Slice& slice,
                                           const FormatOptions& options) const 
override {
-        return Status::NotSupported("deserialize_one_cell_from_text with type 
" +
-                                    column.get_name());
+        if (_nesting_level >= 2) {
+            slice.trim_quote();
+        }
+        if (options.escape_char != 0) {
+            escape_string(slice.data, &slice.size, options.escape_char);

Review Comment:
   Do we need to escape for binary?
   BTW, is Json really support binary data type?



##########
be/src/vec/exec/format/parquet/parquet_column_convert.cpp:
##########
@@ -251,6 +254,15 @@ std::unique_ptr<PhysicalToLogicalConverter> 
PhysicalToLogicalConverter::get_conv
             physical_converter =
                     std::make_unique<UnsupportedConverter>(src_physical_type, 
src_logical_type);
         }
+    } else if (src_logical_primitive == TYPE_VARBINARY) {
+        if (src_physical_type == tparquet::Type::FIXED_LEN_BYTE_ARRAY) {
+            DCHECK(parquet_schema.logicalType.__isset.UUID) << 
parquet_schema.name;

Review Comment:
   Why does this have to be of type UUID?



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyCastRule.java:
##########
@@ -124,6 +127,19 @@ public static Expression simplifyCast(Cast cast) {
                             return cast;
                         }
                     }
+                } else if (castType instanceof VarBinaryType) {

Review Comment:
   What is this for?



##########
fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java:
##########
@@ -70,6 +70,8 @@ public class ScalarType extends Type {
 
     public static final int MAX_JSONB_LENGTH = 0x7fffffff - 4;
 
+    public static final int MAX_VARBINARY_LENGTH = 0x7fffffff;

Review Comment:
   add comment about human readable size



##########
be/src/vec/exec/format/column_type_convert.h:
##########
@@ -920,5 +927,59 @@ class DecimalToDecimalConverter : public 
ColumnTypeConverter {
     }
 };
 
+/*
+we treat the byte_array type in parquet

Review Comment:
   This comment is a little bit confusing.
   Use LLM to refine it



##########
fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java:
##########
@@ -109,6 +109,10 @@ public abstract class ExternalCatalog
     public static final String DORIS_VERSION_VALUE = 
Version.DORIS_BUILD_VERSION + "-" + Version.DORIS_BUILD_SHORT_HASH;
     public static final String USE_META_CACHE = "use_meta_cache";
 
+    // default: false, so mapping to string directly keep compatibility
+    public static final String ENABLE_MAPPING_VARBINARY = 
"enable.mapping.varbinary";
+    protected Optional<Boolean> enableMappingVarbinary = Optional.empty();

Review Comment:
   I think this field should be in CatalogProperty.java



##########
be/src/vec/exec/format/parquet/schema_desc.cpp:
##########
@@ -291,6 +299,11 @@ std::pair<DataTypePtr, bool> 
FieldDescriptor::convert_to_doris_type(
     } else if (logicalType.__isset.TIMESTAMP) {
         ans.first = DataTypeFactory::instance().create_data_type(
                 TYPE_DATETIMEV2, nullable, 0, 
logicalType.TIMESTAMP.unit.__isset.MILLIS ? 3 : 6);
+    } else if (logicalType.__isset.JSON) {

Review Comment:
   is JSON a new type that we do not support before?



##########
fe/fe-core/src/main/java/org/apache/doris/datasource/property/fileformat/FileFormatProperties.java:
##########
@@ -45,6 +45,7 @@ public abstract class FileFormatProperties {
     protected TFileFormatType fileFormatType;
 
     protected TFileCompressType compressionType;
+    public boolean enableMappingVarbinary = false;

Review Comment:
   Add comment to this field



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to