This is an automated email from the ASF dual-hosted git repository.
xuyang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 7229751bd9 [Improve](map-type) Add contains_null for map (#16948)
7229751bd9 is described below
commit 7229751bd9c55b0325b384a1508ba6cbd2b4c258
Author: amory <[email protected]>
AuthorDate: Thu Feb 23 20:47:26 2023 +0800
[Improve](map-type) Add contains_null for map (#16948)
Add contains_null for map type.
---
be/src/olap/types.h | 36 +++++++++++--
be/src/runtime/types.cpp | 34 +++++-------
be/src/vec/data_types/data_type_factory.cpp | 6 +--
be/src/vec/exprs/vmap_literal.cpp | 20 +++----
be/src/vec/functions/function_cast.h | 1 +
be/test/vec/exec/parquet/parquet_thrift_test.cpp | 5 +-
.../java/org/apache/doris/catalog/ArrayType.java | 3 +-
.../java/org/apache/doris/catalog/MapType.java | 21 ++++++++
.../main/java/org/apache/doris/catalog/Type.java | 3 +-
.../java/org/apache/doris/analysis/CastExpr.java | 2 +-
.../java/org/apache/doris/analysis/MapLiteral.java | 61 +++++++++++-----------
.../main/java/org/apache/doris/catalog/Column.java | 2 +
gensrc/thrift/Types.thrift | 2 +-
13 files changed, 124 insertions(+), 72 deletions(-)
diff --git a/be/src/olap/types.h b/be/src/olap/types.h
index 58ec1eb469..b26a89ba6f 100644
--- a/be/src/olap/types.h
+++ b/be/src/olap/types.h
@@ -339,7 +339,18 @@ public:
} else if (l_size > r_size) {
return 1;
} else {
- return 0;
+ // now we use collection value in array to pack map k-v
+ auto l_k = reinterpret_cast<const
CollectionValue*>(l_value->key_data());
+ auto l_v = reinterpret_cast<const
CollectionValue*>(l_value->value_data());
+ auto r_k = reinterpret_cast<const
CollectionValue*>(r_value->key_data());
+ auto r_v = reinterpret_cast<const
CollectionValue*>(r_value->value_data());
+ auto key_arr = new
ArrayTypeInfo(create_static_type_info_ptr(_key_type_info.get()));
+ auto val_arr = new
ArrayTypeInfo(create_static_type_info_ptr(_value_type_info.get()));
+ if (int kc = key_arr->cmp(l_k, r_k) != 0) {
+ return kc;
+ } else {
+ return val_arr->cmp(l_v, r_v);
+ }
}
}
@@ -356,7 +367,27 @@ public:
return Status::Error<ErrorCode::NOT_IMPLEMENTED_ERROR>();
}
- std::string to_string(const void* src) const override { return "{}"; }
+ std::string to_string(const void* src) const override {
+ auto src_ = reinterpret_cast<const MapValue*>(src);
+ auto src_key = reinterpret_cast<const
CollectionValue*>(src_->key_data());
+ auto src_val = reinterpret_cast<const
CollectionValue*>(src_->value_data());
+ size_t key_slot_size = _key_type_info->size();
+ size_t val_slot_size = _value_type_info->size();
+ std::string result = "{";
+
+ for (size_t i = 0; i < src_key->length(); ++i) {
+ std::string k_s =
+ _key_type_info->to_string((uint8_t*)(src_key->data()) +
key_slot_size);
+ std::string v_s =
+ _key_type_info->to_string((uint8_t*)(src_val->data()) +
val_slot_size);
+ result += k_s + ":" + v_s;
+ if (i != src_key->length() - 1) {
+ result += ", ";
+ }
+ }
+ result += "}";
+ return result;
+ }
void set_to_max(void* buf) const override {
DCHECK(false) << "set_to_max of list is not implemented.";
@@ -366,7 +397,6 @@ public:
DCHECK(false) << "set_to_min of list is not implemented.";
}
- // todo . is here only to need return 16 for two ptr?
size_t size() const override { return sizeof(MapValue); }
FieldType type() const override { return OLAP_FIELD_TYPE_MAP; }
diff --git a/be/src/runtime/types.cpp b/be/src/runtime/types.cpp
index e6366a1681..fca1c86954 100644
--- a/be/src/runtime/types.cpp
+++ b/be/src/runtime/types.cpp
@@ -55,9 +55,10 @@ TypeDescriptor::TypeDescriptor(const std::vector<TTypeNode>&
types, int* idx)
case TTypeNodeType::ARRAY: {
DCHECK(!node.__isset.scalar_type);
DCHECK_LT(*idx, types.size() - 1);
+ DCHECK_EQ(node.contains_nulls.size(), 1);
type = TYPE_ARRAY;
contains_nulls.reserve(1);
- contains_nulls.push_back(node.contains_null);
+ contains_nulls.push_back(node.contains_nulls[0]);
++(*idx);
children.push_back(TypeDescriptor(types, idx));
break;
@@ -65,7 +66,7 @@ TypeDescriptor::TypeDescriptor(const std::vector<TTypeNode>&
types, int* idx)
case TTypeNodeType::STRUCT: {
DCHECK(!node.__isset.scalar_type);
DCHECK_LT(*idx, types.size() - 1);
- DCHECK(!node.__isset.contains_null);
+ DCHECK(!node.__isset.contains_nulls);
DCHECK(node.__isset.struct_fields);
DCHECK_GE(node.struct_fields.size(), 1);
type = TYPE_STRUCT;
@@ -85,31 +86,19 @@ TypeDescriptor::TypeDescriptor(const
std::vector<TTypeNode>& types, int* idx)
type = TYPE_VARIANT;
break;
}
- // case TTypeNodeType::STRUCT:
- // type = TYPE_STRUCT;
- // for (int i = 0; i < node.struct_fields.size(); ++i) {
- // ++(*idx);
- // children.push_back(TypeDescriptor(types, idx));
- // field_names.push_back(node.struct_fields[i].name);
- // }
- // break;
- // case TTypeNodeType::ARRAY:
- // DCHECK(!node.__isset.scalar_type);
- // DCHECK_LT(*idx, types.size() - 1);
- // type = TYPE_ARRAY;
- // ++(*idx);
- // children.push_back(TypeDescriptor(types, idx));
- // break;
case TTypeNodeType::MAP: {
//TODO(xy): handle contains_null[0] for key and [1] for value
DCHECK(!node.__isset.scalar_type);
DCHECK_LT(*idx, types.size() - 2);
- DCHECK(!node.__isset.contains_null);
+ DCHECK_EQ(node.contains_nulls.size(), 2);
+ contains_nulls.reserve(2);
type = TYPE_MAP;
++(*idx);
children.push_back(TypeDescriptor(types, idx));
+ contains_nulls.push_back(node.contains_nulls[0]);
++(*idx);
children.push_back(TypeDescriptor(types, idx));
+ contains_nulls.push_back(node.contains_nulls[1]);
break;
}
default:
@@ -122,11 +111,16 @@ void TypeDescriptor::to_thrift(TTypeDesc* thrift_type)
const {
TTypeNode& node = thrift_type->types.back();
if (is_complex_type()) {
if (type == TYPE_ARRAY) {
+ DCHECK_EQ(contains_nulls.size(), 1);
node.type = TTypeNodeType::ARRAY;
- node.contains_null = contains_nulls[0];
+ node.contains_nulls.reserve(1);
+ node.contains_nulls.push_back(contains_nulls[0]);
} else if (type == TYPE_MAP) {
- //TODO(xy): need to process children for map
+ DCHECK_EQ(contains_nulls.size(), 2);
node.type = TTypeNodeType::MAP;
+ node.contains_nulls.reserve(2);
+ node.contains_nulls.push_back(contains_nulls[0]);
+ node.contains_nulls.push_back(contains_nulls[1]);
} else if (type == TYPE_VARIANT) {
node.type = TTypeNodeType::VARIANT;
} else {
diff --git a/be/src/vec/data_types/data_type_factory.cpp
b/be/src/vec/data_types/data_type_factory.cpp
index 91fc51187b..aaf9a395f1 100644
--- a/be/src/vec/data_types/data_type_factory.cpp
+++ b/be/src/vec/data_types/data_type_factory.cpp
@@ -169,10 +169,10 @@ DataTypePtr DataTypeFactory::create_data_type(const
TypeDescriptor& col_desc, bo
break;
case TYPE_MAP:
DCHECK(col_desc.children.size() == 2);
- // todo. (Amory) Support Map contains_nulls in FE MapType.java Later PR
+ DCHECK_EQ(col_desc.contains_nulls.size(), 2);
nested = std::make_shared<vectorized::DataTypeMap>(
- create_data_type(col_desc.children[0], true),
- create_data_type(col_desc.children[1], true));
+ create_data_type(col_desc.children[0],
col_desc.contains_nulls[0]),
+ create_data_type(col_desc.children[1],
col_desc.contains_nulls[1]));
break;
case TYPE_STRUCT: {
DCHECK(col_desc.children.size() >= 1);
diff --git a/be/src/vec/exprs/vmap_literal.cpp
b/be/src/vec/exprs/vmap_literal.cpp
index 954142f04d..60415dda69 100644
--- a/be/src/vec/exprs/vmap_literal.cpp
+++ b/be/src/vec/exprs/vmap_literal.cpp
@@ -30,17 +30,17 @@ Status VMapLiteral::prepare(RuntimeState* state, const
RowDescriptor& row_desc,
Field keys = Array();
Field values = Array();
// each child is slot with key1, value1, key2, value2...
- for (int idx = 0; idx < _children.size(); ++idx) {
- Field item;
- ColumnPtrWrapper* const_col_wrapper = nullptr;
- RETURN_IF_ERROR(_children[idx]->get_const_col(context,
&const_col_wrapper));
- const_col_wrapper->column_ptr->get(0, item);
+ for (int idx = 0; idx < _children.size() && idx + 1 < _children.size();
idx += 2) {
+ Field kf, vf;
+ ColumnPtrWrapper* const_key_col_wrapper = nullptr;
+ ColumnPtrWrapper* const_val_col_wrapper = nullptr;
+ RETURN_IF_ERROR(_children[idx]->get_const_col(context,
&const_key_col_wrapper));
+ RETURN_IF_ERROR(_children[idx + 1]->get_const_col(context,
&const_val_col_wrapper));
+ const_key_col_wrapper->column_ptr->get(0, kf);
+ const_val_col_wrapper->column_ptr->get(0, vf);
- if ((idx & 1) == 0) {
- keys.get<Array>().push_back(item);
- } else {
- values.get<Array>().push_back(item);
- }
+ keys.get<Array>().push_back(kf);
+ values.get<Array>().push_back(vf);
}
map.get<Map>().push_back(keys);
map.get<Map>().push_back(values);
diff --git a/be/src/vec/functions/function_cast.h
b/be/src/vec/functions/function_cast.h
index 7ac3316fdb..f3e5d6caf1 100644
--- a/be/src/vec/functions/function_cast.h
+++ b/be/src/vec/functions/function_cast.h
@@ -1542,6 +1542,7 @@ private:
}
}
+ //TODO(Amory) . Need support more cast for key , value for map
WrapperType create_map_wrapper(const DataTypePtr& from_type, const
DataTypeMap& to_type) const {
switch (from_type->get_type_id()) {
case TypeIndex::String:
diff --git a/be/test/vec/exec/parquet/parquet_thrift_test.cpp
b/be/test/vec/exec/parquet/parquet_thrift_test.cpp
index b6d635aa44..c7e4894d20 100644
--- a/be/test/vec/exec/parquet/parquet_thrift_test.cpp
+++ b/be/test/vec/exec/parquet/parquet_thrift_test.cpp
@@ -375,13 +375,14 @@ TEST_F(ParquetThriftReaderTest, group_reader) {
{
TTypeNode node;
node.__set_type(TTypeNodeType::ARRAY);
- node.contains_null = true;
+ std::vector<bool> contains_nulls {true};
+ node.__set_contains_nulls(contains_nulls);
TTypeNode inner;
inner.__set_type(TTypeNodeType::SCALAR);
TScalarType scalar_type;
scalar_type.__set_type(TPrimitiveType::STRING);
inner.__set_scalar_type(scalar_type);
- inner.contains_null = true;
+ inner.__set_contains_nulls(contains_nulls);
type.types.push_back(node);
type.types.push_back(inner);
}
diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/ArrayType.java
b/fe/fe-common/src/main/java/org/apache/doris/catalog/ArrayType.java
index 8aa6acb31a..6cc9162dc6 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/catalog/ArrayType.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/ArrayType.java
@@ -24,6 +24,7 @@ import org.apache.doris.thrift.TTypeNodeType;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
import com.google.gson.annotations.SerializedName;
import java.util.Objects;
@@ -138,7 +139,7 @@ public class ArrayType extends Type {
container.types.add(node);
Preconditions.checkNotNull(itemType);
node.setType(TTypeNodeType.ARRAY);
- node.setContainsNull(containsNull);
+ node.setContainsNulls(Lists.newArrayList(containsNull));
itemType.toThrift(container);
}
diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/MapType.java
b/fe/fe-common/src/main/java/org/apache/doris/catalog/MapType.java
index 06167cc9c9..6fd8da9c24 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/catalog/MapType.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/MapType.java
@@ -24,6 +24,7 @@ import org.apache.doris.thrift.TTypeNodeType;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
import com.google.gson.annotations.SerializedName;
import java.util.Objects;
@@ -35,19 +36,30 @@ public class MapType extends Type {
@SerializedName(value = "keyType")
private final Type keyType;
+
+ @SerializedName(value = "isKeyContainsNull")
+ private final boolean isKeyContainsNull; // Now always true
+
@SerializedName(value = "valueType")
private final Type valueType;
+ @SerializedName(value = "isValueContainsNull")
+ private final boolean isValueContainsNull; // Now always true
+
public MapType() {
this.keyType = NULL;
+ this.isKeyContainsNull = true;
this.valueType = NULL;
+ this.isValueContainsNull = true;
}
public MapType(Type keyType, Type valueType) {
Preconditions.checkNotNull(keyType);
Preconditions.checkNotNull(valueType);
this.keyType = keyType;
+ this.isKeyContainsNull = true;
this.valueType = valueType;
+ this.isValueContainsNull = true;
}
@Override
@@ -59,6 +71,14 @@ public class MapType extends Type {
return keyType;
}
+ public Boolean getIsKeyContainsNull() {
+ return isKeyContainsNull;
+ }
+
+ public Boolean getIsValueContainsNull() {
+ return isValueContainsNull;
+ }
+
public Type getValueType() {
return valueType;
}
@@ -141,6 +161,7 @@ public class MapType extends Type {
Preconditions.checkNotNull(keyType);
Preconditions.checkNotNull(valueType);
node.setType(TTypeNodeType.MAP);
+ node.setContainsNulls(Lists.newArrayList(isKeyContainsNull,
isValueContainsNull));
keyType.toThrift(container);
valueType.toThrift(container);
}
diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
index 06f00d04e5..ca4ef88bd1 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
@@ -194,6 +194,7 @@ public abstract class Type {
mapSubTypes.add(CHAR);
mapSubTypes.add(VARCHAR);
mapSubTypes.add(STRING);
+ mapSubTypes.add(NULL);
structSubTypes = Lists.newArrayList();
structSubTypes.add(BOOLEAN);
@@ -606,7 +607,7 @@ public abstract class Type {
&& !sourceType.isNull()) {
// TODO: current not support cast any non-array type(except for
null) to nested array type.
return false;
- } else if (targetType.isStructType() && sourceType.isStringType()) {
+ } else if ((targetType.isStructType() || targetType.isMapType()) &&
sourceType.isStringType()) {
return true;
} else if (sourceType.isStructType() && targetType.isStructType()) {
return StructType.canCastTo((StructType) sourceType, (StructType)
targetType);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java
index 2ff6579d82..22252d1d5d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java
@@ -333,7 +333,7 @@ public class CastExpr extends Expr {
if (fn == null) {
//TODO(xy): check map type
- if (type.isStructType() && childType.isStringType()) {
+ if ((type.isMapType() || type.isStructType()) &&
childType.isStringType()) {
return;
}
if (childType.isNull() && Type.canCastTo(childType, type)) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/MapLiteral.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/MapLiteral.java
index fe8815a4d2..182fb40f33 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/MapLiteral.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/MapLiteral.java
@@ -47,30 +47,35 @@ public class MapLiteral extends LiteralExpr {
Type keyType = Type.NULL;
Type valueType = Type.NULL;
children = new ArrayList<>();
- int idx = 0;
- // TODO(xy): limit key type to scalar type
- for (LiteralExpr expr : exprs) {
- if (idx % 2 == 0) {
- if (keyType == Type.NULL) {
- keyType = expr.getType();
- } else {
- keyType = Type.getAssignmentCompatibleType(keyType,
expr.getType(), true);
- }
- if (keyType == Type.INVALID) {
- throw new AnalysisException("Invalid element type in Map");
- }
+ // check types here
+ // 1. limit key type with map-key support
+ // 2. check type can be assigment for cast
+ for (int idx = 0; idx < exprs.length && idx + 1 < exprs.length; idx +=
2) {
+ if (!MapType.MAP.supportSubType(exprs[idx].getType())) {
+ throw new AnalysisException("Invalid key type in Map, not
support " + exprs[idx].getType());
+ }
+ keyType = Type.getAssignmentCompatibleType(keyType,
exprs[idx].getType(), true);
+ valueType = Type.getAssignmentCompatibleType(valueType, exprs[idx
+ 1].getType(), true);
+ }
+
+ if (keyType == Type.INVALID) {
+ throw new AnalysisException("Invalid key type in Map.");
+ }
+ if (valueType == Type.INVALID) {
+ throw new AnalysisException("Invalid value type in Map.");
+ }
+
+ for (int idx = 0; idx < exprs.length && idx + 1 < exprs.length; idx +=
2) {
+ if (exprs[idx].getType().equals(keyType)) {
+ children.add(exprs[idx]);
+ } else {
+ children.add(exprs[idx].castTo(keyType));
+ }
+ if (exprs[idx + 1].getType().equals(valueType)) {
+ children.add(exprs[idx + 1]);
} else {
- if (valueType == Type.NULL) {
- valueType = expr.getType();
- } else {
- valueType = Type.getAssignmentCompatibleType(valueType,
expr.getType(), true);
- }
- if (valueType == Type.INVALID) {
- throw new AnalysisException("Invalid element type in Map");
- }
+ children.add(exprs[idx + 1].castTo(valueType));
}
- children.add(expr);
- ++ idx;
}
type = new MapType(keyType, valueType);
@@ -89,13 +94,9 @@ public class MapLiteral extends LiteralExpr {
Type keyType = ((MapType) targetType).getKeyType();
Type valueType = ((MapType) targetType).getValueType();
- for (int i = 0; i < children.size(); ++ i) {
- Expr child = children.get(i);
- if ((i % 2) == 0) {
- literal.children.set(i, child.uncheckedCastTo(keyType));
- } else {
- literal.children.set(i, child.uncheckedCastTo(valueType));
- }
+ for (int i = 0; i < children.size() && i + 1 < children.size(); i +=
2) {
+ literal.children.set(i, children.get(i).uncheckedCastTo(keyType));
+ literal.children.set(i + 1, children.get(i +
1).uncheckedCastTo(valueType));
}
literal.setType(targetType);
return literal;
@@ -111,7 +112,7 @@ public class MapLiteral extends LiteralExpr {
@Override
protected String toSqlImpl() {
List<String> list = new ArrayList<>(children.size());
- for (int i = 0; i < children.size(); i += 2) {
+ for (int i = 0; i < children.size() && i + 1 < children.size(); i +=
2) {
list.add(children.get(i).toSqlImpl() + ":" + children.get(i +
1).toSqlImpl());
}
return "MAP{" + StringUtils.join(list, ", ") + "}";
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
index f6ae293f54..72bb65a20a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
@@ -218,6 +218,8 @@ public class Column implements Writable,
GsonPostProcessable {
} else if (type.isMapType()) {
Column k = new Column(COLUMN_MAP_KEY, ((MapType)
type).getKeyType());
Column v = new Column(COLUMN_MAP_VALUE, ((MapType)
type).getValueType());
+ k.setIsAllowNull(((MapType) type).getIsKeyContainsNull());
+ v.setIsAllowNull(((MapType) type).getIsValueContainsNull());
column.addChildrenColumn(k);
column.addChildrenColumn(v);
} else if (type.isStructType()) {
diff --git a/gensrc/thrift/Types.thrift b/gensrc/thrift/Types.thrift
index 641e54f780..cec723aada 100644
--- a/gensrc/thrift/Types.thrift
+++ b/gensrc/thrift/Types.thrift
@@ -141,7 +141,7 @@ struct TTypeNode {
3: optional list<TStructField> struct_fields
// only used for complex types, such as array, map and etc.
- 4: optional bool contains_null
+ 4: optional list<bool> contains_nulls
}
// A flattened representation of a tree of column types obtained by depth-first
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]