morningman commented on code in PR #43525:
URL: https://github.com/apache/doris/pull/43525#discussion_r1836083798
##########
fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataCache.java:
##########
@@ -104,17 +104,18 @@ private List<Snapshot>
loadSnapshots(IcebergMetadataCacheKey key) {
@NotNull
private Table loadTable(IcebergMetadataCacheKey key) {
- Catalog icebergCatalog;
+ Table icebergTable;
if (key.catalog instanceof HMSExternalCatalog) {
- icebergCatalog = ((HMSExternalCatalog)
key.catalog).getIcebergHiveCatalog();
+ Catalog icebergCatalog = ((HMSExternalCatalog)
key.catalog).getIcebergHiveCatalog();
+ icebergTable =
HiveMetaStoreClientHelper.ugiDoAs(((ExternalCatalog)
key.catalog).getConfiguration(),
+ () -> icebergCatalog.loadTable(TableIdentifier.of(key.dbName,
key.tableName)));
Review Comment:
We should unify the interface: both branches should use either
`catalog.loadTable()` or `metadataOps.loadTable()`, not a mix of the two.
##########
fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergExternalCatalog.java:
##########
@@ -39,6 +39,7 @@ public abstract class IcebergExternalCatalog extends
ExternalCatalog {
public static final String ICEBERG_HADOOP = "hadoop";
public static final String ICEBERG_GLUE = "glue";
public static final String ICEBERG_DLF = "dlf";
+ public static final String EXTERNAL_SERVER_CATALOG_NAME =
"external_server_catalog_name";
Review Comment:
```suggestion
public static final String EXTERNAL_SERVER_CATALOG_NAME =
"external_catalog.name";
```
##########
be/src/vec/exec/format/table/iceberg_reader.h:
##########
@@ -218,7 +218,7 @@ class IcebergParquetReader final : public
IcebergTableReader {
parquet_reader->set_delete_rows(&_iceberg_delete_rows);
}
- Status _gen_col_name_maps(std::vector<tparquet::KeyValue> parquet_meta_kv);
+ Status _gen_col_name_maps(FieldDescriptor field_desc);
Review Comment:
This method also needs to be modified in `IcebergOrcReader`.
##########
be/src/vec/exec/format/table/iceberg_reader.cpp:
##########
@@ -672,39 +673,19 @@ Status IcebergOrcReader::_read_position_delete_file(const
TFileRangeDesc* delete
* 1. col1_new -> col1
* 2. col1 -> col1_new
*/
-Status
IcebergParquetReader::_gen_col_name_maps(std::vector<tparquet::KeyValue>
parquet_meta_kv) {
- for (int i = 0; i < parquet_meta_kv.size(); ++i) {
- tparquet::KeyValue kv = parquet_meta_kv[i];
- if (kv.key == "iceberg.schema") {
- _has_iceberg_schema = true;
- std::string schema = kv.value;
- rapidjson::Document json;
- json.Parse(schema.c_str());
-
- if (json.HasMember("fields")) {
- rapidjson::Value& fields = json["fields"];
- if (fields.IsArray()) {
- for (int j = 0; j < fields.Size(); j++) {
- rapidjson::Value& e = fields[j];
- rapidjson::Value& id = e["id"];
- rapidjson::Value& name = e["name"];
- std::string name_string = name.GetString();
- transform(name_string.begin(), name_string.end(),
name_string.begin(),
- ::tolower);
- auto iter = _col_id_name_map.find(id.GetInt());
- if (iter != _col_id_name_map.end()) {
- _table_col_to_file_col.emplace(iter->second,
name_string);
- _file_col_to_table_col.emplace(name_string,
iter->second);
- if (name_string != iter->second) {
- _has_schema_change = true;
- }
- } else {
- _has_schema_change = true;
- }
- }
+Status IcebergParquetReader::_gen_col_name_maps(FieldDescriptor field_desc) {
Review Comment:
```suggestion
Status IcebergParquetReader::_gen_col_name_maps(const FieldDescriptor&
field_desc) {
```
##########
be/src/vec/exec/format/parquet/vparquet_reader.h:
##########
@@ -149,6 +149,7 @@ class ParquetReader : public GenericReader {
const std::unordered_map<std::string, VExprContextSPtr>&
missing_columns) override;
std::vector<tparquet::KeyValue> get_metadata_key_values();
Review Comment:
Can this method be removed?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]