github-actions[bot] commented on code in PR #61585:
URL: https://github.com/apache/doris/pull/61585#discussion_r3295963667


##########
be/src/cloud/cloud_internal_service.cpp:
##########
@@ -49,12 +50,70 @@ bvar::LatencyRecorder 
g_file_cache_get_by_peer_read_cache_file_latency(
         "file_cache_get_by_peer_read_cache_file_latency");
 bvar::LatencyRecorder 
g_cloud_internal_service_get_file_cache_meta_by_tablet_id_latency(
         "cloud_internal_service_get_file_cache_meta_by_tablet_id_latency");
+bvar::Adder<int64_t> g_cloud_sync_tablet_meta_requests_total(
+        "cloud_sync_tablet_meta_requests_total");
+bvar::Adder<int64_t> 
g_cloud_sync_tablet_meta_synced_total("cloud_sync_tablet_meta_synced_total");
+bvar::Adder<int64_t> 
g_cloud_sync_tablet_meta_skipped_total("cloud_sync_tablet_meta_skipped_total");
+bvar::Adder<int64_t> 
g_cloud_sync_tablet_meta_failed_total("cloud_sync_tablet_meta_failed_total");
 
 CloudInternalServiceImpl::CloudInternalServiceImpl(CloudStorageEngine& engine, 
ExecEnv* exec_env)
         : PInternalService(exec_env), _engine(engine) {}
 
 CloudInternalServiceImpl::~CloudInternalServiceImpl() = default;
 
+void 
CloudInternalServiceImpl::sync_tablet_meta(google::protobuf::RpcController* 
controller,
+                                                const PSyncTabletMetaRequest* 
request,
+                                                PSyncTabletMetaResponse* 
response,
+                                                google::protobuf::Closure* 
done) {
+    auto start_time = std::chrono::steady_clock::now();
+    bool ret = _light_work_pool.try_offer([this, request, response, done, 
start_time]() {
+        brpc::ClosureGuard closure_guard(done);
+        LOG(INFO) << "begin to sync tablet meta, request=" << 
request->ShortDebugString();
+        int64_t synced = 0;
+        int64_t skipped = 0;
+        int64_t failed = 0;
+        g_cloud_sync_tablet_meta_requests_total << 1;
+        for (const auto tablet_id : request->tablet_ids()) {
+            auto tablet = _engine.tablet_mgr().get_tablet_if_cached(tablet_id);
+            if (!tablet) {
+                ++skipped;
+                continue;
+            }
+            auto st = tablet->sync_meta();

Review Comment:
   This new RPC path can call `CloudTablet::sync_meta()` from the light work 
pool concurrently with the scheduled `CloudTabletMgr::sync_tablets()` path and 
with read/compaction paths that read tablet metadata under `_meta_lock`. 
However `CloudTablet::sync_meta()` currently fetches remote meta and then 
mutates `_tablet_meta` and `tablet_schema` fields without taking 
`_sync_meta_lock` or `_meta_lock` (for example `set_ttl_seconds`, compaction 
policy fields, and `set_disable_auto_compaction`). That makes the proactive 
notification race with readers that assume `_meta_lock` protects tablet 
metadata, and it also violates the documented cloud lock order `_sync_meta_lock 
-> _meta_lock`. Please serialize `sync_meta()` consistently with the other 
cloud sync paths, e.g. take `_sync_meta_lock` for the sync operation and 
`_meta_lock` in write mode before applying the refreshed fields.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to