my-ship-it commented on code in PR #1629:
URL: https://github.com/apache/cloudberry/pull/1629#discussion_r3008017362


##########
gpcontrib/gp_stats_collector/src/UDSConnector.cpp:
##########
@@ -0,0 +1,130 @@
+/*-------------------------------------------------------------------------
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * UDSConnector.cpp
+ *
+ * IDENTIFICATION
+ *       gpcontrib/gp_stats_collector/src/UDSConnector.cpp
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "UDSConnector.h"
+#include "Config.h"
+#include "GpscStat.h"
+#include "memory/gpdbwrappers.h"
+#include "log/LogOps.h"
+
+#include <string>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/types.h>
+#include <sys/fcntl.h>
+#include <chrono>
+#include <thread>
+
+extern "C" {
+#include "postgres.h"
+}
+
+static void inline log_tracing_failure(const gpsc::SetQueryReq &req,
+                                       const std::string &event) {
+  ereport(LOG, (errmsg("Query {%d-%d-%d} %s tracing failed with error %m",
+                       req.query_key().tmid(), req.query_key().ssid(),
+                       req.query_key().ccnt(), event.c_str())));
+}
+
+bool UDSConnector::report_query(const gpsc::SetQueryReq &req,
+                                const std::string &event,
+                                const Config &config) {
+  sockaddr_un address{};
+  address.sun_family = AF_UNIX;
+  const auto &uds_path = config.uds_path();
+
+  if (uds_path.size() >= sizeof(address.sun_path)) {
+    ereport(WARNING, (errmsg("UDS path is too long for socket buffer")));
+    GpscStat::report_error();
+    return false;
+  }
+  strcpy(address.sun_path, uds_path.c_str());
+
+  const auto sockfd = socket(AF_UNIX, SOCK_STREAM, 0);

Review Comment:
   Thanks for the contribution. It is great but minor comments here:
    
   Every single hook event creates a new socket, connects, sends, and closes. 
For a busy cluster this means thousands of socket()/connect()/close() syscalls 
per second. This will be a significant performance hit. Should use a persistent 
connection with reconnect-on-failure.



##########
gpcontrib/gp_stats_collector/.clang-format:
##########
@@ -0,0 +1,2 @@
+BasedOnStyle: LLVM
+SortIncludes: false

Review Comment:
   The extension uses LLVM code style (brace on same line, etc.) which is 
inconsistent with the rest of CBDB (BSD/Allman style). This will cause 
confusion and merge conflicts if other contributors touch these files.



##########
gpcontrib/gp_stats_collector/src/hook_wrappers.cpp:
##########
@@ -0,0 +1,414 @@
+/*-------------------------------------------------------------------------
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * hook_wrappers.cpp
+ *
+ * IDENTIFICATION
+ *       gpcontrib/gp_stats_collector/src/hook_wrappers.cpp
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#define typeid __typeid
+extern "C" {
+#include "postgres.h"
+#include "funcapi.h"
+#include "executor/executor.h"
+#include "executor/execUtils.h"
+#include "utils/elog.h"
+#include "utils/builtins.h"
+#include "utils/metrics_utils.h"
+#include "cdb/cdbvars.h"
+#include "cdb/ml_ipc.h"
+#include "tcop/utility.h"
+#include "stat_statements_parser/pg_stat_statements_ya_parser.h"
+
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <unistd.h>
+#include <errno.h>
+#include <poll.h>
+}
+#undef typeid
+
+#include "Config.h"
+#include "GpscStat.h"
+#include "EventSender.h"
+#include "hook_wrappers.h"
+#include "memory/gpdbwrappers.h"
+
+static ExecutorStart_hook_type previous_ExecutorStart_hook = nullptr;
+static ExecutorRun_hook_type previous_ExecutorRun_hook = nullptr;
+static ExecutorFinish_hook_type previous_ExecutorFinish_hook = nullptr;
+static ExecutorEnd_hook_type previous_ExecutorEnd_hook = nullptr;
+static query_info_collect_hook_type previous_query_info_collect_hook = nullptr;
+#ifdef ANALYZE_STATS_COLLECT_HOOK
+static analyze_stats_collect_hook_type previous_analyze_stats_collect_hook =
+    nullptr;
+#endif
+#ifdef IC_TEARDOWN_HOOK
+static ic_teardown_hook_type previous_ic_teardown_hook = nullptr;
+#endif
+static ProcessUtility_hook_type previous_ProcessUtility_hook = nullptr;
+
+static void gpsc_ExecutorStart_hook(QueryDesc *query_desc, int eflags);
+static void gpsc_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection 
direction,
+                                uint64 count, bool execute_once);
+static void gpsc_ExecutorFinish_hook(QueryDesc *query_desc);
+static void gpsc_ExecutorEnd_hook(QueryDesc *query_desc);
+static void gpsc_query_info_collect_hook(QueryMetricsStatus status, void *arg);
+#ifdef IC_TEARDOWN_HOOK
+static void gpsc_ic_teardown_hook(ChunkTransportState *transportStates,
+                                bool hasErrors);
+#endif
+#ifdef ANALYZE_STATS_COLLECT_HOOK
+static void gpsc_analyze_stats_collect_hook(QueryDesc *query_desc);
+#endif
+static void gpsc_process_utility_hook(PlannedStmt *pstmt, const char 
*queryString,
+                                    bool readOnlyTree,
+                                    ProcessUtilityContext context,
+                                    ParamListInfo params,
+                                    QueryEnvironment *queryEnv,
+                                    DestReceiver *dest, QueryCompletion *qc);
+
+#define TEST_MAX_CONNECTIONS 4
+#define TEST_RCV_BUF_SIZE 8192
+#define TEST_POLL_TIMEOUT_MS 200
+
+static int test_server_fd = -1;
+static char *test_sock_path = NULL;
+
+static EventSender *sender = nullptr;
+
+static inline EventSender *get_sender() {
+  if (!sender) {
+    sender = new EventSender();
+  }
+  return sender;
+}
+
+template <typename T, typename R, typename... Args>
+R cpp_call(T *obj, R (T::*func)(Args...), Args... args) {
+  try {
+    return (obj->*func)(args...);
+  } catch (const std::exception &e) {
+    ereport(FATAL, (errmsg("Unexpected exception in gpsc %s", e.what())));

Review Comment:
   Any C++ exception from the extension kills the entire backend with FATAL. 
This should be ERROR or WARNING at most — a monitoring extension should never 
crash the database.



##########
gpcontrib/gp_stats_collector/src/memory/gpdbwrappers.h:
##########
@@ -0,0 +1,81 @@
+/*-------------------------------------------------------------------------
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * gpdbwrappers.h
+ *
+ * IDENTIFICATION
+ *       gpcontrib/gp_stats_collector/src/memory/gpdbwrappers.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#pragma once

Review Comment:
   CBDB convention requires #ifndef FILENAME_H / #define FILENAME_H style 
include guards, not #pragma once.



##########
gpcontrib/gp_stats_collector/protos/gpsc_metrics.proto:
##########
@@ -0,0 +1,183 @@
+syntax = "proto3";

Review Comment:
   No Apache license header on .proto files — The protobuf definition files 
lack the Apache 2.0 license header that other files have.



##########
gpcontrib/gp_stats_collector/src/Config.cpp:
##########
@@ -0,0 +1,177 @@
+/*-------------------------------------------------------------------------
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * Config.cpp
+ *
+ * IDENTIFICATION
+ *       gpcontrib/gp_stats_collector/src/Config.cpp
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "Config.h"
+#include "memory/gpdbwrappers.h"
+#include <limits.h>
+#include <memory>
+#include <string>
+#include <unordered_set>
+
+extern "C" {
+#include "postgres.h"
+#include "utils/guc.h"
+}
+
+static char *guc_uds_path = nullptr;
+static bool guc_enable_analyze = true;
+static bool guc_enable_cdbstats = true;
+static bool guc_enable_collector = true;
+static bool guc_report_nested_queries = true;
+static char *guc_ignored_users = nullptr;
+static int guc_max_text_size = 1 << 20;  // in bytes (1MB)
+static int guc_max_plan_size = 1024;     // in KB
+static int guc_min_analyze_time = 10000; // in ms
+static int guc_logging_mode = LOG_MODE_UDS;
+static bool guc_enable_utility = false;
+
+static const struct config_enum_entry logging_mode_options[] = {
+    {"uds", LOG_MODE_UDS, false /* hidden */},
+    {"tbl", LOG_MODE_TBL, false},
+    {NULL, 0, false}};
+
+static bool ignored_users_guc_dirty = false;
+
+static void assign_ignored_users_hook(const char *, void *) {
+  ignored_users_guc_dirty = true;
+}
+
+void Config::init_gucs() {
+  DefineCustomStringVariable(
+      "gpsc.uds_path", "Sets filesystem path of the agent socket", 0LL,
+      &guc_uds_path, "/tmp/gpsc_agent.sock", PGC_SUSET,
+      GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL);
+
+  DefineCustomBoolVariable(
+      "gpsc.enable", "Enable metrics collector", 0LL, &guc_enable_collector,

Review Comment:
    When the extension is loaded via shared_preload_libraries, collection is ON 
by default. Combined with gpsc.ignored_users_list defaulting to 
"gpadmin,repl,gpperfmon,monitor", this means the extension silently starts 
collecting for non-default users. Default should be false for safety?



##########
gpcontrib/gp_stats_collector/results/gpsc_cursors.out:
##########
@@ -0,0 +1,163 @@
+CREATE EXTENSION gp_stats_collector;

Review Comment:
   Remove results/ from git?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to