Copilot commented on code in PR #60439: URL: https://github.com/apache/doris/pull/60439#discussion_r2753326137
########## thirdparty/vars.sh: ########## @@ -86,10 +86,10 @@ GFLAGS_SOURCE=gflags-2.2.2 GFLAGS_MD5SUM="1a865b93bacfa963201af3f75b7bd64c" # glog -GLOG_DOWNLOAD="https://github.com/google/glog/archive/refs/tags/v0.6.0.tar.gz" -GLOG_NAME="glog-v0.6.0.tar.gz" -GLOG_SOURCE=glog-0.6.0 -GLOG_MD5SUM="c98a6068bc9b8ad9cebaca625ca73aa2" +GLOG_DOWNLOAD="https://github.com/google/glog/archive/refs/tags/v0.7.1.tar.gz" +GLOG_NAME="glog-v0.7.1.tar.gz" +GLOG_SOURCE=glog-0.7.1 +GLOG_MD5SUM="128e2995cc33d794ff24f785a3060346" Review Comment: `GLOG_MD5SUM` uses MD5 to verify the integrity of the downloaded glog tarball, which is a weak and broken hash function for security-sensitive integrity checks. An active network attacker (especially given the use of `wget` for remote downloads) could potentially bypass this protection by exploiting MD5 weaknesses, undermining your supply-chain integrity for this dependency. Replace the MD5-based check with a stronger hash (e.g., SHA-256) and compare against a pinned, known-good value for the archive. ```suggestion GLOG_MD5SUM="" GLOG_SHA256SUM="REPLACE_WITH_ACTUAL_PINNED_SHA256_FOR_glog-v0.7.1.tar.gz" ``` ########## thirdparty/patches/glog-0.7.1.patch: ########## @@ -0,0 +1,398 @@ +--- a/src/glog/flags.h ++++ b/src/glog/flags.h +@@ -111,6 +111,16 @@ + DECLARE_string(alsologtoemail); + DECLARE_string(log_backtrace_at); + ++// Set max log file num ++DECLARE_int32(log_filenum_quota); ++ ++// Set max warn log file num ++#define GLOG_HAS_WARN_LOG_FILENUM_QUOTA ++DECLARE_int32(warn_log_filenum_quota); ++ ++// Set log file split method ++DECLARE_string(log_split_method); ++ + // Set whether appending a timestamp to the log file name + DECLARE_bool(timestamp_in_logfile_name); + +--- a/src/flags.cc ++++ b/src/flags.cc +@@ -111,6 +111,16 @@ + GLOG_DEFINE_int32(logbufsecs, 30, + "Buffer log messages for at most this many seconds"); + ++GLOG_DEFINE_string(log_split_method, "day", ++ "split log by size, day, hour"); ++ ++GLOG_DEFINE_int32(log_filenum_quota, 10, ++ "max log file num in log dir"); ++ ++GLOG_DEFINE_int32(warn_log_filenum_quota, -1, ++ "max warn log file num in log dir, -1 means equal to " ++ "log_filenum_quota"); ++ + GLOG_DEFINE_int32(logcleansecs, 60 * 5, // every 5 minutes + "Clean overdue logs every this many seconds"); + +--- a/src/logging.cc ++++ b/src/logging.cc +@@ -38,6 +38,7 @@ + #include <cstdint> + #include <iomanip> + #include <iterator> ++#include <list> + #include <memory> + #include <mutex> + #include <shared_mutex> +@@ -443,6 +444,13 @@ + + std::unique_ptr<PrefixFormatter> g_prefix_formatter; + ++struct Filetime { ++ std::string name; ++ std::time_t time; ++ ++ bool operator<(const Filetime& o) const { return o.time > time; } ++}; ++ + // Encapsulates all file-system related state + class LogFileObject : public base::Logger { + public: +@@ -473,6 +481,7 @@ + // can avoid grabbing a lock. Usually Flush() calls it after + // acquiring lock_. + void FlushUnlocked(const std::chrono::system_clock::time_point& now); ++ void CheckFileNumQuota(); + + private: + static const uint32 kRolloverAttemptFrequency = 0x20; +@@ -491,6 +500,9 @@ + std::chrono::system_clock::time_point + next_flush_time_; // cycle count at which to flush log + std::chrono::system_clock::time_point start_time_; ++ std::list<Filetime> file_list_; ++ bool inited_{false}; ++ struct ::tm tm_time_{}; + + // Actually create a logfile using the value of base_filename_ and the + // optional argument time_pid_string +@@ -707,10 +719,8 @@ + // all this stuff. + std::lock_guard<std::mutex> l{log_mutex}; + for (int i = min_severity; i < NUM_SEVERITIES; i++) { +- LogDestination* log = log_destination(static_cast<LogSeverity>(i)); +- if (log != nullptr) { +- log->logger_->Flush(); +- } ++ LogDestination* log = log_destinations_[i].get(); ++ if (log != nullptr) log->logger_->Flush(); + } + } + +@@ -903,9 +913,12 @@ + } else if (FLAGS_logtostderr) { // global flag: never log to file + ColoredWriteToStderr(severity, message, len); + } else { +- for (int i = severity; i >= 0; --i) { +- LogDestination::MaybeLogToLogfile(static_cast<LogSeverity>(i), timestamp, +- message, len); ++ if (severity >= 1) { ++ LogDestination::MaybeLogToLogfile(GLOG_WARNING, timestamp, message, len); ++ LogDestination::MaybeLogToLogfile(GLOG_INFO, timestamp, message, len); ++ } else if (severity == 0) { ++ LogDestination::MaybeLogToLogfile(GLOG_INFO, timestamp, message, len); ++ } else { + } + } + } +@@ -1059,11 +1072,11 @@ + string_filename += filename_extension_; + const char* filename = string_filename.c_str(); + // only write to files, create if non-existant. +- int flags = O_WRONLY | O_CREAT; +- if (FLAGS_timestamp_in_logfile_name) { +- // demand that the file is unique for our timestamp (fail if it exists). +- flags = flags | O_EXCL; +- } ++ int flags = O_WRONLY | O_CREAT | O_APPEND; ++ // if (FLAGS_timestamp_in_logfile_name) { ++ // // demand that the file is unique for our timestamp (fail if it exists). ++ // flags = flags | O_EXCL; ++ // } + FileDescriptor fd{ + open(filename, flags, static_cast<mode_t>(FLAGS_logfile_mode))}; + if (!fd) return false; +@@ -1112,6 +1125,9 @@ + } + } + #endif ++ Filetime ft; ++ ft.name = string_filename; ++ file_list_.push_back(ft); + // We try to create a symlink called <program_name>.<severity>, + // which is easier to use. (Every time we create a new logfile, + // we destroy the old symlink and create a new one, so it always +@@ -1155,6 +1171,58 @@ + return true; // Everything worked + } + ++void LogFileObject::CheckFileNumQuota() { ++ struct dirent* entry; ++ DIR* dp; ++ ++ const vector<string>& log_dirs = GetLoggingDirectories(); ++ if (log_dirs.size() < 1) return; ++ ++ dp = opendir(log_dirs[0].c_str()); ++ if (dp == nullptr) { ++ fprintf(stderr, "open log dir %s fail\n", log_dirs[0].c_str()); ++ return; ++ } ++ ++ file_list_.clear(); ++ while ((entry = readdir(dp)) != nullptr) { ++ if (DT_DIR == entry->d_type || DT_LNK == entry->d_type) { ++ continue; ++ } ++ std::string filename = std::string(entry->d_name); ++ ++ if (filename.find(symlink_basename_ + '.' + ++ LogSeverityNames[severity_]) == 0) { ++ std::string filepath = log_dirs[0] + "/" + filename; ++ ++ struct stat fstat; ++ if (::stat(filepath.c_str(), &fstat) < 0) { ++ fprintf(stderr, "state %s fail\n", filepath.c_str()); Review Comment: The error message printed when `::stat` fails contains a typo ("state %s fail") which makes the log harder to understand; please change "state" to "stat" to correctly describe the failing system call. ```suggestion + fprintf(stderr, "stat %s fail\n", filepath.c_str()); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
