Copilot commented on code in PR #13328:
URL: https://github.com/apache/trafficserver/pull/13328#discussion_r3471707340


##########
src/iocore/cache/CacheShmLayout.h:
##########
@@ -0,0 +1,105 @@
+/** @file
+
+  Layout of the cache shared-memory control segment, shared between the cache
+  subsystem and tools (traffic_ctl) that inspect or clear the segment without
+  going through the running traffic_server.
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <string_view>
+
+namespace cache_shm
+{
+
+constexpr char             CACHE_SHM_MAGIC[8]       = {'A', 'T', 'S', '-', 
'S', 'H', 'M', '\0'};
+constexpr uint32_t         CACHE_SHM_SCHEMA_VERSION = 1;
+constexpr std::string_view CACHE_SHM_CONTROL        = "control";
+
+// macOS PSHMNAMLEN is 31 chars including the leading '/'. Keep names under 
that
+// limit on Linux too, so the same naming works everywhere.
+constexpr std::size_t MAX_SHM_NAME_LEN = 31;
+
+// Maximum number of stripes in the control segment. Bumping it changes the ABI
+// hash, so old segments are dropped automatically.
+constexpr std::size_t MAX_STRIPES = 256;
+
+// Per-stripe entry in the control segment. A stripe is matched to its prior
+// segment on attach by stripe_key_hash, not by name (order-independent).
+struct StripeEntry {
+  char     shm_name[MAX_SHM_NAME_LEN + 1]; ///< full shm name, NUL-terminated.
+  uint64_t raw_dir_size;                   ///< size of the stripe's raw_dir 
segment, bytes.
+  uint64_t stripe_key_hash;                ///< full 64-bit FNV-1a of the 
stripe hash_text.
+};
+
+struct CacheShmControl {
+  char     magic[8];       ///< CACHE_SHM_MAGIC
+  uint32_t schema_version; ///< CACHE_SHM_SCHEMA_VERSION
+  uint32_t pad0;
+  uint64_t abi_hash;          ///< compile-time ABI fingerprint
+  uint64_t storage_signature; ///< storage.yaml fingerprint
+  uint8_t  clean_shutdown;    ///< 0 = dirty, 1 = clean
+  uint8_t  pad1[3];
+  int32_t  owner_pid; ///< PID holding the exclusive lock; 0 when none. Backs 
the
+                      ///< concurrent-attach guard. Cleared on clean shutdown.
+  uint32_t    stripe_count;
+  uint32_t    pad2;
+  StripeEntry stripes[MAX_STRIPES];
+};
+
+constexpr std::size_t CONTROL_SIZE = sizeof(CacheShmControl);
+
+// Normalize the operator-configured prefix into the full shm name prefix used
+// to build segment names. The operator sets only the middle word (e.g. "ats");
+// the framing is supplied here so the leading '/' that POSIX shm_open requires
+// and the '-' separating the prefix from the per-object suffix can never be
+// mis-typed. Any stray framing carried over from an older config (e.g. a
+// literal "/ats-") is trimmed first, so migration can never yield an invalid
+// embedded-slash name like "//ats--". An embedded '/' or '-' in the middle is
+// preserved; only the framing characters are trimmed. Both the running server
+// and traffic_ctl normalize through here so they agree on the same names.
+inline std::string
+normalize_name_prefix(std::string_view configured)
+{
+  std::size_t begin = configured.find_first_not_of('/');
+  if (begin == std::string_view::npos) {
+    begin = configured.size(); // all '/' (or empty): no middle.
+  }
+  std::size_t      last_kept = configured.find_last_not_of('-');
+  std::string_view middle    = (last_kept == std::string_view::npos || 
last_kept < begin) ?
+                                 std::string_view{} :
+                                 configured.substr(begin, last_kept - begin + 
1);
+  return "/" + std::string(middle) + "-";
+}

Review Comment:
   The comment (and current implementation) says an embedded '/' in the 
configured prefix is preserved, but POSIX shared-memory names must not contain 
'/' beyond the leading one. Preserving embedded '/' produces names that will 
fail `shm_open()` with EINVAL (and `traffic_ctl cache shm status` will report 
it as “not found: Invalid argument”). Consider stripping embedded '/' during 
normalization (or otherwise rejecting it) so the normalization contract always 
yields a valid POSIX shm name prefix.



##########
src/iocore/cache/CacheShmPurge.h:
##########
@@ -0,0 +1,241 @@
+/** @file
+
+  Shared "enumerate and unlink the shm segments for a prefix" primitive, used 
by
+  both the cache subsystem (purge-on-disabled-start) and `traffic_ctl cache shm
+  clear`. Header-only since traffic_ctl does not link the cache library.
+  purge_segments() does no logging; it returns a report each caller formats 
itself.
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+ */
+
+#pragma once
+
+#include "CacheShmLayout.h"
+
+#include <fcntl.h>
+#include <sys/file.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <csignal>
+#include <algorithm>
+#include <cerrno>
+#include <cstdint>
+#include <cstring>
+#include <string>
+#include <vector>
+
+namespace cache_shm
+{
+
+/// True if `pid` names a live process (pid <= 0 is not). EPERM counts as 
alive (it
+/// exists, we just may not signal it). Backs the owner-liveness backstop used 
where
+/// the control-segment flock is not honored.
+inline bool
+process_is_alive(int32_t pid)
+{
+  if (pid <= 0) {
+    return false;
+  }
+  return ::kill(static_cast<pid_t>(pid), 0) == 0 || errno == EPERM;
+}
+
+/// Outcome of trying to take the control segment's exclusive lock.
+enum class LockResult {
+  Acquired,    ///< We hold the exclusive lock; no other process does.
+  HeldByOther, ///< Another live process holds it (flock returned EWOULDBLOCK).
+  Unsupported, ///< flock is not honored for this fd (e.g. macOS POSIX shm).
+};
+
+/// Take the exclusive, non-blocking advisory lock on the control fd. 
Authoritative
+/// on Linux/tmpfs (auto-released on crash); macOS POSIX shm returns 
Unsupported, so
+/// the owner_pid liveness check is used there instead. On Unsupported the 
flock errno
+/// is reported via `unexpected_errno` (when non-null) so a caller with 
logging can
+/// surface an otherwise-silent failure (EBADF/EINVAL/ENOLCK vs the expected 
macOS case).
+inline LockResult
+try_lock_control(int fd, int *unexpected_errno = nullptr)
+{
+  if (::flock(fd, LOCK_EX | LOCK_NB) == 0) {
+    return LockResult::Acquired;
+  }
+  // EWOULDBLOCK is the only errno meaning "another process holds it"; 
anything else
+  // means flock is unusable here -> fall back to the owner_pid backstop.
+  if (errno == EWOULDBLOCK) {
+    return LockResult::HeldByOther;
+  }
+  if (unexpected_errno != nullptr) {
+    *unexpected_errno = errno;
+  }
+  return LockResult::Unsupported;
+}

Review Comment:
   `try_lock_control()` treats every `flock()` failure other than `EWOULDBLOCK` 
as “Unsupported”. That includes transient `EINTR`, which can cause the 
concurrent-attach guard to fall back to the owner-pid liveness check even on 
Linux where `flock` is supposed to be authoritative. Retrying on `EINTR` keeps 
the guard reliable and avoids a race where `owner_pid` has not been set yet by 
the real owner.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to