Copilot commented on code in PR #13328: URL: https://github.com/apache/trafficserver/pull/13328#discussion_r3471707340
########## src/iocore/cache/CacheShmLayout.h: ########## @@ -0,0 +1,105 @@ +/** @file + + Layout of the cache shared-memory control segment, shared between the cache + subsystem and tools (traffic_ctl) that inspect or clear the segment without + going through the running traffic_server. + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +#pragma once + +#include <cstddef> +#include <cstdint> +#include <string> +#include <string_view> + +namespace cache_shm +{ + +constexpr char CACHE_SHM_MAGIC[8] = {'A', 'T', 'S', '-', 'S', 'H', 'M', '\0'}; +constexpr uint32_t CACHE_SHM_SCHEMA_VERSION = 1; +constexpr std::string_view CACHE_SHM_CONTROL = "control"; + +// macOS PSHMNAMLEN is 31 chars including the leading '/'. Keep names under that +// limit on Linux too, so the same naming works everywhere. +constexpr std::size_t MAX_SHM_NAME_LEN = 31; + +// Maximum number of stripes in the control segment. Bumping it changes the ABI +// hash, so old segments are dropped automatically. +constexpr std::size_t MAX_STRIPES = 256; + +// Per-stripe entry in the control segment. A stripe is matched to its prior +// segment on attach by stripe_key_hash, not by name (order-independent). +struct StripeEntry { + char shm_name[MAX_SHM_NAME_LEN + 1]; ///< full shm name, NUL-terminated. + uint64_t raw_dir_size; ///< size of the stripe's raw_dir segment, bytes. + uint64_t stripe_key_hash; ///< full 64-bit FNV-1a of the stripe hash_text. +}; + +struct CacheShmControl { + char magic[8]; ///< CACHE_SHM_MAGIC + uint32_t schema_version; ///< CACHE_SHM_SCHEMA_VERSION + uint32_t pad0; + uint64_t abi_hash; ///< compile-time ABI fingerprint + uint64_t storage_signature; ///< storage.yaml fingerprint + uint8_t clean_shutdown; ///< 0 = dirty, 1 = clean + uint8_t pad1[3]; + int32_t owner_pid; ///< PID holding the exclusive lock; 0 when none. Backs the + ///< concurrent-attach guard. Cleared on clean shutdown. + uint32_t stripe_count; + uint32_t pad2; + StripeEntry stripes[MAX_STRIPES]; +}; + +constexpr std::size_t CONTROL_SIZE = sizeof(CacheShmControl); + +// Normalize the operator-configured prefix into the full shm name prefix used +// to build segment names. The operator sets only the middle word (e.g. "ats"); +// the framing is supplied here so the leading '/' that POSIX shm_open requires +// and the '-' separating the prefix from the per-object suffix can never be +// mis-typed. Any stray framing carried over from an older config (e.g. a +// literal "/ats-") is trimmed first, so migration can never yield an invalid +// embedded-slash name like "//ats--". An embedded '/' or '-' in the middle is +// preserved; only the framing characters are trimmed. Both the running server +// and traffic_ctl normalize through here so they agree on the same names. +inline std::string +normalize_name_prefix(std::string_view configured) +{ + std::size_t begin = configured.find_first_not_of('/'); + if (begin == std::string_view::npos) { + begin = configured.size(); // all '/' (or empty): no middle. + } + std::size_t last_kept = configured.find_last_not_of('-'); + std::string_view middle = (last_kept == std::string_view::npos || last_kept < begin) ? + std::string_view{} : + configured.substr(begin, last_kept - begin + 1); + return "/" + std::string(middle) + "-"; +} Review Comment: The comment (and current implementation) says an embedded '/' in the configured prefix is preserved, but POSIX shared-memory names must not contain '/' beyond the leading one. Preserving embedded '/' produces names that will fail `shm_open()` with EINVAL (and `traffic_ctl cache shm status` will report it as “not found: Invalid argument”). Consider stripping embedded '/' during normalization (or otherwise rejecting it) so the normalization contract always yields a valid POSIX shm name prefix. ########## src/iocore/cache/CacheShmPurge.h: ########## @@ -0,0 +1,241 @@ +/** @file + + Shared "enumerate and unlink the shm segments for a prefix" primitive, used by + both the cache subsystem (purge-on-disabled-start) and `traffic_ctl cache shm + clear`. Header-only since traffic_ctl does not link the cache library. + purge_segments() does no logging; it returns a report each caller formats itself. + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +#pragma once + +#include "CacheShmLayout.h" + +#include <fcntl.h> +#include <sys/file.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <unistd.h> + +#include <csignal> +#include <algorithm> +#include <cerrno> +#include <cstdint> +#include <cstring> +#include <string> +#include <vector> + +namespace cache_shm +{ + +/// True if `pid` names a live process (pid <= 0 is not). EPERM counts as alive (it +/// exists, we just may not signal it). Backs the owner-liveness backstop used where +/// the control-segment flock is not honored. +inline bool +process_is_alive(int32_t pid) +{ + if (pid <= 0) { + return false; + } + return ::kill(static_cast<pid_t>(pid), 0) == 0 || errno == EPERM; +} + +/// Outcome of trying to take the control segment's exclusive lock. +enum class LockResult { + Acquired, ///< We hold the exclusive lock; no other process does. + HeldByOther, ///< Another live process holds it (flock returned EWOULDBLOCK). + Unsupported, ///< flock is not honored for this fd (e.g. macOS POSIX shm). +}; + +/// Take the exclusive, non-blocking advisory lock on the control fd. Authoritative +/// on Linux/tmpfs (auto-released on crash); macOS POSIX shm returns Unsupported, so +/// the owner_pid liveness check is used there instead. On Unsupported the flock errno +/// is reported via `unexpected_errno` (when non-null) so a caller with logging can +/// surface an otherwise-silent failure (EBADF/EINVAL/ENOLCK vs the expected macOS case). +inline LockResult +try_lock_control(int fd, int *unexpected_errno = nullptr) +{ + if (::flock(fd, LOCK_EX | LOCK_NB) == 0) { + return LockResult::Acquired; + } + // EWOULDBLOCK is the only errno meaning "another process holds it"; anything else + // means flock is unusable here -> fall back to the owner_pid backstop. + if (errno == EWOULDBLOCK) { + return LockResult::HeldByOther; + } + if (unexpected_errno != nullptr) { + *unexpected_errno = errno; + } + return LockResult::Unsupported; +} Review Comment: `try_lock_control()` treats every `flock()` failure other than `EWOULDBLOCK` as “Unsupported”. That includes transient `EINTR`, which can cause the concurrent-attach guard to fall back to the owner-pid liveness check even on Linux where `flock` is supposed to be authoritative. Retrying on `EINTR` keeps the guard reliable and avoids a race where `owner_pid` has not been set yet by the real owner. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
