Copilot commented on code in PR #12743: URL: https://github.com/apache/trafficserver/pull/12743#discussion_r2640694146
########## example/cripts/cache_groups.cc: ########## @@ -0,0 +1,106 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#define CRIPTS_CONVENIENCE_APIS 1 + +#include <cripts/CacheGroup.hpp> +#include <cripts/Preamble.hpp> + +do_create_instance() +{ + // Create a cache-group for this site / remap rule(s). They can be shared. + instance.data[0] = cripts::Cache::Group::Manager::Factory("example"); +} + +do_delete_instance() +{ + void *ptr = AsPointer(instance.data[0]); + + if (ptr) { + delete static_cast<std::shared_ptr<cripts::Cache::Group> *>(ptr); + instance.data[0] = nullptr; + } +} + +do_cache_lookup() +{ + if (cached.response.lookupstatus != cripts::LookupStatus::MISS) { + void *ptr = AsPointer(instance.data[0]); + + if (ptr) { + auto date = cached.response.AsDate("Date"); + + if (date > 0) { Review Comment: The comparison operator should use '!=' instead of '>' for comparing with time_point zero to check validity. Time points don't have a natural ordering relative to "zero" in the same way integers do, and using '>' is semantically unclear. Consider using '!=' to check if the date is valid/not-zero. 
```suggestion if (date != 0) { ``` ########## include/cripts/CacheGroup.hpp: ########## @@ -0,0 +1,216 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ +#pragma once + +#include <unordered_map> +#include <string> +#include <vector> +#include <chrono> +#include <mutex> +#include <shared_mutex> +#include <fstream> +#include <atomic> +#include <memory> +#include <cstdint> + +#include "cripts/Context.hpp" +#include "cripts/Time.hpp" + +// Implemented in the .cc file +int _cripts_cache_group_sync(TSCont cont, TSEvent event, void *edata); + +namespace cripts::Cache +{ + +class Group +{ +private: + using self_type = Group; + + struct _Entry { + cripts::Time::Point timestamp; // Timestamp of when the entry was created + size_t length; // Length of the group ID + uint32_t prefix; // First 4 characters of the group ID + uint64_t hash; // Hash value of the group ID, needed when writing to disk + }; + + // Header structure for on-disk map files (after VERSION field) + struct _MapHeader { + time_t created_ts; + time_t last_write_ts; + time_t last_sync_ts; + size_t count; + }; + + using _MapType = std::unordered_map<uint64_t, _Entry>; + + struct _MapSlot { + std::unique_ptr<_MapType> map; + std::string path; + cripts::Time::Point created; + 
cripts::Time::Point last_write; + cripts::Time::Point last_sync; + }; + +public: + static constexpr uint64_t VERSION = (static_cast<uint64_t>('C') << 56) | (static_cast<uint64_t>('G') << 48) | + (static_cast<uint64_t>('M') << 40) | (static_cast<uint64_t>('A') << 32) | + (static_cast<uint64_t>('P') << 24) | (static_cast<uint64_t>('S') << 16) | + (static_cast<uint64_t>('0') << 8) | 0x00; // Change this on version bump + + static constexpr std::chrono::seconds DEFAULT_MAX_AGE{63072000}; // 2 Years, max cache lifetime in ATS as well + + Group(const std::string &name, const std::string &base_dir, size_t max_entries = 1024, size_t num_maps = 3) + { + Initialize(name, base_dir, num_maps, max_entries, DEFAULT_MAX_AGE); + } + + // Not used at the moment. + Group() = default; + + ~Group() { WriteToDisk(); } + + Group(const self_type &) = delete; + self_type &operator=(const self_type &) = delete; + + void Initialize(const std::string &name, const std::string &base_dir, size_t num_maps = 3, size_t max_entries = 1024, + std::chrono::seconds max_age = DEFAULT_MAX_AGE); + + void + SetMaxEntries(size_t max_entries) + { + std::unique_lock lock(_mutex); + _max_entries = max_entries; + } + + void + SetMaxAge(std::chrono::seconds max_age) + { + std::unique_lock lock(_mutex); + _max_age = max_age; + } + + void Insert(cripts::string_view key); + void Insert(const std::vector<cripts::string_view> &keys); + bool Lookup(cripts::string_view key, cripts::Time::Point age) const; + bool Lookup(const std::vector<cripts::string_view> &keys, cripts::Time::Point age) const; + + bool + Lookup(cripts::string_view key, time_t age) const + { + return Lookup(key, cripts::Time::Clock::from_time_t(age)); + } + + bool + Lookup(const std::vector<cripts::string_view> &keys, time_t age) const + { + return Lookup(keys, cripts::Time::Clock::from_time_t(age)); + } + + cripts::Time::Point + LastSync() const + { + std::shared_lock lock(_mutex); + return _last_sync; + } + + void WriteToDisk(); + void 
LoadFromDisk(); + +private: + mutable std::shared_mutex _mutex; + std::string _name = "CacheGroup"; + size_t _num_maps = 3; + size_t _max_entries = 1024; + std::chrono::seconds _max_age = DEFAULT_MAX_AGE; + std::atomic<size_t> _map_index = 0; Review Comment: The _map_index member is declared as std::atomic but is accessed without atomic operations in multiple places. In the Insert method, it's read and written non-atomically (lines 126, 144), and in the Lookup method, it's read without atomic load (line 176). Since _mutex protects these operations, using std::atomic is unnecessary and misleading. Either use proper atomic operations or change it to a regular size_t since it's already protected by the mutex. ```suggestion size_t _map_index = 0; ``` ########## src/cripts/CacheGroup.cc: ########## @@ -0,0 +1,441 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include <algorithm> +#include <filesystem> + +#include "ts/ts.h" +#include "cripts/CacheGroup.hpp" + +inline static uint32_t +_make_prefix_int(cripts::string_view key) +{ + uint32_t prefix = 0; + + std::memcpy(&prefix, key.data(), std::min<size_t>(4, key.size())); + return prefix; +} + +// Stuff around the disk sync contination +constexpr auto _CONT_SYNC_INTERVAL = 10; // How often to run the continuation +constexpr int _SYNC_GROUP_EVERY = 60; // Sync each group every 60s + +int +_cripts_cache_group_sync(TSCont cont, TSEvent /* event */, void * /* edata */) +{ + auto *mgr = static_cast<cripts::Cache::Group::Manager *>(TSContDataGet(cont)); + std::lock_guard lock(mgr->_mutex); + auto &groups = mgr->_groups; + + const size_t max_to_process = (groups.size() + (_SYNC_GROUP_EVERY - 1)) / _SYNC_GROUP_EVERY; + size_t processed = 0; + auto now = cripts::Time::Clock::now(); + + for (auto it = groups.begin(); it != groups.end() && processed < max_to_process;) { + if (auto group = it->second.lock()) { + if (group->LastSync() + std::chrono::seconds{_SYNC_GROUP_EVERY} < now) { + group->WriteToDisk(); + ++processed; + } + ++it; + } else { + // The group has been deleted, remove it from the map + it = groups.erase(it); + } + } + + return TS_SUCCESS; +} + +namespace cripts +{ + +void +Cache::Group::Initialize(const std::string &name, const std::string &base_dir, size_t num_maps, size_t max_entries, + std::chrono::seconds max_age) +{ + cripts::Time::Point zero = cripts::Time::Point{}; + cripts::Time::Point now = cripts::Time::Clock::now(); + + _name = name; + _num_maps = num_maps; + _max_entries = max_entries; + _max_age = max_age; + + _base_dir = base_dir + "/" + _name; + _log_path = _base_dir + "/" + "txn.log"; + + if (!std::filesystem::exists(_base_dir)) { + std::error_code ec; + + std::filesystem::create_directories(_base_dir, ec); + if (ec) { + TSWarning("cripts::Cache::Group: Failed to create directory `%s': %s", _base_dir.c_str(), ec.message().c_str()); + } else { + 
std::filesystem::permissions(_base_dir, std::filesystem::perms::group_write, std::filesystem::perm_options::add, ec); + if (ec) { + TSWarning("cripts::Cache::Group: Failed to set permissions on `%s': %s", _base_dir.c_str(), ec.message().c_str()); + } + } + } + + for (size_t ix = 0; ix < _num_maps; ++ix) { + _slots.emplace_back(_MapSlot{.map = std::make_unique<_MapType>(), + .path = _base_dir + "/map_" + std::to_string(ix) + ".bin", + .created = zero, + .last_write = zero, + .last_sync = zero}); + } + + _slots[0].created = now; + _slots[0].last_write = now; + + LoadFromDisk(); + _last_sync = now; +} + +void +Cache::Group::Insert(cripts::string_view key) +{ + static const std::hash<cripts::string_view> hasher; + + // Trim any "'s and white spaces from the key + key.trim_if(&isspace).trim('"'); + + auto now = cripts::Time::Clock::now(); + auto hash = static_cast<uint64_t>(hasher(key)); + uint32_t prefix = _make_prefix_int(key); + + std::unique_lock lock(_mutex); + + auto &slot = _slots[_map_index]; + auto it = slot.map->find(hash); + + if (it != slot.map->end() && it->second.length == key.size() && it->second.prefix == prefix) { + it->second.timestamp = now; + slot.last_write = now; + appendLog(it->second); + + return; + } + + Cache::Group::_Entry entry{.timestamp = now, .length = key.size(), .prefix = prefix, .hash = hash}; + + slot.map->insert_or_assign(hash, entry); + slot.last_write = now; + appendLog(entry); + + if (slot.map->size() > _max_entries || (now - slot.created) > _max_age) { + _map_index = (_map_index + 1) % _slots.size(); + + auto &next_slot = _slots[_map_index]; + + if (next_slot.last_write > next_slot.last_sync) { + TSWarning("cripts::Cache::Group: Rotating unsynced map for `%s'! 
This may lead to data loss.", _name.c_str()); + } + next_slot.map->clear(); + next_slot.created = now; + next_slot.last_write = now; + syncMap(_map_index); // Sync to disk will make sure it's empty on disk + } +} + +void +Cache::Group::Insert(const std::vector<cripts::string_view> &keys) +{ + for (auto &key : keys) { + Insert(key); + } +} + +bool +Cache::Group::Lookup(cripts::string_view key, cripts::Time::Point age) const +{ + // Trim any "'s and whitespaces from the key + key.trim_if(&isspace).trim('"'); + + uint64_t hash = static_cast<uint64_t>(std::hash<cripts::string_view>{}(key)); + std::shared_lock lock(_mutex); + + for (size_t i = 0; i < _slots.size(); ++i) { + size_t map_index = (_map_index + _slots.size() - i) % _slots.size(); + const auto &slot = _slots[map_index]; + + if (slot.last_write < age) { + continue; // Skip maps that haven't been written to since this time + } + + const auto &map = *slot.map; + auto it = map.find(hash); + + if (it != map.end()) { + const Cache::Group::_Entry &entry = it->second; + + if (entry.timestamp < age || entry.length != key.size() || entry.prefix != _make_prefix_int(key)) { + continue; + } + + return true; + } + } + + return false; +} + +bool +Cache::Group::Lookup(const std::vector<cripts::string_view> &keys, cripts::Time::Point age) const +{ + for (auto &key : keys) { + if (Lookup(key, age)) { + return true; + } + } + + return false; +} + +void +Cache::Group::LoadFromDisk() +{ + std::unique_lock lock(_mutex); + std::ifstream log(_log_path, std::ios::binary); + + for (size_t slot_ix = 0; slot_ix < _slots.size(); ++slot_ix) { + auto &slot = _slots[slot_ix]; + uint64_t version_id = 0; + _MapHeader header{}; + std::ifstream file(slot.path, std::ios::binary); + + if (!file) { + continue; + } + + file.read(reinterpret_cast<char *>(&version_id), sizeof(version_id)); + if (version_id != VERSION) { + TSWarning("cripts::Cache::Group: Version mismatch for map file: %s, expected %llu, got %llu. 
Skipping this map.", + slot.path.c_str(), static_cast<unsigned long long>(VERSION), static_cast<unsigned long long>(version_id)); + continue; + } + + file.read(reinterpret_cast<char *>(&header), sizeof(header)); + if (!file) { + TSWarning("cripts::Cache::Group: Failed to read header from map file: %s. Skipping this map.", slot.path.c_str()); + continue; + } + + slot.created = cripts::Time::Clock::from_time_t(header.created_ts); + slot.last_write = cripts::Time::Clock::from_time_t(header.last_write_ts); + slot.last_sync = cripts::Time::Clock::from_time_t(header.last_sync_ts); + + for (size_t i = 0; i < header.count; ++i) { + Cache::Group::_Entry entry; + + file.read(reinterpret_cast<char *>(&entry), sizeof(entry)); + if (!file) { + TSWarning("cripts::Cache::Group: Failed to read entry %zu from map file: %s. Stopping entry load.", i, slot.path.c_str()); + break; + } + if (!std::ranges::any_of(_slots, [&](const auto &slot) { return slot.map->find(entry.hash) != slot.map->end(); })) { + slot.map->insert_or_assign(entry.hash, entry); + } + } + } + + // Sort the slots by creation time, newest first, since we'll start with index 0 upon loading + std::ranges::sort(_slots, [](const _MapSlot &a, const _MapSlot &b) { return a.created > b.created; }); + + // Replay any entries from the transaction log, and then truncate it + if (log) { + Cache::Group::_Entry entry; + auto last_write = cripts::Time::Clock::from_time_t(0); + + while (log.read(reinterpret_cast<char *>(&entry), sizeof(entry))) { + _slots[0].map->insert_or_assign(entry.hash, entry); + last_write = std::max(last_write, entry.timestamp); + } + _slots[0].last_write = last_write; + clearLog(); + } +} + +void +Cache::Group::WriteToDisk() +{ + std::unique_lock unique_lock(_mutex); + + _last_sync = cripts::Time::Clock::now(); + for (size_t ix = 0; ix < _slots.size(); ++ix) { + bool need_sync = false; + + if (_slots[ix].last_write > _slots[ix].last_sync) { + _slots[ix].last_sync = _last_sync; + need_sync = true; + } + + if 
(need_sync) { + syncMap(ix); + } + } + + clearLog(); +} + +// +// Here comes the private member methods, these must never be called without +// already holding an exclusive lock on the mutex. +// + +void +Cache::Group::appendLog(const Cache::Group::_Entry &entry) +{ + if (!_txn_log.is_open() || !_txn_log.good()) { + _txn_log.open(_log_path, std::ios::app | std::ios::out); Review Comment: The transaction log is opened with std::ios::app but never explicitly opened in binary mode. Since the log stores binary Entry structures, it should be opened with std::ios::binary to prevent text-mode transformations on some platforms. This could lead to data corruption on Windows or other systems that distinguish between text and binary modes. ```suggestion _txn_log.open(_log_path, std::ios::app | std::ios::out | std::ios::binary); ``` ########## include/cripts/CacheGroup.hpp: ########## @@ -0,0 +1,216 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ +#pragma once + +#include <unordered_map> +#include <string> +#include <vector> +#include <chrono> +#include <mutex> +#include <shared_mutex> +#include <fstream> +#include <atomic> +#include <memory> +#include <cstdint> + +#include "cripts/Context.hpp" +#include "cripts/Time.hpp" + +// Implemented in the .cc file +int _cripts_cache_group_sync(TSCont cont, TSEvent event, void *edata); + +namespace cripts::Cache +{ + +class Group Review Comment: Missing documentation for the Cache Groups feature. The code lacks explanation of key concepts like what cache groups are used for, how the rotation mechanism works, what the parameters mean (num_maps, max_entries, max_age), and how the disk persistence works. Consider adding comprehensive documentation including class-level and method-level comments explaining the design and usage patterns. ########## src/cripts/CacheGroup.cc: ########## @@ -0,0 +1,441 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include <algorithm> +#include <filesystem> + +#include "ts/ts.h" +#include "cripts/CacheGroup.hpp" + +inline static uint32_t +_make_prefix_int(cripts::string_view key) +{ + uint32_t prefix = 0; + + std::memcpy(&prefix, key.data(), std::min<size_t>(4, key.size())); + return prefix; +} + +// Stuff around the disk sync contination +constexpr auto _CONT_SYNC_INTERVAL = 10; // How often to run the continuation +constexpr int _SYNC_GROUP_EVERY = 60; // Sync each group every 60s + +int +_cripts_cache_group_sync(TSCont cont, TSEvent /* event */, void * /* edata */) +{ + auto *mgr = static_cast<cripts::Cache::Group::Manager *>(TSContDataGet(cont)); + std::lock_guard lock(mgr->_mutex); + auto &groups = mgr->_groups; + + const size_t max_to_process = (groups.size() + (_SYNC_GROUP_EVERY - 1)) / _SYNC_GROUP_EVERY; + size_t processed = 0; + auto now = cripts::Time::Clock::now(); + + for (auto it = groups.begin(); it != groups.end() && processed < max_to_process;) { + if (auto group = it->second.lock()) { + if (group->LastSync() + std::chrono::seconds{_SYNC_GROUP_EVERY} < now) { + group->WriteToDisk(); + ++processed; + } + ++it; + } else { + // The group has been deleted, remove it from the map + it = groups.erase(it); + } + } + + return TS_SUCCESS; +} + +namespace cripts +{ + +void +Cache::Group::Initialize(const std::string &name, const std::string &base_dir, size_t num_maps, size_t max_entries, + std::chrono::seconds max_age) +{ + cripts::Time::Point zero = cripts::Time::Point{}; + cripts::Time::Point now = cripts::Time::Clock::now(); + + _name = name; + _num_maps = num_maps; + _max_entries = max_entries; + _max_age = max_age; + + _base_dir = base_dir + "/" + _name; + _log_path = _base_dir + "/" + "txn.log"; + + if (!std::filesystem::exists(_base_dir)) { + std::error_code ec; + + std::filesystem::create_directories(_base_dir, ec); + if (ec) { + TSWarning("cripts::Cache::Group: Failed to create directory `%s': %s", _base_dir.c_str(), ec.message().c_str()); + } else { + 
std::filesystem::permissions(_base_dir, std::filesystem::perms::group_write, std::filesystem::perm_options::add, ec); + if (ec) { + TSWarning("cripts::Cache::Group: Failed to set permissions on `%s': %s", _base_dir.c_str(), ec.message().c_str()); + } + } + } + + for (size_t ix = 0; ix < _num_maps; ++ix) { + _slots.emplace_back(_MapSlot{.map = std::make_unique<_MapType>(), + .path = _base_dir + "/map_" + std::to_string(ix) + ".bin", + .created = zero, + .last_write = zero, + .last_sync = zero}); + } + + _slots[0].created = now; + _slots[0].last_write = now; + + LoadFromDisk(); + _last_sync = now; +} + +void +Cache::Group::Insert(cripts::string_view key) +{ + static const std::hash<cripts::string_view> hasher; + + // Trim any "'s and white spaces from the key + key.trim_if(&isspace).trim('"'); + + auto now = cripts::Time::Clock::now(); + auto hash = static_cast<uint64_t>(hasher(key)); + uint32_t prefix = _make_prefix_int(key); + + std::unique_lock lock(_mutex); + + auto &slot = _slots[_map_index]; + auto it = slot.map->find(hash); + + if (it != slot.map->end() && it->second.length == key.size() && it->second.prefix == prefix) { + it->second.timestamp = now; + slot.last_write = now; + appendLog(it->second); + + return; + } + + Cache::Group::_Entry entry{.timestamp = now, .length = key.size(), .prefix = prefix, .hash = hash}; + + slot.map->insert_or_assign(hash, entry); + slot.last_write = now; + appendLog(entry); + + if (slot.map->size() > _max_entries || (now - slot.created) > _max_age) { + _map_index = (_map_index + 1) % _slots.size(); + + auto &next_slot = _slots[_map_index]; + + if (next_slot.last_write > next_slot.last_sync) { + TSWarning("cripts::Cache::Group: Rotating unsynced map for `%s'! 
This may lead to data loss.", _name.c_str()); + } + next_slot.map->clear(); + next_slot.created = now; + next_slot.last_write = now; + syncMap(_map_index); // Sync to disk will make sure it's empty on disk + } +} + +void +Cache::Group::Insert(const std::vector<cripts::string_view> &keys) +{ + for (auto &key : keys) { + Insert(key); + } +} + +bool +Cache::Group::Lookup(cripts::string_view key, cripts::Time::Point age) const +{ + // Trim any "'s and whitespaces from the key + key.trim_if(&isspace).trim('"'); + + uint64_t hash = static_cast<uint64_t>(std::hash<cripts::string_view>{}(key)); + std::shared_lock lock(_mutex); + + for (size_t i = 0; i < _slots.size(); ++i) { + size_t map_index = (_map_index + _slots.size() - i) % _slots.size(); + const auto &slot = _slots[map_index]; + + if (slot.last_write < age) { + continue; // Skip maps that haven't been written to since this time + } + + const auto &map = *slot.map; + auto it = map.find(hash); + + if (it != map.end()) { + const Cache::Group::_Entry &entry = it->second; + + if (entry.timestamp < age || entry.length != key.size() || entry.prefix != _make_prefix_int(key)) { + continue; + } + + return true; + } + } + + return false; +} + +bool +Cache::Group::Lookup(const std::vector<cripts::string_view> &keys, cripts::Time::Point age) const +{ + for (auto &key : keys) { + if (Lookup(key, age)) { + return true; + } + } + + return false; +} + +void +Cache::Group::LoadFromDisk() +{ + std::unique_lock lock(_mutex); + std::ifstream log(_log_path, std::ios::binary); + + for (size_t slot_ix = 0; slot_ix < _slots.size(); ++slot_ix) { + auto &slot = _slots[slot_ix]; + uint64_t version_id = 0; + _MapHeader header{}; + std::ifstream file(slot.path, std::ios::binary); + + if (!file) { + continue; + } + + file.read(reinterpret_cast<char *>(&version_id), sizeof(version_id)); + if (version_id != VERSION) { + TSWarning("cripts::Cache::Group: Version mismatch for map file: %s, expected %llu, got %llu. 
Skipping this map.", + slot.path.c_str(), static_cast<unsigned long long>(VERSION), static_cast<unsigned long long>(version_id)); + continue; + } + + file.read(reinterpret_cast<char *>(&header), sizeof(header)); + if (!file) { + TSWarning("cripts::Cache::Group: Failed to read header from map file: %s. Skipping this map.", slot.path.c_str()); + continue; + } + + slot.created = cripts::Time::Clock::from_time_t(header.created_ts); + slot.last_write = cripts::Time::Clock::from_time_t(header.last_write_ts); + slot.last_sync = cripts::Time::Clock::from_time_t(header.last_sync_ts); + + for (size_t i = 0; i < header.count; ++i) { + Cache::Group::_Entry entry; + + file.read(reinterpret_cast<char *>(&entry), sizeof(entry)); + if (!file) { + TSWarning("cripts::Cache::Group: Failed to read entry %zu from map file: %s. Stopping entry load.", i, slot.path.c_str()); + break; + } + if (!std::ranges::any_of(_slots, [&](const auto &slot) { return slot.map->find(entry.hash) != slot.map->end(); })) { + slot.map->insert_or_assign(entry.hash, entry); + } + } + } + + // Sort the slots by creation time, newest first, since we'll start with index 0 upon loading + std::ranges::sort(_slots, [](const _MapSlot &a, const _MapSlot &b) { return a.created > b.created; }); + + // Replay any entries from the transaction log, and then truncate it + if (log) { + Cache::Group::_Entry entry; + auto last_write = cripts::Time::Clock::from_time_t(0); + + while (log.read(reinterpret_cast<char *>(&entry), sizeof(entry))) { + _slots[0].map->insert_or_assign(entry.hash, entry); + last_write = std::max(last_write, entry.timestamp); + } + _slots[0].last_write = last_write; + clearLog(); + } +} + +void +Cache::Group::WriteToDisk() +{ + std::unique_lock unique_lock(_mutex); + + _last_sync = cripts::Time::Clock::now(); + for (size_t ix = 0; ix < _slots.size(); ++ix) { + bool need_sync = false; + + if (_slots[ix].last_write > _slots[ix].last_sync) { + _slots[ix].last_sync = _last_sync; + need_sync = true; + } + + if 
(need_sync) { + syncMap(ix); + } + } + + clearLog(); +} + +// +// Here comes the private member methods, these must never be called without +// already holding an exclusive lock on the mutex. +// + +void +Cache::Group::appendLog(const Cache::Group::_Entry &entry) +{ + if (!_txn_log.is_open() || !_txn_log.good()) { + _txn_log.open(_log_path, std::ios::app | std::ios::out); + if (!_txn_log) { + TSWarning("cripts::Cache::Group: Failed to open transaction log `%s'.", _log_path.c_str()); + return; + } + } + + _txn_log.write(reinterpret_cast<const char *>(&entry), sizeof(entry)); + _txn_log.flush(); +} + +void +Cache::Group::syncMap(size_t index) +{ + constexpr size_t BUFFER_SIZE = 64 * 1024; + std::array<std::byte, BUFFER_SIZE> buffer; + size_t buf_pos = 0; + bool write_failed = false; + const auto &slot = _slots[index]; + const std::string tmp_path = slot.path + ".tmp"; + std::ofstream o_file(tmp_path, std::ios::binary | std::ios::trunc); + + if (!o_file) { + TSWarning("cripts::Cache::Group: Failed to open temp file for sync: %s.", tmp_path.c_str()); + return; + } + + // Helper lambda to append data to the write buffer + auto _AppendToBuffer = [&](const void *data, size_t size) { + if (write_failed) { + return; + } + if (buf_pos + size > buffer.size()) { + o_file.write(reinterpret_cast<const char *>(buffer.data()), buf_pos); + if (!o_file) { + write_failed = true; + return; + } + buf_pos = 0; + } + std::memcpy(buffer.data() + buf_pos, static_cast<const std::byte *>(data), size); + buf_pos += size; + }; + + _MapHeader header{.created_ts = cripts::Time::Clock::to_time_t(slot.created), + .last_write_ts = cripts::Time::Clock::to_time_t(slot.last_write), + .last_sync_ts = cripts::Time::Clock::to_time_t(slot.last_sync), + .count = slot.map->size()}; + + _AppendToBuffer(&VERSION, sizeof(VERSION)); + _AppendToBuffer(&header, sizeof(header)); + + // Write entries + for (const auto &[_, entry] : *slot.map) { + _AppendToBuffer(&entry, sizeof(entry)); + } + + if (buf_pos > 0 && 
!write_failed) { + o_file.write(reinterpret_cast<const char *>(buffer.data()), buf_pos); + } + o_file.flush(); + o_file.close(); + + if (write_failed || !o_file) { + TSWarning("cripts::Cache::Group: Failed to write to temp file `%s'.", tmp_path.c_str()); + std::filesystem::remove(tmp_path); + return; + } + + if (std::rename(tmp_path.c_str(), slot.path.c_str()) != 0) { + TSWarning("cripts::Cache::Group: Failed to rename temp file `%s' to `%s'.", tmp_path.c_str(), slot.path.c_str()); + std::filesystem::remove(tmp_path); + } Review Comment: If std::rename fails, the temporary file is removed but the original file may be in an inconsistent state. The error handling should ensure that either the update succeeds completely or the original file remains intact. Consider implementing a more robust error recovery strategy, such as keeping a backup of the original file before attempting the rename. ########## doc/developer-guide/cripts/cripts-misc.en.rst: ########## @@ -414,3 +414,63 @@ Debug logging uses the same format string syntax as ``fmt::format()`` in ``libfm debug tags in your ATS configuration to enable debug output for your Cripts. The default debug tag for Cripts is the name of the Cript itself, either the Cript source file, or the compiled plugin name. + +Cache Groups +============ + +As a way to manage association between cache entries, Cripts provides an infrastructure +for cache groups. A cache group is a set of cache entries that are logically +associated with each other via custom identifiers. + +Example implementation of the Cache Groups RFC + +.. code-block:: cpp + + do_create_instance() + { + // Create a cache-group for this site / remap rule(s). They can be shared. 
+ instance.data[0] = cripts::Cache::Group::Manager::Factory("example_site"); + } + + do_delete_instance() + { + void *ptr = AsPointer(instance.data[0]); + + if (ptr) { + delete static_cast<std::shared_ptr<cripts::Cache::Group> *>(ptr); + instance.data[0] = nullptr; + } + } + + do_cache_lookup() + { + if (cached.response.lookupstatus != cripts::LookupStatus::MISS) { + void *ptr = AsPointer(instance.data[0]); + + if (ptr) { + auto date = cached.response.AsDate("Date"); + if (date > 0) { + auto cache_groups = cached.response["Cache-Groups"]; + if (!cache_groups.empty()) { + borrow cg = *static_cast<std::shared_ptr<cripts::Cache::Group> *>(ptr); Review Comment: The same comparison pattern appears here. Using '>' to compare a time_t value with 0 works but using '!=' would be more semantically clear to indicate checking for validity rather than ordering. ```suggestion if (date != 0) { auto cache_groups = cached.response["Cache-Groups"]; if (!cache_groups.empty()) { borrow cg = *static_cast<std::shared_ptr<cripts::Cache::Group> *>(ptr); ``` ########## src/cripts/CacheGroup.cc: ########## @@ -0,0 +1,441 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include <algorithm> +#include <filesystem> + +#include "ts/ts.h" +#include "cripts/CacheGroup.hpp" + +inline static uint32_t +_make_prefix_int(cripts::string_view key) +{ + uint32_t prefix = 0; + + std::memcpy(&prefix, key.data(), std::min<size_t>(4, key.size())); + return prefix; +} + +// Stuff around the disk sync contination +constexpr auto _CONT_SYNC_INTERVAL = 10; // How often to run the continuation +constexpr int _SYNC_GROUP_EVERY = 60; // Sync each group every 60s + +int +_cripts_cache_group_sync(TSCont cont, TSEvent /* event */, void * /* edata */) +{ + auto *mgr = static_cast<cripts::Cache::Group::Manager *>(TSContDataGet(cont)); + std::lock_guard lock(mgr->_mutex); + auto &groups = mgr->_groups; + + const size_t max_to_process = (groups.size() + (_SYNC_GROUP_EVERY - 1)) / _SYNC_GROUP_EVERY; + size_t processed = 0; + auto now = cripts::Time::Clock::now(); + + for (auto it = groups.begin(); it != groups.end() && processed < max_to_process;) { + if (auto group = it->second.lock()) { + if (group->LastSync() + std::chrono::seconds{_SYNC_GROUP_EVERY} < now) { + group->WriteToDisk(); + ++processed; + } + ++it; + } else { + // The group has been deleted, remove it from the map + it = groups.erase(it); + } + } + + return TS_SUCCESS; +} + +namespace cripts +{ + +void +Cache::Group::Initialize(const std::string &name, const std::string &base_dir, size_t num_maps, size_t max_entries, + std::chrono::seconds max_age) +{ + cripts::Time::Point zero = cripts::Time::Point{}; + cripts::Time::Point now = cripts::Time::Clock::now(); + + _name = name; + _num_maps = num_maps; + _max_entries = max_entries; + _max_age = max_age; + + _base_dir = base_dir + "/" + _name; + _log_path = _base_dir + "/" + "txn.log"; + + if (!std::filesystem::exists(_base_dir)) { + std::error_code ec; + + std::filesystem::create_directories(_base_dir, ec); + if (ec) { + TSWarning("cripts::Cache::Group: Failed to create directory `%s': %s", _base_dir.c_str(), ec.message().c_str()); + } else { + 
std::filesystem::permissions(_base_dir, std::filesystem::perms::group_write, std::filesystem::perm_options::add, ec); + if (ec) { + TSWarning("cripts::Cache::Group: Failed to set permissions on `%s': %s", _base_dir.c_str(), ec.message().c_str()); + } + } + } + + for (size_t ix = 0; ix < _num_maps; ++ix) { + _slots.emplace_back(_MapSlot{.map = std::make_unique<_MapType>(), + .path = _base_dir + "/map_" + std::to_string(ix) + ".bin", + .created = zero, + .last_write = zero, + .last_sync = zero}); + } + + _slots[0].created = now; + _slots[0].last_write = now; + + LoadFromDisk(); + _last_sync = now; +} + +void +Cache::Group::Insert(cripts::string_view key) +{ + static const std::hash<cripts::string_view> hasher; + + // Trim any "'s and white spaces from the key + key.trim_if(&isspace).trim('"'); + + auto now = cripts::Time::Clock::now(); + auto hash = static_cast<uint64_t>(hasher(key)); + uint32_t prefix = _make_prefix_int(key); + + std::unique_lock lock(_mutex); + + auto &slot = _slots[_map_index]; + auto it = slot.map->find(hash); + + if (it != slot.map->end() && it->second.length == key.size() && it->second.prefix == prefix) { + it->second.timestamp = now; + slot.last_write = now; + appendLog(it->second); + + return; Review Comment: The hash collision detection at line 129 only checks prefix and length but doesn't verify the actual key content. While the prefix check provides some collision resistance, two different keys could have the same hash, same length, and same 4-byte prefix, leading to incorrect cache group behavior. Consider either storing the full key or adding a more robust collision detection mechanism. ########## src/cripts/CacheGroup.cc: ########## @@ -0,0 +1,441 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include <algorithm> +#include <filesystem> + +#include "ts/ts.h" +#include "cripts/CacheGroup.hpp" + +inline static uint32_t +_make_prefix_int(cripts::string_view key) +{ + uint32_t prefix = 0; + + std::memcpy(&prefix, key.data(), std::min<size_t>(4, key.size())); + return prefix; +} + +// Stuff around the disk sync contination +constexpr auto _CONT_SYNC_INTERVAL = 10; // How often to run the continuation +constexpr int _SYNC_GROUP_EVERY = 60; // Sync each group every 60s + +int +_cripts_cache_group_sync(TSCont cont, TSEvent /* event */, void * /* edata */) +{ + auto *mgr = static_cast<cripts::Cache::Group::Manager *>(TSContDataGet(cont)); + std::lock_guard lock(mgr->_mutex); + auto &groups = mgr->_groups; + + const size_t max_to_process = (groups.size() + (_SYNC_GROUP_EVERY - 1)) / _SYNC_GROUP_EVERY; + size_t processed = 0; + auto now = cripts::Time::Clock::now(); + + for (auto it = groups.begin(); it != groups.end() && processed < max_to_process;) { + if (auto group = it->second.lock()) { + if (group->LastSync() + std::chrono::seconds{_SYNC_GROUP_EVERY} < now) { + group->WriteToDisk(); + ++processed; + } + ++it; + } else { + // The group has been deleted, remove it from the map + it = groups.erase(it); + } + } + + return TS_SUCCESS; +} + +namespace cripts +{ + +void +Cache::Group::Initialize(const std::string &name, const std::string &base_dir, size_t num_maps, size_t max_entries, + 
std::chrono::seconds max_age) +{ + cripts::Time::Point zero = cripts::Time::Point{}; + cripts::Time::Point now = cripts::Time::Clock::now(); + + _name = name; + _num_maps = num_maps; + _max_entries = max_entries; + _max_age = max_age; + + _base_dir = base_dir + "/" + _name; + _log_path = _base_dir + "/" + "txn.log"; + + if (!std::filesystem::exists(_base_dir)) { + std::error_code ec; + + std::filesystem::create_directories(_base_dir, ec); + if (ec) { + TSWarning("cripts::Cache::Group: Failed to create directory `%s': %s", _base_dir.c_str(), ec.message().c_str()); + } else { + std::filesystem::permissions(_base_dir, std::filesystem::perms::group_write, std::filesystem::perm_options::add, ec); + if (ec) { + TSWarning("cripts::Cache::Group: Failed to set permissions on `%s': %s", _base_dir.c_str(), ec.message().c_str()); + } + } + } + + for (size_t ix = 0; ix < _num_maps; ++ix) { + _slots.emplace_back(_MapSlot{.map = std::make_unique<_MapType>(), + .path = _base_dir + "/map_" + std::to_string(ix) + ".bin", + .created = zero, + .last_write = zero, + .last_sync = zero}); + } + + _slots[0].created = now; + _slots[0].last_write = now; + + LoadFromDisk(); + _last_sync = now; +} + +void +Cache::Group::Insert(cripts::string_view key) +{ + static const std::hash<cripts::string_view> hasher; + + // Trim any "'s and white spaces from the key + key.trim_if(&isspace).trim('"'); + + auto now = cripts::Time::Clock::now(); + auto hash = static_cast<uint64_t>(hasher(key)); + uint32_t prefix = _make_prefix_int(key); + + std::unique_lock lock(_mutex); + + auto &slot = _slots[_map_index]; + auto it = slot.map->find(hash); + + if (it != slot.map->end() && it->second.length == key.size() && it->second.prefix == prefix) { + it->second.timestamp = now; + slot.last_write = now; + appendLog(it->second); + + return; + } + + Cache::Group::_Entry entry{.timestamp = now, .length = key.size(), .prefix = prefix, .hash = hash}; + + slot.map->insert_or_assign(hash, entry); + slot.last_write = now; + 
appendLog(entry); + + if (slot.map->size() > _max_entries || (now - slot.created) > _max_age) { + _map_index = (_map_index + 1) % _slots.size(); + + auto &next_slot = _slots[_map_index]; + + if (next_slot.last_write > next_slot.last_sync) { + TSWarning("cripts::Cache::Group: Rotating unsynced map for `%s'! This may lead to data loss.", _name.c_str()); + } + next_slot.map->clear(); + next_slot.created = now; + next_slot.last_write = now; + syncMap(_map_index); // Sync to disk will make sure it's empty on disk + } +} + +void +Cache::Group::Insert(const std::vector<cripts::string_view> &keys) +{ + for (auto &key : keys) { + Insert(key); + } +} + +bool +Cache::Group::Lookup(cripts::string_view key, cripts::Time::Point age) const +{ + // Trim any "'s and whitespaces from the key + key.trim_if(&isspace).trim('"'); + + uint64_t hash = static_cast<uint64_t>(std::hash<cripts::string_view>{}(key)); + std::shared_lock lock(_mutex); + + for (size_t i = 0; i < _slots.size(); ++i) { + size_t map_index = (_map_index + _slots.size() - i) % _slots.size(); + const auto &slot = _slots[map_index]; + + if (slot.last_write < age) { + continue; // Skip maps that haven't been written to since this time + } + + const auto &map = *slot.map; + auto it = map.find(hash); + + if (it != map.end()) { + const Cache::Group::_Entry &entry = it->second; + + if (entry.timestamp < age || entry.length != key.size() || entry.prefix != _make_prefix_int(key)) { + continue; + } + + return true; Review Comment: The same hash collision issue exists in the Lookup method. The check at line 189 verifies hash, timestamp, length, and prefix, but doesn't confirm the actual key matches. This could result in false positive lookups when hash collisions occur with keys that have matching length and prefix. ########## src/cripts/CacheGroup.cc: ########## @@ -0,0 +1,441 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. 
See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include <algorithm> +#include <filesystem> + +#include "ts/ts.h" +#include "cripts/CacheGroup.hpp" + +inline static uint32_t +_make_prefix_int(cripts::string_view key) +{ + uint32_t prefix = 0; + + std::memcpy(&prefix, key.data(), std::min<size_t>(4, key.size())); + return prefix; +} + +// Stuff around the disk sync contination +constexpr auto _CONT_SYNC_INTERVAL = 10; // How often to run the continuation +constexpr int _SYNC_GROUP_EVERY = 60; // Sync each group every 60s + +int +_cripts_cache_group_sync(TSCont cont, TSEvent /* event */, void * /* edata */) +{ + auto *mgr = static_cast<cripts::Cache::Group::Manager *>(TSContDataGet(cont)); + std::lock_guard lock(mgr->_mutex); + auto &groups = mgr->_groups; + + const size_t max_to_process = (groups.size() + (_SYNC_GROUP_EVERY - 1)) / _SYNC_GROUP_EVERY; + size_t processed = 0; + auto now = cripts::Time::Clock::now(); + + for (auto it = groups.begin(); it != groups.end() && processed < max_to_process;) { + if (auto group = it->second.lock()) { + if (group->LastSync() + std::chrono::seconds{_SYNC_GROUP_EVERY} < now) { + group->WriteToDisk(); + ++processed; + } + ++it; + } else { + // The group has been deleted, remove it from the map + it = groups.erase(it); + } + } + + return TS_SUCCESS; +} + +namespace cripts +{ + +void 
+Cache::Group::Initialize(const std::string &name, const std::string &base_dir, size_t num_maps, size_t max_entries, + std::chrono::seconds max_age) +{ + cripts::Time::Point zero = cripts::Time::Point{}; + cripts::Time::Point now = cripts::Time::Clock::now(); + + _name = name; + _num_maps = num_maps; + _max_entries = max_entries; + _max_age = max_age; + + _base_dir = base_dir + "/" + _name; + _log_path = _base_dir + "/" + "txn.log"; + + if (!std::filesystem::exists(_base_dir)) { + std::error_code ec; + + std::filesystem::create_directories(_base_dir, ec); + if (ec) { + TSWarning("cripts::Cache::Group: Failed to create directory `%s': %s", _base_dir.c_str(), ec.message().c_str()); + } else { + std::filesystem::permissions(_base_dir, std::filesystem::perms::group_write, std::filesystem::perm_options::add, ec); + if (ec) { + TSWarning("cripts::Cache::Group: Failed to set permissions on `%s': %s", _base_dir.c_str(), ec.message().c_str()); + } + } + } + + for (size_t ix = 0; ix < _num_maps; ++ix) { + _slots.emplace_back(_MapSlot{.map = std::make_unique<_MapType>(), + .path = _base_dir + "/map_" + std::to_string(ix) + ".bin", + .created = zero, + .last_write = zero, + .last_sync = zero}); + } + + _slots[0].created = now; + _slots[0].last_write = now; + + LoadFromDisk(); + _last_sync = now; +} + +void +Cache::Group::Insert(cripts::string_view key) +{ + static const std::hash<cripts::string_view> hasher; + + // Trim any "'s and white spaces from the key + key.trim_if(&isspace).trim('"'); + + auto now = cripts::Time::Clock::now(); + auto hash = static_cast<uint64_t>(hasher(key)); + uint32_t prefix = _make_prefix_int(key); + + std::unique_lock lock(_mutex); + + auto &slot = _slots[_map_index]; + auto it = slot.map->find(hash); + + if (it != slot.map->end() && it->second.length == key.size() && it->second.prefix == prefix) { + it->second.timestamp = now; + slot.last_write = now; + appendLog(it->second); + + return; + } + + Cache::Group::_Entry entry{.timestamp = now, .length 
= key.size(), .prefix = prefix, .hash = hash}; + + slot.map->insert_or_assign(hash, entry); + slot.last_write = now; + appendLog(entry); + + if (slot.map->size() > _max_entries || (now - slot.created) > _max_age) { + _map_index = (_map_index + 1) % _slots.size(); + + auto &next_slot = _slots[_map_index]; + + if (next_slot.last_write > next_slot.last_sync) { + TSWarning("cripts::Cache::Group: Rotating unsynced map for `%s'! This may lead to data loss.", _name.c_str()); + } + next_slot.map->clear(); + next_slot.created = now; + next_slot.last_write = now; + syncMap(_map_index); // Sync to disk will make sure it's empty on disk + } +} + +void +Cache::Group::Insert(const std::vector<cripts::string_view> &keys) +{ + for (auto &key : keys) { + Insert(key); + } +} + +bool +Cache::Group::Lookup(cripts::string_view key, cripts::Time::Point age) const +{ + // Trim any "'s and whitespaces from the key + key.trim_if(&isspace).trim('"'); + + uint64_t hash = static_cast<uint64_t>(std::hash<cripts::string_view>{}(key)); + std::shared_lock lock(_mutex); + + for (size_t i = 0; i < _slots.size(); ++i) { + size_t map_index = (_map_index + _slots.size() - i) % _slots.size(); + const auto &slot = _slots[map_index]; + + if (slot.last_write < age) { + continue; // Skip maps that haven't been written to since this time + } + + const auto &map = *slot.map; + auto it = map.find(hash); + + if (it != map.end()) { + const Cache::Group::_Entry &entry = it->second; + + if (entry.timestamp < age || entry.length != key.size() || entry.prefix != _make_prefix_int(key)) { + continue; + } + + return true; + } + } + + return false; +} + +bool +Cache::Group::Lookup(const std::vector<cripts::string_view> &keys, cripts::Time::Point age) const +{ + for (auto &key : keys) { + if (Lookup(key, age)) { + return true; + } + } + + return false; +} + +void +Cache::Group::LoadFromDisk() +{ + std::unique_lock lock(_mutex); + std::ifstream log(_log_path, std::ios::binary); + + for (size_t slot_ix = 0; slot_ix < 
_slots.size(); ++slot_ix) { + auto &slot = _slots[slot_ix]; + uint64_t version_id = 0; + _MapHeader header{}; + std::ifstream file(slot.path, std::ios::binary); + + if (!file) { + continue; + } + + file.read(reinterpret_cast<char *>(&version_id), sizeof(version_id)); + if (version_id != VERSION) { + TSWarning("cripts::Cache::Group: Version mismatch for map file: %s, expected %llu, got %llu. Skipping this map.", + slot.path.c_str(), static_cast<unsigned long long>(VERSION), static_cast<unsigned long long>(version_id)); + continue; + } + + file.read(reinterpret_cast<char *>(&header), sizeof(header)); + if (!file) { + TSWarning("cripts::Cache::Group: Failed to read header from map file: %s. Skipping this map.", slot.path.c_str()); + continue; + } + + slot.created = cripts::Time::Clock::from_time_t(header.created_ts); + slot.last_write = cripts::Time::Clock::from_time_t(header.last_write_ts); + slot.last_sync = cripts::Time::Clock::from_time_t(header.last_sync_ts); + + for (size_t i = 0; i < header.count; ++i) { + Cache::Group::_Entry entry; + + file.read(reinterpret_cast<char *>(&entry), sizeof(entry)); + if (!file) { + TSWarning("cripts::Cache::Group: Failed to read entry %zu from map file: %s. 
Stopping entry load.", i, slot.path.c_str()); + break; + } + if (!std::ranges::any_of(_slots, [&](const auto &slot) { return slot.map->find(entry.hash) != slot.map->end(); })) { + slot.map->insert_or_assign(entry.hash, entry); + } + } + } + + // Sort the slots by creation time, newest first, since we'll start with index 0 upon loading + std::ranges::sort(_slots, [](const _MapSlot &a, const _MapSlot &b) { return a.created > b.created; }); + + // Replay any entries from the transaction log, and then truncate it + if (log) { + Cache::Group::_Entry entry; + auto last_write = cripts::Time::Clock::from_time_t(0); + + while (log.read(reinterpret_cast<char *>(&entry), sizeof(entry))) { + _slots[0].map->insert_or_assign(entry.hash, entry); + last_write = std::max(last_write, entry.timestamp); + } + _slots[0].last_write = last_write; + clearLog(); + } +} + +void +Cache::Group::WriteToDisk() +{ + std::unique_lock unique_lock(_mutex); + + _last_sync = cripts::Time::Clock::now(); + for (size_t ix = 0; ix < _slots.size(); ++ix) { + bool need_sync = false; + + if (_slots[ix].last_write > _slots[ix].last_sync) { + _slots[ix].last_sync = _last_sync; + need_sync = true; + } + + if (need_sync) { + syncMap(ix); + } + } + + clearLog(); +} + +// +// Here comes the private member methods, these must never be called without +// already holding an exclusive lock on the mutex. 
+// + +void +Cache::Group::appendLog(const Cache::Group::_Entry &entry) +{ + if (!_txn_log.is_open() || !_txn_log.good()) { + _txn_log.open(_log_path, std::ios::app | std::ios::out); + if (!_txn_log) { + TSWarning("cripts::Cache::Group: Failed to open transaction log `%s'.", _log_path.c_str()); + return; + } + } + + _txn_log.write(reinterpret_cast<const char *>(&entry), sizeof(entry)); + _txn_log.flush(); +} + +void +Cache::Group::syncMap(size_t index) +{ + constexpr size_t BUFFER_SIZE = 64 * 1024; + std::array<std::byte, BUFFER_SIZE> buffer; + size_t buf_pos = 0; + bool write_failed = false; + const auto &slot = _slots[index]; + const std::string tmp_path = slot.path + ".tmp"; + std::ofstream o_file(tmp_path, std::ios::binary | std::ios::trunc); + + if (!o_file) { + TSWarning("cripts::Cache::Group: Failed to open temp file for sync: %s.", tmp_path.c_str()); + return; + } + + // Helper lambda to append data to the write buffer + auto _AppendToBuffer = [&](const void *data, size_t size) { Review Comment: The lambda function name '_AppendToBuffer' uses PascalCase which is inconsistent with C++ naming conventions. Lambda names, if given, should follow the same conventions as regular functions (typically snake_case or camelCase in this codebase). Consider renaming to 'append_to_buffer' or 'appendToBuffer'. ########## src/cripts/CacheGroup.cc: ########## @@ -0,0 +1,441 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include <algorithm> +#include <filesystem> + +#include "ts/ts.h" +#include "cripts/CacheGroup.hpp" + +inline static uint32_t +_make_prefix_int(cripts::string_view key) +{ + uint32_t prefix = 0; + + std::memcpy(&prefix, key.data(), std::min<size_t>(4, key.size())); + return prefix; +} + +// Stuff around the disk sync contination +constexpr auto _CONT_SYNC_INTERVAL = 10; // How often to run the continuation +constexpr int _SYNC_GROUP_EVERY = 60; // Sync each group every 60s + +int +_cripts_cache_group_sync(TSCont cont, TSEvent /* event */, void * /* edata */) +{ + auto *mgr = static_cast<cripts::Cache::Group::Manager *>(TSContDataGet(cont)); + std::lock_guard lock(mgr->_mutex); + auto &groups = mgr->_groups; + + const size_t max_to_process = (groups.size() + (_SYNC_GROUP_EVERY - 1)) / _SYNC_GROUP_EVERY; + size_t processed = 0; + auto now = cripts::Time::Clock::now(); + + for (auto it = groups.begin(); it != groups.end() && processed < max_to_process;) { + if (auto group = it->second.lock()) { + if (group->LastSync() + std::chrono::seconds{_SYNC_GROUP_EVERY} < now) { + group->WriteToDisk(); + ++processed; + } + ++it; + } else { + // The group has been deleted, remove it from the map + it = groups.erase(it); + } + } + + return TS_SUCCESS; +} + +namespace cripts +{ + +void +Cache::Group::Initialize(const std::string &name, const std::string &base_dir, size_t num_maps, size_t max_entries, + std::chrono::seconds max_age) +{ + cripts::Time::Point zero = cripts::Time::Point{}; + cripts::Time::Point now = cripts::Time::Clock::now(); + + _name = name; + _num_maps = 
num_maps; + _max_entries = max_entries; + _max_age = max_age; + + _base_dir = base_dir + "/" + _name; + _log_path = _base_dir + "/" + "txn.log"; + + if (!std::filesystem::exists(_base_dir)) { + std::error_code ec; + + std::filesystem::create_directories(_base_dir, ec); + if (ec) { + TSWarning("cripts::Cache::Group: Failed to create directory `%s': %s", _base_dir.c_str(), ec.message().c_str()); + } else { + std::filesystem::permissions(_base_dir, std::filesystem::perms::group_write, std::filesystem::perm_options::add, ec); + if (ec) { + TSWarning("cripts::Cache::Group: Failed to set permissions on `%s': %s", _base_dir.c_str(), ec.message().c_str()); + } + } + } + + for (size_t ix = 0; ix < _num_maps; ++ix) { + _slots.emplace_back(_MapSlot{.map = std::make_unique<_MapType>(), + .path = _base_dir + "/map_" + std::to_string(ix) + ".bin", + .created = zero, + .last_write = zero, + .last_sync = zero}); + } + + _slots[0].created = now; + _slots[0].last_write = now; + + LoadFromDisk(); + _last_sync = now; +} + +void +Cache::Group::Insert(cripts::string_view key) +{ + static const std::hash<cripts::string_view> hasher; + + // Trim any "'s and white spaces from the key + key.trim_if(&isspace).trim('"'); + + auto now = cripts::Time::Clock::now(); + auto hash = static_cast<uint64_t>(hasher(key)); + uint32_t prefix = _make_prefix_int(key); + + std::unique_lock lock(_mutex); + + auto &slot = _slots[_map_index]; + auto it = slot.map->find(hash); + + if (it != slot.map->end() && it->second.length == key.size() && it->second.prefix == prefix) { + it->second.timestamp = now; + slot.last_write = now; + appendLog(it->second); + + return; + } + + Cache::Group::_Entry entry{.timestamp = now, .length = key.size(), .prefix = prefix, .hash = hash}; + + slot.map->insert_or_assign(hash, entry); + slot.last_write = now; + appendLog(entry); + + if (slot.map->size() > _max_entries || (now - slot.created) > _max_age) { + _map_index = (_map_index + 1) % _slots.size(); + + auto &next_slot = 
_slots[_map_index]; + + if (next_slot.last_write > next_slot.last_sync) { + TSWarning("cripts::Cache::Group: Rotating unsynced map for `%s'! This may lead to data loss.", _name.c_str()); + } + next_slot.map->clear(); + next_slot.created = now; + next_slot.last_write = now; + syncMap(_map_index); // Sync to disk will make sure it's empty on disk + } +} + +void +Cache::Group::Insert(const std::vector<cripts::string_view> &keys) +{ + for (auto &key : keys) { + Insert(key); + } +} + +bool +Cache::Group::Lookup(cripts::string_view key, cripts::Time::Point age) const +{ + // Trim any "'s and whitespaces from the key + key.trim_if(&isspace).trim('"'); + + uint64_t hash = static_cast<uint64_t>(std::hash<cripts::string_view>{}(key)); + std::shared_lock lock(_mutex); + + for (size_t i = 0; i < _slots.size(); ++i) { + size_t map_index = (_map_index + _slots.size() - i) % _slots.size(); + const auto &slot = _slots[map_index]; + + if (slot.last_write < age) { + continue; // Skip maps that haven't been written to since this time + } + + const auto &map = *slot.map; + auto it = map.find(hash); + + if (it != map.end()) { + const Cache::Group::_Entry &entry = it->second; + + if (entry.timestamp < age || entry.length != key.size() || entry.prefix != _make_prefix_int(key)) { + continue; + } + + return true; + } + } + + return false; +} + +bool +Cache::Group::Lookup(const std::vector<cripts::string_view> &keys, cripts::Time::Point age) const +{ + for (auto &key : keys) { + if (Lookup(key, age)) { + return true; + } + } + + return false; +} + +void +Cache::Group::LoadFromDisk() +{ + std::unique_lock lock(_mutex); + std::ifstream log(_log_path, std::ios::binary); + + for (size_t slot_ix = 0; slot_ix < _slots.size(); ++slot_ix) { + auto &slot = _slots[slot_ix]; + uint64_t version_id = 0; + _MapHeader header{}; + std::ifstream file(slot.path, std::ios::binary); + + if (!file) { + continue; + } + + file.read(reinterpret_cast<char *>(&version_id), sizeof(version_id)); + if (version_id != 
VERSION) { + TSWarning("cripts::Cache::Group: Version mismatch for map file: %s, expected %llu, got %llu. Skipping this map.", + slot.path.c_str(), static_cast<unsigned long long>(VERSION), static_cast<unsigned long long>(version_id)); + continue; + } + + file.read(reinterpret_cast<char *>(&header), sizeof(header)); + if (!file) { + TSWarning("cripts::Cache::Group: Failed to read header from map file: %s. Skipping this map.", slot.path.c_str()); + continue; + } + + slot.created = cripts::Time::Clock::from_time_t(header.created_ts); + slot.last_write = cripts::Time::Clock::from_time_t(header.last_write_ts); + slot.last_sync = cripts::Time::Clock::from_time_t(header.last_sync_ts); + + for (size_t i = 0; i < header.count; ++i) { + Cache::Group::_Entry entry; + + file.read(reinterpret_cast<char *>(&entry), sizeof(entry)); + if (!file) { + TSWarning("cripts::Cache::Group: Failed to read entry %zu from map file: %s. Stopping entry load.", i, slot.path.c_str()); + break; + } + if (!std::ranges::any_of(_slots, [&](const auto &slot) { return slot.map->find(entry.hash) != slot.map->end(); })) { + slot.map->insert_or_assign(entry.hash, entry); + } + } + } + + // Sort the slots by creation time, newest first, since we'll start with index 0 upon loading + std::ranges::sort(_slots, [](const _MapSlot &a, const _MapSlot &b) { return a.created > b.created; }); + + // Replay any entries from the transaction log, and then truncate it + if (log) { + Cache::Group::_Entry entry; + auto last_write = cripts::Time::Clock::from_time_t(0); + + while (log.read(reinterpret_cast<char *>(&entry), sizeof(entry))) { + _slots[0].map->insert_or_assign(entry.hash, entry); + last_write = std::max(last_write, entry.timestamp); + } + _slots[0].last_write = last_write; + clearLog(); + } +} + +void +Cache::Group::WriteToDisk() +{ + std::unique_lock unique_lock(_mutex); + + _last_sync = cripts::Time::Clock::now(); + for (size_t ix = 0; ix < _slots.size(); ++ix) { + bool need_sync = false; + + if 
(_slots[ix].last_write > _slots[ix].last_sync) { + _slots[ix].last_sync = _last_sync; + need_sync = true; + } + + if (need_sync) { + syncMap(ix); + } + } + + clearLog(); +} + +// +// Here comes the private member methods, these must never be called without +// already holding an exclusive lock on the mutex. +// + +void +Cache::Group::appendLog(const Cache::Group::_Entry &entry) +{ + if (!_txn_log.is_open() || !_txn_log.good()) { + _txn_log.open(_log_path, std::ios::app | std::ios::out); + if (!_txn_log) { + TSWarning("cripts::Cache::Group: Failed to open transaction log `%s'.", _log_path.c_str()); + return; + } + } + + _txn_log.write(reinterpret_cast<const char *>(&entry), sizeof(entry)); + _txn_log.flush(); +} + +void +Cache::Group::syncMap(size_t index) +{ + constexpr size_t BUFFER_SIZE = 64 * 1024; + std::array<std::byte, BUFFER_SIZE> buffer; + size_t buf_pos = 0; + bool write_failed = false; + const auto &slot = _slots[index]; + const std::string tmp_path = slot.path + ".tmp"; + std::ofstream o_file(tmp_path, std::ios::binary | std::ios::trunc); Review Comment: The variable name 'o_file' is inconsistent with the naming style used elsewhere in the codebase. Other file streams use full names like 'file' or 'log'. Consider renaming to 'tmp_file' or 'temp_file' for better consistency and clarity. ########## src/cripts/CacheGroup.cc: ########## @@ -0,0 +1,441 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include <algorithm> +#include <filesystem> + +#include "ts/ts.h" +#include "cripts/CacheGroup.hpp" + +inline static uint32_t +_make_prefix_int(cripts::string_view key) +{ + uint32_t prefix = 0; + + std::memcpy(&prefix, key.data(), std::min<size_t>(4, key.size())); + return prefix; +} + +// Stuff around the disk sync contination +constexpr auto _CONT_SYNC_INTERVAL = 10; // How often to run the continuation +constexpr int _SYNC_GROUP_EVERY = 60; // Sync each group every 60s + +int +_cripts_cache_group_sync(TSCont cont, TSEvent /* event */, void * /* edata */) +{ + auto *mgr = static_cast<cripts::Cache::Group::Manager *>(TSContDataGet(cont)); + std::lock_guard lock(mgr->_mutex); + auto &groups = mgr->_groups; + + const size_t max_to_process = (groups.size() + (_SYNC_GROUP_EVERY - 1)) / _SYNC_GROUP_EVERY; + size_t processed = 0; + auto now = cripts::Time::Clock::now(); + + for (auto it = groups.begin(); it != groups.end() && processed < max_to_process;) { + if (auto group = it->second.lock()) { + if (group->LastSync() + std::chrono::seconds{_SYNC_GROUP_EVERY} < now) { + group->WriteToDisk(); + ++processed; + } + ++it; + } else { + // The group has been deleted, remove it from the map + it = groups.erase(it); + } + } + + return TS_SUCCESS; +} + +namespace cripts +{ + +void +Cache::Group::Initialize(const std::string &name, const std::string &base_dir, size_t num_maps, size_t max_entries, + std::chrono::seconds max_age) +{ + cripts::Time::Point zero = cripts::Time::Point{}; + cripts::Time::Point now = cripts::Time::Clock::now(); + + _name = name; + _num_maps = 
num_maps; + _max_entries = max_entries; + _max_age = max_age; + + _base_dir = base_dir + "/" + _name; + _log_path = _base_dir + "/" + "txn.log"; + + if (!std::filesystem::exists(_base_dir)) { + std::error_code ec; + + std::filesystem::create_directories(_base_dir, ec); + if (ec) { + TSWarning("cripts::Cache::Group: Failed to create directory `%s': %s", _base_dir.c_str(), ec.message().c_str()); + } else { + std::filesystem::permissions(_base_dir, std::filesystem::perms::group_write, std::filesystem::perm_options::add, ec); + if (ec) { + TSWarning("cripts::Cache::Group: Failed to set permissions on `%s': %s", _base_dir.c_str(), ec.message().c_str()); + } + } + } + + for (size_t ix = 0; ix < _num_maps; ++ix) { + _slots.emplace_back(_MapSlot{.map = std::make_unique<_MapType>(), + .path = _base_dir + "/map_" + std::to_string(ix) + ".bin", + .created = zero, + .last_write = zero, + .last_sync = zero}); + } + + _slots[0].created = now; + _slots[0].last_write = now; + + LoadFromDisk(); + _last_sync = now; +} + +void +Cache::Group::Insert(cripts::string_view key) +{ + static const std::hash<cripts::string_view> hasher; + + // Trim any "'s and white spaces from the key + key.trim_if(&isspace).trim('"'); + + auto now = cripts::Time::Clock::now(); + auto hash = static_cast<uint64_t>(hasher(key)); + uint32_t prefix = _make_prefix_int(key); + + std::unique_lock lock(_mutex); + + auto &slot = _slots[_map_index]; + auto it = slot.map->find(hash); + + if (it != slot.map->end() && it->second.length == key.size() && it->second.prefix == prefix) { + it->second.timestamp = now; + slot.last_write = now; + appendLog(it->second); + + return; + } + + Cache::Group::_Entry entry{.timestamp = now, .length = key.size(), .prefix = prefix, .hash = hash}; + + slot.map->insert_or_assign(hash, entry); + slot.last_write = now; + appendLog(entry); + + if (slot.map->size() > _max_entries || (now - slot.created) > _max_age) { + _map_index = (_map_index + 1) % _slots.size(); + + auto &next_slot = 
_slots[_map_index]; + + if (next_slot.last_write > next_slot.last_sync) { + TSWarning("cripts::Cache::Group: Rotating unsynced map for `%s'! This may lead to data loss.", _name.c_str()); + } + next_slot.map->clear(); + next_slot.created = now; + next_slot.last_write = now; + syncMap(_map_index); // Sync to disk will make sure it's empty on disk + } +} + +void +Cache::Group::Insert(const std::vector<cripts::string_view> &keys) +{ + for (auto &key : keys) { + Insert(key); + } +} + +bool +Cache::Group::Lookup(cripts::string_view key, cripts::Time::Point age) const +{ + // Trim any "'s and whitespaces from the key + key.trim_if(&isspace).trim('"'); + + uint64_t hash = static_cast<uint64_t>(std::hash<cripts::string_view>{}(key)); + std::shared_lock lock(_mutex); + + for (size_t i = 0; i < _slots.size(); ++i) { + size_t map_index = (_map_index + _slots.size() - i) % _slots.size(); + const auto &slot = _slots[map_index]; + + if (slot.last_write < age) { + continue; // Skip maps that haven't been written to since this time + } + + const auto &map = *slot.map; + auto it = map.find(hash); + + if (it != map.end()) { + const Cache::Group::_Entry &entry = it->second; + + if (entry.timestamp < age || entry.length != key.size() || entry.prefix != _make_prefix_int(key)) { + continue; + } + + return true; + } + } + + return false; +} + +bool +Cache::Group::Lookup(const std::vector<cripts::string_view> &keys, cripts::Time::Point age) const +{ + for (auto &key : keys) { + if (Lookup(key, age)) { + return true; + } + } + + return false; +} + +void +Cache::Group::LoadFromDisk() +{ + std::unique_lock lock(_mutex); + std::ifstream log(_log_path, std::ios::binary); + + for (size_t slot_ix = 0; slot_ix < _slots.size(); ++slot_ix) { + auto &slot = _slots[slot_ix]; + uint64_t version_id = 0; + _MapHeader header{}; + std::ifstream file(slot.path, std::ios::binary); + + if (!file) { + continue; + } + + file.read(reinterpret_cast<char *>(&version_id), sizeof(version_id)); + if (version_id != 
VERSION) { + TSWarning("cripts::Cache::Group: Version mismatch for map file: %s, expected %llu, got %llu. Skipping this map.", + slot.path.c_str(), static_cast<unsigned long long>(VERSION), static_cast<unsigned long long>(version_id)); + continue; + } + + file.read(reinterpret_cast<char *>(&header), sizeof(header)); + if (!file) { + TSWarning("cripts::Cache::Group: Failed to read header from map file: %s. Skipping this map.", slot.path.c_str()); + continue; + } + + slot.created = cripts::Time::Clock::from_time_t(header.created_ts); + slot.last_write = cripts::Time::Clock::from_time_t(header.last_write_ts); + slot.last_sync = cripts::Time::Clock::from_time_t(header.last_sync_ts); + + for (size_t i = 0; i < header.count; ++i) { + Cache::Group::_Entry entry; + + file.read(reinterpret_cast<char *>(&entry), sizeof(entry)); + if (!file) { + TSWarning("cripts::Cache::Group: Failed to read entry %zu from map file: %s. Stopping entry load.", i, slot.path.c_str()); + break; + } + if (!std::ranges::any_of(_slots, [&](const auto &slot) { return slot.map->find(entry.hash) != slot.map->end(); })) { + slot.map->insert_or_assign(entry.hash, entry); + } + } + } + + // Sort the slots by creation time, newest first, since we'll start with index 0 upon loading + std::ranges::sort(_slots, [](const _MapSlot &a, const _MapSlot &b) { return a.created > b.created; }); + + // Replay any entries from the transaction log, and then truncate it + if (log) { + Cache::Group::_Entry entry; + auto last_write = cripts::Time::Clock::from_time_t(0); + + while (log.read(reinterpret_cast<char *>(&entry), sizeof(entry))) { + _slots[0].map->insert_or_assign(entry.hash, entry); + last_write = std::max(last_write, entry.timestamp); + } + _slots[0].last_write = last_write; + clearLog(); + } +} + +void +Cache::Group::WriteToDisk() +{ + std::unique_lock unique_lock(_mutex); + + _last_sync = cripts::Time::Clock::now(); + for (size_t ix = 0; ix < _slots.size(); ++ix) { + bool need_sync = false; + + if 
(_slots[ix].last_write > _slots[ix].last_sync) { + _slots[ix].last_sync = _last_sync; + need_sync = true; + } + + if (need_sync) { + syncMap(ix); + } + } + + clearLog(); +} + +// +// Here comes the private member methods, these must never be called without +// already holding an exclusive lock on the mutex. +// + +void +Cache::Group::appendLog(const Cache::Group::_Entry &entry) +{ + if (!_txn_log.is_open() || !_txn_log.good()) { + _txn_log.open(_log_path, std::ios::app | std::ios::out); + if (!_txn_log) { + TSWarning("cripts::Cache::Group: Failed to open transaction log `%s'.", _log_path.c_str()); + return; + } + } + + _txn_log.write(reinterpret_cast<const char *>(&entry), sizeof(entry)); + _txn_log.flush(); +} + +void +Cache::Group::syncMap(size_t index) +{ + constexpr size_t BUFFER_SIZE = 64 * 1024; + std::array<std::byte, BUFFER_SIZE> buffer; + size_t buf_pos = 0; + bool write_failed = false; + const auto &slot = _slots[index]; + const std::string tmp_path = slot.path + ".tmp"; + std::ofstream o_file(tmp_path, std::ios::binary | std::ios::trunc); + + if (!o_file) { + TSWarning("cripts::Cache::Group: Failed to open temp file for sync: %s.", tmp_path.c_str()); + return; + } + + // Helper lambda to append data to the write buffer + auto _AppendToBuffer = [&](const void *data, size_t size) { + if (write_failed) { + return; + } + if (buf_pos + size > buffer.size()) { + o_file.write(reinterpret_cast<const char *>(buffer.data()), buf_pos); + if (!o_file) { + write_failed = true; + return; + } + buf_pos = 0; + } + std::memcpy(buffer.data() + buf_pos, static_cast<const std::byte *>(data), size); + buf_pos += size; + }; + + _MapHeader header{.created_ts = cripts::Time::Clock::to_time_t(slot.created), + .last_write_ts = cripts::Time::Clock::to_time_t(slot.last_write), + .last_sync_ts = cripts::Time::Clock::to_time_t(slot.last_sync), + .count = slot.map->size()}; + + _AppendToBuffer(&VERSION, sizeof(VERSION)); + _AppendToBuffer(&header, sizeof(header)); + + // Write entries 
+ for (const auto &[_, entry] : *slot.map) { + _AppendToBuffer(&entry, sizeof(entry)); + } + + if (buf_pos > 0 && !write_failed) { + o_file.write(reinterpret_cast<const char *>(buffer.data()), buf_pos); + } + o_file.flush(); + o_file.close(); + + if (write_failed || !o_file) { + TSWarning("cripts::Cache::Group: Failed to write to temp file `%s'.", tmp_path.c_str()); + std::filesystem::remove(tmp_path); + return; + } + + if (std::rename(tmp_path.c_str(), slot.path.c_str()) != 0) { + TSWarning("cripts::Cache::Group: Failed to rename temp file `%s' to `%s'.", tmp_path.c_str(), slot.path.c_str()); + std::filesystem::remove(tmp_path); + } +} + +void +Cache::Group::clearLog() +{ + std::error_code ec; + + _txn_log.close(); + std::filesystem::remove(_log_path, ec); + if (ec) { + TSWarning("cripts::Cache::Group: Failed to clear transaction log `%s': %s", _log_path.c_str(), ec.message().c_str()); + } +} + +// Singleton instance for the Cache::Group::Manager +Cache::Group::Manager & +Cache::Group::Manager::_instance() +{ + static Cache::Group::Manager inst; + return inst; +} + +void * +Cache::Group::Manager::Factory(const std::string &name, size_t max_entries, size_t num_maps) +{ + std::lock_guard lock(_instance()._mutex); + auto &groups = _instance()._groups; + + if (auto it = groups.find(name); it != groups.end()) { + if (auto group = it->second.lock()) { + return new std::shared_ptr<Group>(std::move(group)); + } + } + + if (!_instance()._base_dir.empty()) { + auto group = std::make_shared<Group>(name, _instance()._base_dir, max_entries, num_maps); + + groups[name] = group; + return new std::shared_ptr<Group>(std::move(group)); + } else { + TSError("cripts::Cache::Group: Failed to get runtime directory for initialization."); + return nullptr; + } +} + +void +Cache::Group::Manager::_scheduleCont() +{ + if (!_cont) { + _cont = TSContCreate(_cripts_cache_group_sync, TSMutexCreate()); + TSContDataSet(_cont, this); + } + + if (_action) { + TSActionCancel(_action); // Can this 
even happen ? Review Comment: The comment states "Can this even happen ?" which indicates uncertainty about the logic. If TSActionCancel is called on a valid action and a new one is then scheduled, this makes sense. However, the comment suggests the developer is unsure about this scenario. Either clarify the comment to explain when this can occur, or remove it if the logic is straightforward. ########## src/cripts/CacheGroup.cc: ########## @@ -0,0 +1,441 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include <algorithm> +#include <filesystem> + +#include "ts/ts.h" +#include "cripts/CacheGroup.hpp" + +inline static uint32_t +_make_prefix_int(cripts::string_view key) +{ + uint32_t prefix = 0; + + std::memcpy(&prefix, key.data(), std::min<size_t>(4, key.size())); + return prefix; +} + +// Stuff around the disk sync contination +constexpr auto _CONT_SYNC_INTERVAL = 10; // How often to run the continuation +constexpr int _SYNC_GROUP_EVERY = 60; // Sync each group every 60s + +int +_cripts_cache_group_sync(TSCont cont, TSEvent /* event */, void * /* edata */) +{ + auto *mgr = static_cast<cripts::Cache::Group::Manager *>(TSContDataGet(cont)); + std::lock_guard lock(mgr->_mutex); + auto &groups = mgr->_groups; + + const size_t max_to_process = (groups.size() + (_SYNC_GROUP_EVERY - 1)) / _SYNC_GROUP_EVERY; + size_t processed = 0; + auto now = cripts::Time::Clock::now(); + + for (auto it = groups.begin(); it != groups.end() && processed < max_to_process;) { + if (auto group = it->second.lock()) { + if (group->LastSync() + std::chrono::seconds{_SYNC_GROUP_EVERY} < now) { + group->WriteToDisk(); + ++processed; + } + ++it; + } else { + // The group has been deleted, remove it from the map + it = groups.erase(it); + } + } + + return TS_SUCCESS; +} + +namespace cripts +{ + +void +Cache::Group::Initialize(const std::string &name, const std::string &base_dir, size_t num_maps, size_t max_entries, + std::chrono::seconds max_age) +{ + cripts::Time::Point zero = cripts::Time::Point{}; + cripts::Time::Point now = cripts::Time::Clock::now(); + + _name = name; + _num_maps = num_maps; + _max_entries = max_entries; + _max_age = max_age; + + _base_dir = base_dir + "/" + _name; + _log_path = _base_dir + "/" + "txn.log"; + + if (!std::filesystem::exists(_base_dir)) { + std::error_code ec; + + std::filesystem::create_directories(_base_dir, ec); + if (ec) { + TSWarning("cripts::Cache::Group: Failed to create directory `%s': %s", _base_dir.c_str(), ec.message().c_str()); + } else { + 
std::filesystem::permissions(_base_dir, std::filesystem::perms::group_write, std::filesystem::perm_options::add, ec); + if (ec) { + TSWarning("cripts::Cache::Group: Failed to set permissions on `%s': %s", _base_dir.c_str(), ec.message().c_str()); + } + } + } + + for (size_t ix = 0; ix < _num_maps; ++ix) { + _slots.emplace_back(_MapSlot{.map = std::make_unique<_MapType>(), + .path = _base_dir + "/map_" + std::to_string(ix) + ".bin", + .created = zero, + .last_write = zero, + .last_sync = zero}); + } + + _slots[0].created = now; + _slots[0].last_write = now; + + LoadFromDisk(); + _last_sync = now; +} + +void +Cache::Group::Insert(cripts::string_view key) +{ + static const std::hash<cripts::string_view> hasher; + + // Trim any "'s and white spaces from the key + key.trim_if(&isspace).trim('"'); + + auto now = cripts::Time::Clock::now(); + auto hash = static_cast<uint64_t>(hasher(key)); + uint32_t prefix = _make_prefix_int(key); + + std::unique_lock lock(_mutex); + + auto &slot = _slots[_map_index]; + auto it = slot.map->find(hash); + + if (it != slot.map->end() && it->second.length == key.size() && it->second.prefix == prefix) { + it->second.timestamp = now; + slot.last_write = now; + appendLog(it->second); + + return; + } + + Cache::Group::_Entry entry{.timestamp = now, .length = key.size(), .prefix = prefix, .hash = hash}; + + slot.map->insert_or_assign(hash, entry); + slot.last_write = now; + appendLog(entry); + + if (slot.map->size() > _max_entries || (now - slot.created) > _max_age) { + _map_index = (_map_index + 1) % _slots.size(); + + auto &next_slot = _slots[_map_index]; + + if (next_slot.last_write > next_slot.last_sync) { + TSWarning("cripts::Cache::Group: Rotating unsynced map for `%s'! 
This may lead to data loss.", _name.c_str()); + } + next_slot.map->clear(); + next_slot.created = now; + next_slot.last_write = now; + syncMap(_map_index); // Sync to disk will make sure it's empty on disk + } +} + +void +Cache::Group::Insert(const std::vector<cripts::string_view> &keys) +{ + for (auto &key : keys) { + Insert(key); + } +} + +bool +Cache::Group::Lookup(cripts::string_view key, cripts::Time::Point age) const +{ + // Trim any "'s and whitespaces from the key + key.trim_if(&isspace).trim('"'); + + uint64_t hash = static_cast<uint64_t>(std::hash<cripts::string_view>{}(key)); + std::shared_lock lock(_mutex); + + for (size_t i = 0; i < _slots.size(); ++i) { + size_t map_index = (_map_index + _slots.size() - i) % _slots.size(); + const auto &slot = _slots[map_index]; + + if (slot.last_write < age) { + continue; // Skip maps that haven't been written to since this time + } + + const auto &map = *slot.map; + auto it = map.find(hash); + + if (it != map.end()) { + const Cache::Group::_Entry &entry = it->second; + + if (entry.timestamp < age || entry.length != key.size() || entry.prefix != _make_prefix_int(key)) { + continue; + } + + return true; + } + } + + return false; +} + +bool +Cache::Group::Lookup(const std::vector<cripts::string_view> &keys, cripts::Time::Point age) const +{ + for (auto &key : keys) { + if (Lookup(key, age)) { + return true; + } + } + + return false; +} + +void +Cache::Group::LoadFromDisk() +{ + std::unique_lock lock(_mutex); + std::ifstream log(_log_path, std::ios::binary); + + for (size_t slot_ix = 0; slot_ix < _slots.size(); ++slot_ix) { + auto &slot = _slots[slot_ix]; + uint64_t version_id = 0; + _MapHeader header{}; + std::ifstream file(slot.path, std::ios::binary); + + if (!file) { + continue; + } + + file.read(reinterpret_cast<char *>(&version_id), sizeof(version_id)); + if (version_id != VERSION) { + TSWarning("cripts::Cache::Group: Version mismatch for map file: %s, expected %llu, got %llu. 
Skipping this map.", + slot.path.c_str(), static_cast<unsigned long long>(VERSION), static_cast<unsigned long long>(version_id)); + continue; + } + + file.read(reinterpret_cast<char *>(&header), sizeof(header)); + if (!file) { + TSWarning("cripts::Cache::Group: Failed to read header from map file: %s. Skipping this map.", slot.path.c_str()); + continue; + } + + slot.created = cripts::Time::Clock::from_time_t(header.created_ts); + slot.last_write = cripts::Time::Clock::from_time_t(header.last_write_ts); + slot.last_sync = cripts::Time::Clock::from_time_t(header.last_sync_ts); + + for (size_t i = 0; i < header.count; ++i) { + Cache::Group::_Entry entry; + + file.read(reinterpret_cast<char *>(&entry), sizeof(entry)); + if (!file) { + TSWarning("cripts::Cache::Group: Failed to read entry %zu from map file: %s. Stopping entry load.", i, slot.path.c_str()); + break; + } + if (!std::ranges::any_of(_slots, [&](const auto &slot) { return slot.map->find(entry.hash) != slot.map->end(); })) { + slot.map->insert_or_assign(entry.hash, entry); + } + } + } + + // Sort the slots by creation time, newest first, since we'll start with index 0 upon loading + std::ranges::sort(_slots, [](const _MapSlot &a, const _MapSlot &b) { return a.created > b.created; }); + + // Replay any entries from the transaction log, and then truncate it + if (log) { + Cache::Group::_Entry entry; + auto last_write = cripts::Time::Clock::from_time_t(0); + + while (log.read(reinterpret_cast<char *>(&entry), sizeof(entry))) { + _slots[0].map->insert_or_assign(entry.hash, entry); + last_write = std::max(last_write, entry.timestamp); + } + _slots[0].last_write = last_write; + clearLog(); + } +} + +void +Cache::Group::WriteToDisk() +{ + std::unique_lock unique_lock(_mutex); + + _last_sync = cripts::Time::Clock::now(); + for (size_t ix = 0; ix < _slots.size(); ++ix) { + bool need_sync = false; + + if (_slots[ix].last_write > _slots[ix].last_sync) { + _slots[ix].last_sync = _last_sync; + need_sync = true; + } + + if 
(need_sync) { + syncMap(ix); + } + } + + clearLog(); +} + +// +// Here comes the private member methods, these must never be called without +// already holding an exclusive lock on the mutex. +// + +void +Cache::Group::appendLog(const Cache::Group::_Entry &entry) +{ + if (!_txn_log.is_open() || !_txn_log.good()) { + _txn_log.open(_log_path, std::ios::app | std::ios::out); + if (!_txn_log) { + TSWarning("cripts::Cache::Group: Failed to open transaction log `%s'.", _log_path.c_str()); + return; + } + } + + _txn_log.write(reinterpret_cast<const char *>(&entry), sizeof(entry)); + _txn_log.flush(); +} + +void +Cache::Group::syncMap(size_t index) +{ + constexpr size_t BUFFER_SIZE = 64 * 1024; + std::array<std::byte, BUFFER_SIZE> buffer; + size_t buf_pos = 0; + bool write_failed = false; + const auto &slot = _slots[index]; + const std::string tmp_path = slot.path + ".tmp"; + std::ofstream o_file(tmp_path, std::ios::binary | std::ios::trunc); + + if (!o_file) { + TSWarning("cripts::Cache::Group: Failed to open temp file for sync: %s.", tmp_path.c_str()); + return; + } + + // Helper lambda to append data to the write buffer + auto _AppendToBuffer = [&](const void *data, size_t size) { + if (write_failed) { + return; + } + if (buf_pos + size > buffer.size()) { + o_file.write(reinterpret_cast<const char *>(buffer.data()), buf_pos); + if (!o_file) { + write_failed = true; + return; + } + buf_pos = 0; + } + std::memcpy(buffer.data() + buf_pos, static_cast<const std::byte *>(data), size); + buf_pos += size; + }; + + _MapHeader header{.created_ts = cripts::Time::Clock::to_time_t(slot.created), + .last_write_ts = cripts::Time::Clock::to_time_t(slot.last_write), + .last_sync_ts = cripts::Time::Clock::to_time_t(slot.last_sync), + .count = slot.map->size()}; + + _AppendToBuffer(&VERSION, sizeof(VERSION)); + _AppendToBuffer(&header, sizeof(header)); + + // Write entries + for (const auto &[_, entry] : *slot.map) { + _AppendToBuffer(&entry, sizeof(entry)); + } + + if (buf_pos > 0 && 
!write_failed) { + o_file.write(reinterpret_cast<const char *>(buffer.data()), buf_pos); + } + o_file.flush(); + o_file.close(); + + if (write_failed || !o_file) { + TSWarning("cripts::Cache::Group: Failed to write to temp file `%s'.", tmp_path.c_str()); + std::filesystem::remove(tmp_path); + return; + } + + if (std::rename(tmp_path.c_str(), slot.path.c_str()) != 0) { + TSWarning("cripts::Cache::Group: Failed to rename temp file `%s' to `%s'.", tmp_path.c_str(), slot.path.c_str()); + std::filesystem::remove(tmp_path); + } +} + +void +Cache::Group::clearLog() +{ + std::error_code ec; + + _txn_log.close(); + std::filesystem::remove(_log_path, ec); + if (ec) { + TSWarning("cripts::Cache::Group: Failed to clear transaction log `%s': %s", _log_path.c_str(), ec.message().c_str()); + } +} + +// Singleton instance for the Cache::Group::Manager +Cache::Group::Manager & +Cache::Group::Manager::_instance() +{ + static Cache::Group::Manager inst; + return inst; +} + +void * +Cache::Group::Manager::Factory(const std::string &name, size_t max_entries, size_t num_maps) +{ + std::lock_guard lock(_instance()._mutex); + auto &groups = _instance()._groups; + + if (auto it = groups.find(name); it != groups.end()) { + if (auto group = it->second.lock()) { + return new std::shared_ptr<Group>(std::move(group)); + } + } + + if (!_instance()._base_dir.empty()) { + auto group = std::make_shared<Group>(name, _instance()._base_dir, max_entries, num_maps); + + groups[name] = group; + return new std::shared_ptr<Group>(std::move(group)); + } else { + TSError("cripts::Cache::Group: Failed to get runtime directory for initialization."); + return nullptr; + } +} + +void +Cache::Group::Manager::_scheduleCont() +{ + if (!_cont) { + _cont = TSContCreate(_cripts_cache_group_sync, TSMutexCreate()); + TSContDataSet(_cont, this); + } + + if (_action) { + TSActionCancel(_action); // Can this even happen ? 
+ _action = nullptr; + } + + _action = TSContScheduleEveryOnPool(_cont, _CONT_SYNC_INTERVAL * 1000, TS_THREAD_POOL_TASK); Review Comment: The continuation is scheduled with TSContScheduleEveryOnPool but there's no check whether scheduling succeeded. If _action is nullptr after this call, the continuation won't run and cache groups won't be synced to disk, potentially leading to data loss. Add error checking and logging if scheduling fails. ########## src/cripts/CacheGroup.cc: ########## @@ -0,0 +1,441 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include <algorithm> +#include <filesystem> + +#include "ts/ts.h" +#include "cripts/CacheGroup.hpp" + +inline static uint32_t +_make_prefix_int(cripts::string_view key) +{ + uint32_t prefix = 0; + + std::memcpy(&prefix, key.data(), std::min<size_t>(4, key.size())); + return prefix; +} + +// Stuff around the disk sync contination Review Comment: The comment contains a typo: "contination" should be "continuation". ########## include/cripts/CacheGroup.hpp: ########## @@ -0,0 +1,216 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. 
See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ +#pragma once + +#include <unordered_map> +#include <string> +#include <vector> +#include <chrono> +#include <mutex> +#include <shared_mutex> +#include <fstream> +#include <atomic> +#include <memory> +#include <cstdint> + +#include "cripts/Context.hpp" +#include "cripts/Time.hpp" + +// Implemented in the .cc file +int _cripts_cache_group_sync(TSCont cont, TSEvent event, void *edata); + +namespace cripts::Cache +{ + +class Group +{ +private: + using self_type = Group; + + struct _Entry { + cripts::Time::Point timestamp; // Timestamp of when the entry was created + size_t length; // Length of the group ID + uint32_t prefix; // First 4 characters of the group ID + uint64_t hash; // Hash value of the group ID, needed when writing to disk + }; + + // Header structure for on-disk map files (after VERSION field) + struct _MapHeader { + time_t created_ts; + time_t last_write_ts; + time_t last_sync_ts; + size_t count; + }; + + using _MapType = std::unordered_map<uint64_t, _Entry>; + + struct _MapSlot { + std::unique_ptr<_MapType> map; + std::string path; + cripts::Time::Point created; + cripts::Time::Point last_write; + cripts::Time::Point last_sync; + }; + +public: + static constexpr uint64_t VERSION = (static_cast<uint64_t>('C') << 56) | (static_cast<uint64_t>('G') << 48) | + (static_cast<uint64_t>('M') << 
40) | (static_cast<uint64_t>('A') << 32) | + (static_cast<uint64_t>('P') << 24) | (static_cast<uint64_t>('S') << 16) | + (static_cast<uint64_t>('0') << 8) | 0x00; // Change this on version bump + + static constexpr std::chrono::seconds DEFAULT_MAX_AGE{63072000}; // 2 Years, max cache lifetime in ATS as well + + Group(const std::string &name, const std::string &base_dir, size_t max_entries = 1024, size_t num_maps = 3) + { + Initialize(name, base_dir, num_maps, max_entries, DEFAULT_MAX_AGE); Review Comment: The Initialize method is called from the constructor with parameters in a different order (num_maps and max_entries are swapped). The constructor at line 78 passes max_entries as the third parameter and num_maps as the fourth, but Initialize expects num_maps third and max_entries fourth. This will cause the values to be assigned incorrectly, leading to unexpected behavior. ########## src/cripts/CacheGroup.cc: ########## @@ -0,0 +1,441 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include <algorithm> +#include <filesystem> + +#include "ts/ts.h" +#include "cripts/CacheGroup.hpp" + +inline static uint32_t +_make_prefix_int(cripts::string_view key) +{ + uint32_t prefix = 0; + + std::memcpy(&prefix, key.data(), std::min<size_t>(4, key.size())); + return prefix; +} + +// Stuff around the disk sync contination +constexpr auto _CONT_SYNC_INTERVAL = 10; // How often to run the continuation +constexpr int _SYNC_GROUP_EVERY = 60; // Sync each group every 60s + +int +_cripts_cache_group_sync(TSCont cont, TSEvent /* event */, void * /* edata */) +{ + auto *mgr = static_cast<cripts::Cache::Group::Manager *>(TSContDataGet(cont)); + std::lock_guard lock(mgr->_mutex); + auto &groups = mgr->_groups; + + const size_t max_to_process = (groups.size() + (_SYNC_GROUP_EVERY - 1)) / _SYNC_GROUP_EVERY; + size_t processed = 0; + auto now = cripts::Time::Clock::now(); + + for (auto it = groups.begin(); it != groups.end() && processed < max_to_process;) { + if (auto group = it->second.lock()) { + if (group->LastSync() + std::chrono::seconds{_SYNC_GROUP_EVERY} < now) { + group->WriteToDisk(); + ++processed; + } + ++it; + } else { + // The group has been deleted, remove it from the map + it = groups.erase(it); + } + } + + return TS_SUCCESS; +} + +namespace cripts +{ + +void +Cache::Group::Initialize(const std::string &name, const std::string &base_dir, size_t num_maps, size_t max_entries, + std::chrono::seconds max_age) +{ + cripts::Time::Point zero = cripts::Time::Point{}; + cripts::Time::Point now = cripts::Time::Clock::now(); + + _name = name; + _num_maps = num_maps; + _max_entries = max_entries; + _max_age = max_age; + + _base_dir = base_dir + "/" + _name; + _log_path = _base_dir + "/" + "txn.log"; + + if (!std::filesystem::exists(_base_dir)) { + std::error_code ec; + + std::filesystem::create_directories(_base_dir, ec); + if (ec) { + TSWarning("cripts::Cache::Group: Failed to create directory `%s': %s", _base_dir.c_str(), ec.message().c_str()); + } else { + 
std::filesystem::permissions(_base_dir, std::filesystem::perms::group_write, std::filesystem::perm_options::add, ec); + if (ec) { + TSWarning("cripts::Cache::Group: Failed to set permissions on `%s': %s", _base_dir.c_str(), ec.message().c_str()); + } + } + } + + for (size_t ix = 0; ix < _num_maps; ++ix) { + _slots.emplace_back(_MapSlot{.map = std::make_unique<_MapType>(), + .path = _base_dir + "/map_" + std::to_string(ix) + ".bin", + .created = zero, + .last_write = zero, + .last_sync = zero}); + } + + _slots[0].created = now; + _slots[0].last_write = now; + + LoadFromDisk(); + _last_sync = now; +} + +void +Cache::Group::Insert(cripts::string_view key) +{ + static const std::hash<cripts::string_view> hasher; + + // Trim any "'s and white spaces from the key + key.trim_if(&isspace).trim('"'); + + auto now = cripts::Time::Clock::now(); + auto hash = static_cast<uint64_t>(hasher(key)); + uint32_t prefix = _make_prefix_int(key); + + std::unique_lock lock(_mutex); + + auto &slot = _slots[_map_index]; + auto it = slot.map->find(hash); + + if (it != slot.map->end() && it->second.length == key.size() && it->second.prefix == prefix) { + it->second.timestamp = now; + slot.last_write = now; + appendLog(it->second); + + return; + } + + Cache::Group::_Entry entry{.timestamp = now, .length = key.size(), .prefix = prefix, .hash = hash}; + + slot.map->insert_or_assign(hash, entry); + slot.last_write = now; + appendLog(entry); + + if (slot.map->size() > _max_entries || (now - slot.created) > _max_age) { + _map_index = (_map_index + 1) % _slots.size(); + + auto &next_slot = _slots[_map_index]; + + if (next_slot.last_write > next_slot.last_sync) { + TSWarning("cripts::Cache::Group: Rotating unsynced map for `%s'! 
This may lead to data loss.", _name.c_str()); + } + next_slot.map->clear(); + next_slot.created = now; + next_slot.last_write = now; + syncMap(_map_index); // Sync to disk will make sure it's empty on disk + } +} + +void +Cache::Group::Insert(const std::vector<cripts::string_view> &keys) +{ + for (auto &key : keys) { + Insert(key); + } +} + +bool +Cache::Group::Lookup(cripts::string_view key, cripts::Time::Point age) const +{ + // Trim any "'s and whitespaces from the key + key.trim_if(&isspace).trim('"'); + + uint64_t hash = static_cast<uint64_t>(std::hash<cripts::string_view>{}(key)); + std::shared_lock lock(_mutex); + + for (size_t i = 0; i < _slots.size(); ++i) { + size_t map_index = (_map_index + _slots.size() - i) % _slots.size(); + const auto &slot = _slots[map_index]; + + if (slot.last_write < age) { + continue; // Skip maps that haven't been written to since this time + } + + const auto &map = *slot.map; + auto it = map.find(hash); + + if (it != map.end()) { + const Cache::Group::_Entry &entry = it->second; + + if (entry.timestamp < age || entry.length != key.size() || entry.prefix != _make_prefix_int(key)) { + continue; + } + + return true; + } + } + + return false; +} + +bool +Cache::Group::Lookup(const std::vector<cripts::string_view> &keys, cripts::Time::Point age) const +{ + for (auto &key : keys) { + if (Lookup(key, age)) { + return true; + } + } + + return false; +} + +void +Cache::Group::LoadFromDisk() +{ + std::unique_lock lock(_mutex); + std::ifstream log(_log_path, std::ios::binary); + + for (size_t slot_ix = 0; slot_ix < _slots.size(); ++slot_ix) { + auto &slot = _slots[slot_ix]; + uint64_t version_id = 0; + _MapHeader header{}; + std::ifstream file(slot.path, std::ios::binary); + + if (!file) { + continue; + } + + file.read(reinterpret_cast<char *>(&version_id), sizeof(version_id)); + if (version_id != VERSION) { + TSWarning("cripts::Cache::Group: Version mismatch for map file: %s, expected %llu, got %llu. 
Skipping this map.", + slot.path.c_str(), static_cast<unsigned long long>(VERSION), static_cast<unsigned long long>(version_id)); + continue; + } + + file.read(reinterpret_cast<char *>(&header), sizeof(header)); + if (!file) { + TSWarning("cripts::Cache::Group: Failed to read header from map file: %s. Skipping this map.", slot.path.c_str()); + continue; + } + + slot.created = cripts::Time::Clock::from_time_t(header.created_ts); + slot.last_write = cripts::Time::Clock::from_time_t(header.last_write_ts); + slot.last_sync = cripts::Time::Clock::from_time_t(header.last_sync_ts); + + for (size_t i = 0; i < header.count; ++i) { + Cache::Group::_Entry entry; + + file.read(reinterpret_cast<char *>(&entry), sizeof(entry)); + if (!file) { + TSWarning("cripts::Cache::Group: Failed to read entry %zu from map file: %s. Stopping entry load.", i, slot.path.c_str()); + break; + } + if (!std::ranges::any_of(_slots, [&](const auto &slot) { return slot.map->find(entry.hash) != slot.map->end(); })) { + slot.map->insert_or_assign(entry.hash, entry); + } + } + } + + // Sort the slots by creation time, newest first, since we'll start with index 0 upon loading + std::ranges::sort(_slots, [](const _MapSlot &a, const _MapSlot &b) { return a.created > b.created; }); + + // Replay any entries from the transaction log, and then truncate it + if (log) { + Cache::Group::_Entry entry; + auto last_write = cripts::Time::Clock::from_time_t(0); + + while (log.read(reinterpret_cast<char *>(&entry), sizeof(entry))) { + _slots[0].map->insert_or_assign(entry.hash, entry); + last_write = std::max(last_write, entry.timestamp); + } + _slots[0].last_write = last_write; + clearLog(); + } +} + +void +Cache::Group::WriteToDisk() +{ + std::unique_lock unique_lock(_mutex); + + _last_sync = cripts::Time::Clock::now(); + for (size_t ix = 0; ix < _slots.size(); ++ix) { + bool need_sync = false; + + if (_slots[ix].last_write > _slots[ix].last_sync) { + _slots[ix].last_sync = _last_sync; + need_sync = true; + } + + if 
(need_sync) { + syncMap(ix); + } + } + + clearLog(); +} + +// +// Here comes the private member methods, these must never be called without +// already holding an exclusive lock on the mutex. +// + +void +Cache::Group::appendLog(const Cache::Group::_Entry &entry) +{ + if (!_txn_log.is_open() || !_txn_log.good()) { + _txn_log.open(_log_path, std::ios::app | std::ios::out); + if (!_txn_log) { + TSWarning("cripts::Cache::Group: Failed to open transaction log `%s'.", _log_path.c_str()); + return; + } + } + + _txn_log.write(reinterpret_cast<const char *>(&entry), sizeof(entry)); + _txn_log.flush(); +} + +void +Cache::Group::syncMap(size_t index) +{ + constexpr size_t BUFFER_SIZE = 64 * 1024; + std::array<std::byte, BUFFER_SIZE> buffer; + size_t buf_pos = 0; + bool write_failed = false; + const auto &slot = _slots[index]; + const std::string tmp_path = slot.path + ".tmp"; + std::ofstream o_file(tmp_path, std::ios::binary | std::ios::trunc); + + if (!o_file) { + TSWarning("cripts::Cache::Group: Failed to open temp file for sync: %s.", tmp_path.c_str()); + return; + } + + // Helper lambda to append data to the write buffer + auto _AppendToBuffer = [&](const void *data, size_t size) { + if (write_failed) { + return; + } + if (buf_pos + size > buffer.size()) { + o_file.write(reinterpret_cast<const char *>(buffer.data()), buf_pos); + if (!o_file) { + write_failed = true; + return; + } + buf_pos = 0; + } + std::memcpy(buffer.data() + buf_pos, static_cast<const std::byte *>(data), size); + buf_pos += size; + }; + + _MapHeader header{.created_ts = cripts::Time::Clock::to_time_t(slot.created), + .last_write_ts = cripts::Time::Clock::to_time_t(slot.last_write), + .last_sync_ts = cripts::Time::Clock::to_time_t(slot.last_sync), + .count = slot.map->size()}; + + _AppendToBuffer(&VERSION, sizeof(VERSION)); + _AppendToBuffer(&header, sizeof(header)); + + // Write entries + for (const auto &[_, entry] : *slot.map) { + _AppendToBuffer(&entry, sizeof(entry)); + } + + if (buf_pos > 0 && 
!write_failed) { + o_file.write(reinterpret_cast<const char *>(buffer.data()), buf_pos); + } + o_file.flush(); + o_file.close(); + + if (write_failed || !o_file) { + TSWarning("cripts::Cache::Group: Failed to write to temp file `%s'.", tmp_path.c_str()); + std::filesystem::remove(tmp_path); + return; + } + + if (std::rename(tmp_path.c_str(), slot.path.c_str()) != 0) { + TSWarning("cripts::Cache::Group: Failed to rename temp file `%s' to `%s'.", tmp_path.c_str(), slot.path.c_str()); + std::filesystem::remove(tmp_path); + } +} + +void +Cache::Group::clearLog() +{ + std::error_code ec; + + _txn_log.close(); + std::filesystem::remove(_log_path, ec); + if (ec) { + TSWarning("cripts::Cache::Group: Failed to clear transaction log `%s': %s", _log_path.c_str(), ec.message().c_str()); + } +} + +// Singleton instance for the Cache::Group::Manager +Cache::Group::Manager & +Cache::Group::Manager::_instance() +{ + static Cache::Group::Manager inst; + return inst; +} + +void * +Cache::Group::Manager::Factory(const std::string &name, size_t max_entries, size_t num_maps) +{ + std::lock_guard lock(_instance()._mutex); + auto &groups = _instance()._groups; + + if (auto it = groups.find(name); it != groups.end()) { + if (auto group = it->second.lock()) { + return new std::shared_ptr<Group>(std::move(group)); + } + } + + if (!_instance()._base_dir.empty()) { + auto group = std::make_shared<Group>(name, _instance()._base_dir, max_entries, num_maps); + + groups[name] = group; + return new std::shared_ptr<Group>(std::move(group)); Review Comment: Memory allocated by 'new std::shared_ptr' in the Factory method could leak if an exception is thrown before the caller takes ownership. Consider using std::make_unique or returning the shared_ptr by value instead of raw pointer to ensure exception safety. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at: [email protected]
