wgtmac commented on code in PR #595: URL: https://github.com/apache/iceberg-cpp/pull/595#discussion_r2975791396
########## src/iceberg/deletes/roaring_position_bitmap.h: ########## @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/deletes/roaring_position_bitmap.h +/// A 64-bit position bitmap using an array of 32-bit Roaring bitmaps. + +#include <cstdint> +#include <functional> +#include <memory> +#include <string> +#include <string_view> + +#include "iceberg/iceberg_export.h" +#include "iceberg/result.h" + +namespace iceberg { + +/// \brief A bitmap that supports positive 64-bit positions, optimized +/// for cases where most positions fit in 32 bits. +/// +/// Incoming 64-bit positions are divided into a 32-bit "key" using the +/// most significant 4 bytes and a 32-bit position using the least +/// significant 4 bytes. For each key, a 32-bit Roaring bitmap is +/// maintained to store positions for that key. +/// +/// \note The Puffin deletion-vector-v1 wrapping (length prefix, magic +/// bytes, CRC-32) is handled by the Puffin writer/reader layer, not +/// this class. +class ICEBERG_EXPORT RoaringPositionBitmap { + public: + /// \brief Maximum supported position. 
+ static constexpr int64_t kMaxPosition = 0x7FFFFFFE80000000LL; + + RoaringPositionBitmap(); + ~RoaringPositionBitmap(); + + RoaringPositionBitmap(RoaringPositionBitmap&& other) noexcept; + RoaringPositionBitmap& operator=(RoaringPositionBitmap&& other) noexcept; + + RoaringPositionBitmap(const RoaringPositionBitmap&) = delete; + RoaringPositionBitmap& operator=(const RoaringPositionBitmap&) = delete; + + /// \brief Sets a position in the bitmap. + /// \param pos the position (must be >= 0 and <= kMaxPosition) + /// \return Status indicating success or InvalidArgument error + [[nodiscard]] Status Add(int64_t pos); + + /// \brief Sets a range of positions [pos_start, pos_end). + /// \return Status indicating success or InvalidArgument error + [[nodiscard]] Status AddRange(int64_t pos_start, int64_t pos_end); + + /// \brief Checks if a position is set in the bitmap. + /// \return Result<bool> or InvalidArgument error Review Comment: We haven't added an OutOfRange error kind, and neither has Arrow C++. This is consistent with the Java implementation below: ```java private static void validatePosition(long pos) { Preconditions.checkArgument( pos >= 0 && pos <= MAX_POSITION, "Bitmap supports positions that are >= 0 and <= %s: %s", MAX_POSITION, pos); } ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
