pitrou commented on code in PR #37792:
URL: https://github.com/apache/arrow/pull/37792#discussion_r1338137857
##########
cpp/src/arrow/type.h:
##########
@@ -710,6 +717,120 @@ class ARROW_EXPORT BinaryType : public BaseBinaryType {
explicit BinaryType(Type::type logical_type) : BaseBinaryType(logical_type)
{}
};
+/// \brief Concrete type class for variable-size binary view data
+class ARROW_EXPORT BinaryViewType : public DataType {
+ public:
+ static constexpr Type::type type_id = Type::BINARY_VIEW;
+ static constexpr bool is_utf8 = false;
+ using PhysicalType = BinaryViewType;
+
+ static constexpr int kSize = 16;
+ static constexpr int kInlineSize = 12;
+ static constexpr int kPrefixSize = 4;
+
+ /// Variable length string or binary with inline optimization for small values (12 bytes
+ /// or fewer). This is similar to std::string_view except limited in size to INT32_MAX
+ /// and at least the first four bytes of the string are copied inline (accessible
+ /// without pointer dereference). This inline prefix allows failing comparisons early
+ /// and can reduce the CPU cache working set when dealing with short strings.
+ ///
+ /// This union supports three states:
+ ///
+ ///   Short string   |----|----|--------|
+ ///                    ^    ^      ^
+ ///                    |    |      |
+ ///                   size prefix remaining in-line portion, zero padded
+ ///
+ ///   Long string    |----|----|--------|
+ ///                    ^    ^      ^
+ ///                    |    |      |
+ ///                   size prefix raw pointer to out-of-line portion
Review Comment:
Thank you!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]