Repository: arrow
Updated Branches:
  refs/heads/master 5da6b8795 -> f9d1e1be7


ARROW-1611: [C++] Add BitmapWriter, do not perform out of bounds read in 
BitmapReader when length is 0

close #1133
close #1131

Author: Rene Sugar <rene.su...@gmail.com>

Closes #1137 from wesm/ARROW-1611 and squashes the following commits:

5bff0bd [Rene Sugar] ARROW-1611 Do not read bitmap when length is zero, add 
internal::BitmapWriter


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/f9d1e1be
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/f9d1e1be
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/f9d1e1be

Branch: refs/heads/master
Commit: f9d1e1be756636cb8a280ac3723ed3dea2abe204
Parents: 5da6b87
Author: Rene Sugar <rene.su...@gmail.com>
Authored: Tue Sep 26 21:56:33 2017 -0400
Committer: Wes McKinney <wes.mckin...@twosigma.com>
Committed: Tue Sep 26 21:56:33 2017 -0400

----------------------------------------------------------------------
 cpp/src/arrow/util/bit-util-test.cc |  36 ++++++++
 cpp/src/arrow/util/bit-util.h       | 139 +++++++++++++++++++++----------
 2 files changed, 132 insertions(+), 43 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/f9d1e1be/cpp/src/arrow/util/bit-util-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/bit-util-test.cc 
b/cpp/src/arrow/util/bit-util-test.cc
index a5c6cec..5a66d7e 100644
--- a/cpp/src/arrow/util/bit-util-test.cc
+++ b/cpp/src/arrow/util/bit-util-test.cc
@@ -91,6 +91,42 @@ TEST(BitmapReader, DoesNotReadOutOfBounds) {
     ASSERT_TRUE(r2.IsNotSet());
     r2.Next();
   }
+
+  // Does not access invalid memory
+  internal::BitmapReader r3(nullptr, 0, 0);
+}
+
+TEST(BitmapWriter, DoesNotWriteOutOfBounds) {
+  uint8_t bitmap[16] = {0};
+
+  const int length = 128;
+
+  int64_t num_values = 0;
+
+  internal::BitmapWriter r1(bitmap, 0, length);
+
+  // If this were to write out of bounds, valgrind would tell us
+  for (int i = 0; i < length; ++i) {
+    r1.Set();
+    r1.Clear();
+    r1.Next();
+  }
+  r1.Finish();
+  num_values = r1.position();
+
+  ASSERT_EQ(length, num_values);
+
+  internal::BitmapWriter r2(bitmap, 5, length - 5);
+
+  for (int i = 0; i < (length - 5); ++i) {
+    r2.Set();
+    r2.Clear();
+    r2.Next();
+  }
+  r2.Finish();
+  num_values = r2.position();
+
+  ASSERT_EQ((length - 5), num_values);
 }
 
 static inline int64_t SlowCountBits(const uint8_t* data, int64_t bit_offset,

http://git-wip-us.apache.org/repos/asf/arrow/blob/f9d1e1be/cpp/src/arrow/util/bit-util.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h
index fa0d7a4..a85aff7 100644
--- a/cpp/src/arrow/util/bit-util.h
+++ b/cpp/src/arrow/util/bit-util.h
@@ -48,49 +48,6 @@
 #endif
 
 namespace arrow {
-namespace internal {
-
-class BitmapReader {
- public:
-  BitmapReader(const uint8_t* bitmap, int64_t start_offset, int64_t length)
-      : bitmap_(bitmap), position_(0), length_(length) {
-    byte_offset_ = start_offset / 8;
-    bit_offset_ = start_offset % 8;
-    current_byte_ = bitmap[byte_offset_];
-  }
-
-#if defined(_MSC_VER)
-  // MSVC is finicky about this cast
-  bool IsSet() const { return (current_byte_ & (1 << bit_offset_)) != 0; }
-#else
-  bool IsSet() const { return current_byte_ & (1 << bit_offset_); }
-#endif
-
-  bool IsNotSet() const { return (current_byte_ & (1 << bit_offset_)) == 0; }
-
-  void Next() {
-    ++bit_offset_;
-    ++position_;
-    if (bit_offset_ == 8) {
-      bit_offset_ = 0;
-      ++byte_offset_;
-      if (ARROW_PREDICT_TRUE(position_ < length_)) {
-        current_byte_ = bitmap_[byte_offset_];
-      }
-    }
-  }
-
- private:
-  const uint8_t* bitmap_;
-  int64_t position_;
-  int64_t length_;
-
-  uint8_t current_byte_;
-  int64_t byte_offset_;
-  int64_t bit_offset_;
-};
-
-}  // namespace internal
 
 #ifndef ARROW_NO_DEPRECATED_API
 
@@ -436,6 +393,102 @@ Status BytesToBits(const std::vector<uint8_t>&, 
MemoryPool*, std::shared_ptr<Buf
 
 }  // namespace BitUtil
 
+namespace internal {
+
+class BitmapReader {
+ public:
+  BitmapReader(const uint8_t* bitmap, int64_t start_offset, int64_t length)
+      : bitmap_(bitmap), position_(0), length_(length) {
+    current_byte_ = 0;
+    byte_offset_ = start_offset / 8;
+    bit_offset_ = start_offset % 8;
+    if (length > 0) {
+      current_byte_ = bitmap[byte_offset_];
+    }
+  }
+
+#if defined(_MSC_VER)
+  // MSVC is finicky about this cast
+  bool IsSet() const { return (current_byte_ & (1 << bit_offset_)) != 0; }
+#else
+  bool IsSet() const { return current_byte_ & (1 << bit_offset_); }
+#endif
+
+  bool IsNotSet() const { return (current_byte_ & (1 << bit_offset_)) == 0; }
+
+  void Next() {
+    ++bit_offset_;
+    ++position_;
+    if (bit_offset_ == 8) {
+      bit_offset_ = 0;
+      ++byte_offset_;
+      if (ARROW_PREDICT_TRUE(position_ < length_)) {
+        current_byte_ = bitmap_[byte_offset_];
+      }
+    }
+  }
+
+ private:
+  const uint8_t* bitmap_;
+  int64_t position_;
+  int64_t length_;
+
+  uint8_t current_byte_;
+  int64_t byte_offset_;
+  int64_t bit_offset_;
+};
+
+class BitmapWriter {
+ public:
+  BitmapWriter(uint8_t* bitmap, int64_t start_offset, int64_t length)
+      : bitmap_(bitmap), position_(0), length_(length) {
+    current_byte_ = 0;
+    byte_offset_ = start_offset / 8;
+    bit_offset_ = start_offset % 8;
+    if (length > 0) {
+      current_byte_ = bitmap[byte_offset_];
+    }
+  }
+
+  void Set() { current_byte_ |= BitUtil::kBitmask[bit_offset_]; }
+
+  void Clear() { current_byte_ &= BitUtil::kFlippedBitmask[bit_offset_]; }
+
+  void Next() {
+    ++bit_offset_;
+    ++position_;
+    bitmap_[byte_offset_] = current_byte_;
+    if (bit_offset_ == 8) {
+      bit_offset_ = 0;
+      ++byte_offset_;
+      if (ARROW_PREDICT_TRUE(position_ < length_)) {
+        current_byte_ = bitmap_[byte_offset_];
+      }
+    }
+  }
+
+  void Finish() {
+    if (ARROW_PREDICT_TRUE(position_ < length_)) {
+      if (bit_offset_ != 0) {
+        bitmap_[byte_offset_] = current_byte_;
+      }
+    }
+  }
+
+  int64_t position() const { return position_; }
+
+ private:
+  uint8_t* bitmap_;
+  int64_t position_;
+  int64_t length_;
+
+  uint8_t current_byte_;
+  int64_t byte_offset_;
+  int64_t bit_offset_;
+};
+
+}  // namespace internal
+
 // ----------------------------------------------------------------------
 // Bitmap utilities
 

Reply via email to