Repository: kudu Updated Branches: refs/heads/master 0348499e3 -> acf093c18
cfile-test: some test micro-optimization to avoid timeouts It turns out that SCOPED_TRACE is relatively slow. Removing some SCOPED_TRACE in hot parts of the test speeds some of the test cases up by almost 2x in ASAN builds on my box. Given that these tests are fairly deterministic given a seed, and have been stable for a long time, leaving the SCOPED_TRACEs in has limited value. I only removed those that are in hot paths rather than removing all SCOPED_TRACEs. I also optimized the "large strings" test to avoid using a large amount of padding via the StringPrintf argument. The new implementation seems to run almost twice as fast on my machine. Comparing the 'CacheTypes/TestCFileBothCacheTypes.TestReadWriteLargeStrings/0' in fast-test ASAN mode before and after: Before: real 0m39.710s user 0m28.808s sys 0m3.828s After: real 0m21.274s user 0m9.824s sys 0m3.832s Change-Id: I951f4ef47c5275ea9743b1be0ff74374498192b1 Reviewed-on: http://gerrit.cloudera.org:8080/3875 Reviewed-by: Adar Dembo <a...@cloudera.com> Tested-by: Kudu Jenkins Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/cee7bddf Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/cee7bddf Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/cee7bddf Branch: refs/heads/master Commit: cee7bddf03d6287002d21ffcf4e8330cb08c9c6d Parents: 0348499 Author: Todd Lipcon <t...@apache.org> Authored: Tue Aug 9 10:49:04 2016 -0700 Committer: Todd Lipcon <t...@apache.org> Committed: Tue Aug 9 22:03:58 2016 +0000 ---------------------------------------------------------------------- src/kudu/cfile/cfile-test-base.h | 12 ++++++--- src/kudu/cfile/cfile-test.cc | 47 +++++++++++++++++++++-------------- 2 files changed, 37 insertions(+), 22 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/cee7bddf/src/kudu/cfile/cfile-test-base.h ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/cfile-test-base.h b/src/kudu/cfile/cfile-test-base.h index 2aa408f..a5e1b9b 100644 --- a/src/kudu/cfile/cfile-test-base.h +++ b/src/kudu/cfile/cfile-test-base.h @@ -20,6 +20,7 @@ #include <glog/logging.h> #include <algorithm> +#include <functional> #include <stdlib.h> #include <string> #include <vector> @@ -229,11 +230,16 @@ template<bool HAS_NULLS> class StringDataGenerator : public DataGenerator<STRING, HAS_NULLS> { public: explicit StringDataGenerator(const char* format) - : format_(format) { + : StringDataGenerator( + [=](size_t x) { return StringPrintf(format, x); }) { + } + + explicit StringDataGenerator(std::function<std::string(size_t)> formatter) + : formatter_(std::move(formatter)) { } Slice BuildTestValue(size_t block_index, size_t value) OVERRIDE { - data_buffers_[block_index] = StringPrintf(format_, value); + data_buffers_[block_index] = formatter_(value); return Slice(data_buffers_[block_index]); } @@ -244,7 +250,7 @@ class StringDataGenerator : public DataGenerator<STRING, HAS_NULLS> { private: std::vector<std::string> data_buffers_; - const char* format_; + std::function<std::string(size_t)> formatter_; }; // Class for generating strings that contain duplicate http://git-wip-us.apache.org/repos/asf/kudu/blob/cee7bddf/src/kudu/cfile/cfile-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/cfile-test.cc b/src/kudu/cfile/cfile-test.cc index 7a54046..0e33283 100644 --- a/src/kudu/cfile/cfile-test.cc +++ b/src/kudu/cfile/cfile-test.cc @@ -169,7 +169,6 @@ class TestCFile : public CFileTestBase { // Read and verify several ColumnBlocks from this point in the file. int read_offset = target; for (int block = 0; block < 3 && iter->HasNext(); block++) { - SCOPED_TRACE(block); size_t n = cb.nrows(); ASSERT_OK_FAST(iter->CopyNextValues(&n, &cb)); ASSERT_EQ(n, std::min(num_entries - read_offset, cb.nrows())); @@ -177,7 +176,6 @@ class TestCFile : public CFileTestBase { // Verify that the block data is correct. generator->Build(read_offset, n); for (size_t j = 0; j < n; ++j) { - SCOPED_TRACE(j); bool expected_null = generator->TestValueShouldBeNull(read_offset + j); ASSERT_EQ(expected_null, cb.is_null(j)); if (!expected_null) { @@ -255,7 +253,14 @@ class TestCFile : public CFileTestBase { ASSERT_EQ(num_entries, count); } - void TestReadWriteStrings(EncodingType encoding, const char* format); + void TestReadWriteStrings(EncodingType encoding) { + TestReadWriteStrings(encoding, [](size_t val) { + return StringPrintf("hello %04zd", val); + }); + } + + void TestReadWriteStrings(EncodingType encoding, + std::function<string(size_t)> formatter); #ifdef NDEBUG void TestWrite100MFileStrings(EncodingType encoding) { @@ -474,12 +479,12 @@ void EncodeStringKey(const Schema &schema, const Slice& key, } void TestCFile::TestReadWriteStrings(EncodingType encoding, - const char* str_format = "hello %04d") { + std::function<string(size_t)> formatter) { Schema schema({ ColumnSchema("key", STRING) }, 1); const int nrows = 10000; BlockId block_id; - StringDataGenerator<false> generator(str_format); + StringDataGenerator<false> generator(formatter); WriteTestFile(&generator, encoding, NO_COMPRESSION, nrows, SMALL_BLOCKSIZE | WRITE_VALIDX, &block_id); @@ -504,7 +509,7 @@ void TestCFile::TestReadWriteStrings(EncodingType encoding, Slice s; CopyOne<STRING>(iter.get(), &s, &arena); - ASSERT_EQ(StringPrintf(str_format, 5000), s.ToString()); + ASSERT_EQ(formatter(5000), s.ToString()); // Seek to last key exactly, should succeed ASSERT_OK(iter->SeekToOrdinal(9999)); @@ -526,8 +531,7 @@ void TestCFile::TestReadWriteStrings(EncodingType encoding, string buf; for (int i = 1; i < 10000; i++) { arena.Reset(); - SCOPED_TRACE(i); - SStringPrintf(&buf, str_format, i - 1); + buf = formatter(i - 1); buf.append(".5"); s = Slice(buf); EncodeStringKey(schema, s, &encoded_key); @@ -535,15 +539,14 @@ void TestCFile::TestReadWriteStrings(EncodingType encoding, ASSERT_FALSE(exact); ASSERT_EQ(i, iter->GetCurrentOrdinal()); CopyOne<STRING>(iter.get(), &s, &arena); - ASSERT_EQ(StringPrintf(str_format, i), s.ToString()); + ASSERT_EQ(formatter(i), s.ToString()); } // Seek exactly to each key // (seek to "hello 0000" through "hello 9999") for (int i = 0; i < 9999; i++) { arena.Reset(); - SCOPED_TRACE(i); - SStringPrintf(&buf, str_format, i); + buf = formatter(i); s = Slice(buf); EncodeStringKey(schema, s, &encoded_key); ASSERT_OK(iter->SeekAtOrAfter(*encoded_key, &exact)); @@ -556,14 +559,14 @@ void TestCFile::TestReadWriteStrings(EncodingType encoding, // after last entry // (seek to "hello 9999.x") - buf = StringPrintf(str_format, 9999) + "x"; + buf = formatter(9999) + ".x"; s = Slice(buf); EncodeStringKey(schema, s, &encoded_key); EXPECT_TRUE(iter->SeekAtOrAfter(*encoded_key, &exact).IsNotFound()); // before first entry // (seek to "hello 000", which falls before "hello 0000") - buf = StringPrintf(str_format, 0); + buf = formatter(0); buf.resize(buf.size() - 1); s = Slice(buf); EncodeStringKey(schema, s, &encoded_key); @@ -571,13 +574,13 @@ void TestCFile::TestReadWriteStrings(EncodingType encoding, EXPECT_FALSE(exact); EXPECT_EQ(0u, iter->GetCurrentOrdinal()); CopyOne<STRING>(iter.get(), &s, &arena); - EXPECT_EQ(StringPrintf(str_format, 0), s.ToString()); + EXPECT_EQ(formatter(0), s.ToString()); // Seek to start of file by ordinal ASSERT_OK(iter->SeekToFirst()); ASSERT_EQ(0u, iter->GetCurrentOrdinal()); CopyOne<STRING>(iter.get(), &s, &arena); - ASSERT_EQ(StringPrintf(str_format, 0), s.ToString()); + ASSERT_EQ(formatter(0), s.ToString()); // Reseek to start and fetch all data. // We fetch in 10 smaller chunks to avoid using too much RAM for the @@ -609,11 +612,17 @@ TEST_P(TestCFileBothCacheTypes, TestReadWriteStringsDictEncoding) { #ifndef THREAD_SANITIZER TEST_P(TestCFileBothCacheTypes, TestReadWriteLargeStrings) { // Pad the values out to a length of ~65KB. - const char* kFormat = "%066000d"; - TestReadWriteStrings(PLAIN_ENCODING, kFormat); + // We use this method instead of just a longer sprintf format since + // this is much more CPU-efficient (speeds up the test). + auto formatter = [](size_t val) { + string ret(66000, '0'); + StringAppendF(&ret, "%010zd", val); + return ret; + }; + TestReadWriteStrings(PLAIN_ENCODING, formatter); if (AllowSlowTests()) { - TestReadWriteStrings(DICT_ENCODING, kFormat); - TestReadWriteStrings(PREFIX_ENCODING, kFormat); + TestReadWriteStrings(DICT_ENCODING, formatter); + TestReadWriteStrings(PREFIX_ENCODING, formatter); } } #endif