Author: mikhail.naga...@gmail.com Date: Thu Jun 11 07:08:34 2009 New Revision: 2147
Modified: branches/bleeding_edge/src/log-utils.cc branches/bleeding_edge/src/log-utils.h branches/bleeding_edge/src/log.cc branches/bleeding_edge/src/log.h branches/bleeding_edge/src/utils.h branches/bleeding_edge/test/cctest/test-log-utils.cc Log: Implement tick events compression in a log file. Two techniques are involved: - compress repeated line ends (common stack beginnings) by using back references; - do RLE compression of repeated tick events. This gives only 5% size reduction on benchmarks run, but this is because tick events are only comprise 10% of file size. Under Chromium winnings are bigger because long repeated samples of idleness are now compressed into a single line. Tickprocessor will be updated in the next patch. Review URL: http://codereview.chromium.org/123012 Modified: branches/bleeding_edge/src/log-utils.cc ============================================================================== --- branches/bleeding_edge/src/log-utils.cc (original) +++ branches/bleeding_edge/src/log-utils.cc Thu Jun 11 07:08:34 2009 @@ -123,15 +123,22 @@ Log::WritePtr Log::Write = NULL; FILE* Log::output_handle_ = NULL; LogDynamicBuffer* Log::output_buffer_ = NULL; -// Must be the same message as in Logger::PauseProfiler +// Must be the same message as in Logger::PauseProfiler. const char* Log::kDynamicBufferSeal = "profiler,\"pause\"\n"; Mutex* Log::mutex_ = NULL; char* Log::message_buffer_ = NULL; +LogRecordCompressor* Log::record_compressor_ = NULL; +// Must be the same as compressed tick event name from Logger. +const char* Log::kCompressedTickEventName = "t,"; void Log::Init() { mutex_ = OS::CreateMutex(); message_buffer_ = NewArray<char>(kMessageBufferSize); + if (FLAG_compress_log) { + record_compressor_ = new LogRecordCompressor( + kRecordCompressorWindow, strlen(kCompressedTickEventName)); + } } @@ -173,6 +180,12 @@ } Write = NULL; + delete record_compressor_; + record_compressor_ = NULL; + + DeleteArray(message_buffer_); + message_buffer_ = NULL; + delete mutex_; mutex_ = NULL; @@ -280,6 +293,29 @@ } +bool LogMessageBuilder::StoreInCompressor() { + if (!FLAG_compress_log) return true; + ASSERT(Log::record_compressor_ != NULL); + return Log::record_compressor_->Store( + Vector<const char>(Log::message_buffer_, pos_)); +} + + +bool LogMessageBuilder::RetrieveCompressedPrevious(const char* prefix) { + if (!FLAG_compress_log) return true; + ASSERT(Log::record_compressor_ != NULL); + pos_ = 0; + if (prefix[0] != '\0') Append(prefix); + Vector<char> prev_record(Log::message_buffer_ + pos_, + Log::kMessageBufferSize - pos_); + const bool has_previous = + Log::record_compressor_->RetrievePreviousCompressed(&prev_record); + if (!has_previous) return false; + pos_ += prev_record.length(); + return true; +} + + void LogMessageBuilder::WriteToLogFile() { ASSERT(pos_ <= Log::kMessageBufferSize); const int written = Log::Write(Log::message_buffer_, pos_); @@ -295,6 +331,105 @@ if (written != len && write_failure_handler != NULL) { write_failure_handler(); } +} + + +// Formatting string for back references. E.g. "#2:10" means +// "the second line above, start from char 10 (0-based)". +const char* LogRecordCompressor::kBackwardReferenceFormat = "#%d:%d"; + + +LogRecordCompressor::~LogRecordCompressor() { + for (int i = 0; i < buffer_.length(); ++i) { + buffer_[i].Dispose(); + } +} + + +bool LogRecordCompressor::Store(const Vector<const char>& record) { + // Check if the record is the same as the last stored one. + if (curr_ != -1) { + Vector<const char>& curr = buffer_[curr_]; + if (record.length() == curr.length() + && strncmp(record.start(), curr.start(), record.length()) == 0) { + return false; + } + } + // buffer_ is circular. + prev_ = curr_++; + curr_ %= buffer_.length(); + Vector<char> record_copy = Vector<char>::New(record.length()); + memcpy(record_copy.start(), record.start(), record.length()); + buffer_[curr_].Dispose(); + buffer_[curr_] = + Vector<const char>(record_copy.start(), record_copy.length()); + return true; +} + + +bool LogRecordCompressor::RetrievePreviousCompressed( + Vector<char>* prev_record) { + if (prev_ == -1) return false; + + int index = prev_; + // Distance from prev_. + int distance = 0; + // Best compression result among records in the buffer. + struct { + intptr_t truncated_len; + int distance; + int copy_from_pos; + } best = {-1, 0, 0}; + Vector<const char>& prev = buffer_[prev_]; + const char* const prev_start = prev.start() + start_pos_; + const char* const prev_end = prev.start() + prev.length(); + do { + // We're moving backwards until we reach the current record. + // Remember that buffer_ is circular. + if (--index == -1) index = buffer_.length() - 1; + ++distance; + if (index == curr_) break; + + Vector<const char>& data = buffer_[index]; + if (data.start() == NULL) break; + const char* const data_end = data.start() + data.length(); + const char* prev_ptr = prev_end; + const char* data_ptr = data_end; + // Compare strings backwards, stop on the last matching character. + while (prev_ptr != prev_start && data_ptr != data.start() + && *(prev_ptr - 1) == *(data_ptr - 1)) { + --prev_ptr; + --data_ptr; + } + const intptr_t truncated_len = prev_end - prev_ptr; + if (truncated_len < kUncompressibleBound) continue; + + // Record compression results. + if (truncated_len > best.truncated_len) { + best.truncated_len = truncated_len; + best.distance = distance; + best.copy_from_pos = data_ptr - data.start(); + } + } while (true); + + if (best.distance == 0) { + // Can't compress the previous record. Return as is. + ASSERT(prev_record->length() >= prev.length()); + memcpy(prev_record->start(), prev.start(), prev.length()); + prev_record->Truncate(prev.length()); + } else { + // Copy the uncompressible part unchanged. + const intptr_t unchanged_len = prev.length() - best.truncated_len; + ASSERT(prev_record->length() >= unchanged_len + kUncompressibleBound); + memcpy(prev_record->start(), prev.start(), unchanged_len); + // Append the backward reference. + Vector<char> patch(prev_record->start() + unchanged_len, + kUncompressibleBound); + OS::SNPrintF(patch, kBackwardReferenceFormat, + best.distance, best.copy_from_pos); + prev_record->Truncate(unchanged_len + strlen(patch.start())); + } + return true; } #endif // ENABLE_LOGGING_AND_PROFILING Modified: branches/bleeding_edge/src/log-utils.h ============================================================================== --- branches/bleeding_edge/src/log-utils.h (original) +++ branches/bleeding_edge/src/log-utils.h Thu Jun 11 07:08:34 2009 @@ -88,6 +88,48 @@ }; +// An utility class for performing backward reference compression +// of string ends. It operates using a window of previous strings. +class LogRecordCompressor { + public: + // 'window_size' is the size of backward lookup window. + // 'start_pos' is the length of string prefix that must be left uncompressed. + LogRecordCompressor(int window_size, int start_pos) + : buffer_(window_size), start_pos_(start_pos), curr_(-1), prev_(-1) { + // Must have place for the current record and the previous. + ASSERT(window_size >= 2); + } + ~LogRecordCompressor(); + + // Fills vector with a compressed version of the previous record. + // Returns false if there is no previous record. + bool RetrievePreviousCompressed(Vector<char>* prev_record); + + // Stores a record if it differs from a previous one (or there's no previous). + // Returns true, if the record has been stored. + bool Store(const Vector<const char>& record); + + private: + // Record field separator. It is assumed that it can't appear + // inside a field (a simplified version of CSV format). + static const char kFieldSeparator = ','; + + // Formatting string for back references. + static const char* kBackwardReferenceFormat; + + // A boundary value. If current record only differs in + // kUncompressibleBound chars, don't compress it. + // The value must be greater than the length of maximum + // projected backward reference length. + static const intptr_t kUncompressibleBound = 10; + + ScopedVector< Vector<const char> > buffer_; + const int start_pos_; + int curr_; + int prev_; +}; + + // Functions and data for performing output of log messages. class Log : public AllStatic { public: @@ -114,6 +156,9 @@ return !is_stopped_ && (output_handle_ != NULL || output_buffer_ != NULL); } + // Size of buffer used for record compression. + static const int kRecordCompressorWindow = 6; + private: typedef int (*WritePtr)(const char* msg, int length); @@ -169,6 +214,11 @@ // mutex_ should be acquired before using it. static char* message_buffer_; + // An utility object for compressing repeated parts of records. + static LogRecordCompressor* record_compressor_; + + static const char* kCompressedTickEventName; + friend class LogMessageBuilder; }; @@ -195,6 +245,19 @@ void Append(String* str); void AppendDetailed(String* str, bool show_impl_info); + + // Stores log message into compressor, returns true if the message + // was stored (i.e. doesn't repeat the previous one). If log compression is + // disabled, does nothing and returns true. + bool StoreInCompressor(); + + // Sets log message to a previous version of compressed message. + // Returns false, if there is no previous message. If log compression + // is disabled, does nothing and retuns true. + bool RetrieveCompressedPrevious() { return RetrieveCompressedPrevious(""); } + + // Does the same at the version without arguments, and sets a prefix. + bool RetrieveCompressedPrevious(const char* prefix); // Write the log message to the log file currently opened. void WriteToLogFile(); Modified: branches/bleeding_edge/src/log.cc ============================================================================== --- branches/bleeding_edge/src/log.cc (original) +++ branches/bleeding_edge/src/log.cc Thu Jun 11 07:08:34 2009 @@ -303,6 +303,7 @@ VMState Logger::bottom_state_(EXTERNAL); SlidingStateWindow* Logger::sliding_state_window_ = NULL; const char** Logger::log_events_ = NULL; +int Logger::tick_repeat_count_ = 0; #define DECLARE_LONG_EVENT(ignore1, long_name, ignore2) long_name, const char* kLongLogEventsNames[Logger::NUMBER_OF_LOG_EVENTS] = { @@ -326,6 +327,9 @@ if (!Log::IsEnabled()) return; LogMessageBuilder msg; msg.Append("profiler,\"begin\",%d\n", kSamplingIntervalMs); + if (FLAG_compress_log) { + msg.Append("profiler,\"compression\",%d\n", Log::kRecordCompressorWindow); + } msg.WriteToLogFile(); } @@ -848,6 +852,28 @@ msg.Append(",0x%" V8PRIxPTR, sample->stack[i]); } } + // In case if log compression is disabled, the flow is straightforward, + // because both StoreInCompressor and RetrieveCompressedPrevious do nothing + // and just return 'true'. Otherwise, to perform compression of repeated + // tick events, instead of the current event, the previous one is written. + if (!msg.StoreInCompressor()) { + // Current message repeats the previous one, don't write it. + ++tick_repeat_count_; + return; + } + + bool has_previous_message; + if (tick_repeat_count_ > 0) { + EmbeddedVector<char, 20> prefix; + OS::SNPrintF(prefix, "%s,%d,", + log_events_[REPEAT_META_EVENT], + tick_repeat_count_ + 1); + tick_repeat_count_ = 0; + has_previous_message = msg.RetrieveCompressedPrevious(prefix.start()); + } else { + has_previous_message = msg.RetrieveCompressedPrevious(); + } + if (!has_previous_message) return; msg.Append('\n'); msg.WriteToLogFile(); } Modified: branches/bleeding_edge/src/log.h ============================================================================== --- branches/bleeding_edge/src/log.h (original) +++ branches/bleeding_edge/src/log.h Thu Jun 11 07:08:34 2009 @@ -107,6 +107,7 @@ V(CODE_MOVE_EVENT, "code-move", "cm") \ V(CODE_DELETE_EVENT, "code-delete", "cd") \ V(TICK_EVENT, "tick", "t") \ + V(REPEAT_META_EVENT, "repeat", "r") \ V(BUILTIN_TAG, "Builtin", "bi") \ V(CALL_DEBUG_BREAK_TAG, "CallDebugBreak", "cdb") \ V(CALL_DEBUG_PREPARE_STEP_IN_TAG, "CallDebugPrepareStepIn", "cdbsi") \ @@ -298,6 +299,9 @@ // An array of log events names. static const char** log_events_; + + // Counter for repeated tick events. + static int tick_repeat_count_; // Internal implementation classes with access to // private members. Modified: branches/bleeding_edge/src/utils.h ============================================================================== --- branches/bleeding_edge/src/utils.h (original) +++ branches/bleeding_edge/src/utils.h Thu Jun 11 07:08:34 2009 @@ -362,6 +362,11 @@ Sort(PointerValueCompare<T>); } + void Truncate(int length) { + ASSERT(length <= length_); + length_ = length; + } + // Releases the array underlying this vector. Once disposed the // vector is empty. void Dispose() { Modified: branches/bleeding_edge/test/cctest/test-log-utils.cc ============================================================================== --- branches/bleeding_edge/test/cctest/test-log-utils.cc (original) +++ branches/bleeding_edge/test/cctest/test-log-utils.cc Thu Jun 11 07:08:34 2009 @@ -9,8 +9,12 @@ #include "log-utils.h" #include "cctest.h" +using v8::internal::CStrVector; using v8::internal::EmbeddedVector; using v8::internal::LogDynamicBuffer; +using v8::internal::LogRecordCompressor; +using v8::internal::MutableCStrVector; +using v8::internal::ScopedVector; using v8::internal::Vector; // Fills 'ref_buffer' with test data: a sequence of two-digit @@ -47,9 +51,13 @@ const Vector<V>& value) { if (expected.length() != value.length()) { V8_Fatal(file, line, "CHECK_EQ(%s, %s) failed\n" - "# Vectors lengths differ: %d expected, %d found", + "# Vectors lengths differ: %d expected, %d found\n" + "# Expected: %.*s\n" + "# Found: %.*s", expected_source, value_source, - expected.length(), value.length()); + expected.length(), value.length(), + expected.length(), expected.start(), + value.length(), value.start()); } if (strncmp(expected.start(), value.start(), expected.length()) != 0) { V8_Fatal(file, line, "CHECK_EQ(%s, %s) failed\n" @@ -124,9 +132,161 @@ // Check the seal. EmbeddedVector<char, 50> seal_buf; CHECK_EQ(seal_size, ReadData(&dynabuf, 100, &seal_buf)); - CHECK_EQ(v8::internal::CStrVector(seal), seal_buf.SubVector(0, seal_size)); + CHECK_EQ(CStrVector(seal), seal_buf.SubVector(0, seal_size)); // Verify that there's no data beyond the seal. CHECK_EQ(0, ReadData(&dynabuf, 100 + seal_size, &buf)); +} + + +TEST(CompressorStore) { + LogRecordCompressor comp(2, 0); + const Vector<const char> empty = CStrVector(""); + CHECK(comp.Store(empty)); + CHECK(!comp.Store(empty)); + CHECK(!comp.Store(empty)); + const Vector<const char> aaa = CStrVector("aaa"); + CHECK(comp.Store(aaa)); + CHECK(!comp.Store(aaa)); + CHECK(!comp.Store(aaa)); + CHECK(comp.Store(empty)); + CHECK(!comp.Store(empty)); + CHECK(!comp.Store(empty)); +} + + +void CheckCompression(LogRecordCompressor* comp, + const Vector<const char>& after) { + EmbeddedVector<char, 100> result; + CHECK(comp->RetrievePreviousCompressed(&result)); + CHECK_EQ(after, result); +} + + +void CheckCompression(LogRecordCompressor* comp, + const char* after) { + CheckCompression(comp, CStrVector(after)); +} + + +TEST(CompressorNonCompressed) { + LogRecordCompressor comp(2, 0); + CHECK(!comp.RetrievePreviousCompressed(NULL)); + const Vector<const char> empty = CStrVector(""); + CHECK(comp.Store(empty)); + CHECK(!comp.RetrievePreviousCompressed(NULL)); + const Vector<const char> a_x_20 = CStrVector("aaaaaaaaaaaaaaaaaaaa"); + CHECK(comp.Store(a_x_20)); + CheckCompression(&comp, empty); + CheckCompression(&comp, empty); + CHECK(comp.Store(empty)); + CheckCompression(&comp, a_x_20); + CheckCompression(&comp, a_x_20); +} + + +TEST(CompressorSingleLine) { + LogRecordCompressor comp(3, strlen("xxx,")); + const Vector<const char> string_1 = CStrVector("eee,ddd,ccc,bbb,aaa"); + CHECK(comp.Store(string_1)); + const Vector<const char> string_2 = CStrVector("fff,ddd,ccc,bbb,aaa"); + CHECK(comp.Store(string_2)); + // string_1 hasn't been compressed. + CheckCompression(&comp, string_1); + CheckCompression(&comp, string_1); + const Vector<const char> string_3 = CStrVector("hhh,ggg,ccc,bbb,aaa"); + CHECK(comp.Store(string_3)); + // string_2 compressed using string_1. + CheckCompression(&comp, "fff,#1:4"); + CheckCompression(&comp, "fff,#1:4"); + CHECK(!comp.Store(string_3)); + // Expecting no changes. + CheckCompression(&comp, "fff,#1:4"); + CHECK(!comp.Store(string_3)); + // Expecting no changes. + CheckCompression(&comp, "fff,#1:4"); + const Vector<const char> string_4 = CStrVector("iii,hhh,ggg,ccc,bbb,aaa"); + CHECK(comp.Store(string_4)); + // string_3 compressed using string_2. + CheckCompression(&comp, "hhh,ggg#1:7"); + const Vector<const char> string_5 = CStrVector("nnn,mmm,lll,kkk,jjj"); + CHECK(comp.Store(string_5)); + // string_4 compressed using string_3. + CheckCompression(&comp, "iii,#1:0"); + const Vector<const char> string_6 = CStrVector("nnn,mmmmmm,lll,kkk,jjj"); + CHECK(comp.Store(string_6)); + // string_5 hasn't been compressed. + CheckCompression(&comp, string_5); + CHECK(comp.Store(string_5)); + // string_6 compressed using string_5. + CheckCompression(&comp, "nnn,mmm#1:4"); + const Vector<const char> string_7 = CStrVector("nnnnnn,mmm,lll,kkk,jjj"); + CHECK(comp.Store(string_7)); + // string_5 compressed using string_6. + CheckCompression(&comp, "nnn,#1:7"); + const Vector<const char> string_8 = CStrVector("xxn,mmm,lll,kkk,jjj"); + CHECK(comp.Store(string_8)); + // string_7 compressed using string_5. + CheckCompression(&comp, "nnnn#1:1"); + const Vector<const char> string_9 = + CStrVector("aaaaaaaaaaaaa,bbbbbbbbbbbbbbbbb"); + CHECK(comp.Store(string_9)); + // string_8 compressed using string_7. + CheckCompression(&comp, "xxn,#1:7"); + const Vector<const char> string_10 = + CStrVector("aaaaaaaaaaaaa,cccccccbbbbbbbbbb"); + CHECK(comp.Store(string_10)); + // string_9 hasn't been compressed. + CheckCompression(&comp, string_9); + CHECK(comp.Store(string_1)); + // string_10 compressed using string_9. + CheckCompression(&comp, "aaaaaaaaaaaaa,ccccccc#1:21"); +} + + + +TEST(CompressorMultiLines) { + const int kWindowSize = 5; + LogRecordCompressor comp(kWindowSize, strlen("xxx,")); + const Vector<const char> string_1 = CStrVector("eee,ddd,ccc,bbb,aaa"); + CHECK(comp.Store(string_1)); + const Vector<const char> string_2 = CStrVector("iii,hhh,ggg,fff,aaa"); + CHECK(comp.Store(string_2)); + const Vector<const char> string_3 = CStrVector("mmm,lll,kkk,jjj,aaa"); + CHECK(comp.Store(string_3)); + const Vector<const char> string_4 = CStrVector("nnn,hhh,ggg,fff,aaa"); + CHECK(comp.Store(string_4)); + const Vector<const char> string_5 = CStrVector("ooo,lll,kkk,jjj,aaa"); + CHECK(comp.Store(string_5)); + // string_4 compressed using string_2. + CheckCompression(&comp, "nnn,#2:4"); + CHECK(comp.Store(string_1)); + // string_5 compressed using string_3. + CheckCompression(&comp, "ooo,#2:4"); + CHECK(comp.Store(string_4)); + // string_1 is out of buffer by now, so it shouldn't be compressed. + CHECK_GE(5, kWindowSize); + CheckCompression(&comp, string_1); + CHECK(comp.Store(string_2)); + // string_4 compressed using itself. + CheckCompression(&comp, "nnn,#3:4"); +} + + +TEST(CompressorBestSelection) { + LogRecordCompressor comp(5, strlen("xxx,")); + const Vector<const char> string_1 = CStrVector("eee,ddd,ccc,bbb,aaa"); + CHECK(comp.Store(string_1)); + const Vector<const char> string_2 = CStrVector("ddd,ccc,bbb,aaa"); + CHECK(comp.Store(string_2)); + const Vector<const char> string_3 = CStrVector("fff,eee,ddd,ccc,bbb,aaa"); + CHECK(comp.Store(string_3)); + // string_2 compressed using string_1. + CheckCompression(&comp, "ddd,#1:8"); + const Vector<const char> string_4 = CStrVector("nnn,hhh,ggg,fff,aaa"); + CHECK(comp.Store(string_4)); + // Compressing string_3 using string_1 gives a better compression than + // using string_2. + CheckCompression(&comp, "fff,#2:0"); } #endif // ENABLE_LOGGING_AND_PROFILING --~--~---------~--~----~------------~-------~--~----~ v8-dev mailing list v8-dev@googlegroups.com http://groups.google.com/group/v8-dev -~----------~----~----~----~------~----~------~--~---