Author: mikhail.naga...@gmail.com
Date: Thu Jun 11 07:08:34 2009
New Revision: 2147

Modified:
    branches/bleeding_edge/src/log-utils.cc
    branches/bleeding_edge/src/log-utils.h
    branches/bleeding_edge/src/log.cc
    branches/bleeding_edge/src/log.h
    branches/bleeding_edge/src/utils.h
    branches/bleeding_edge/test/cctest/test-log-utils.cc

Log:
Implement tick events compression in a log file.

Two techniques are involved:
  - compress repeated line ends (common stack beginnings) by using back  
references;
  - do RLE compression of repeated tick events.

This gives only 5% size reduction on benchmarks run, but this is because  
tick events are only comprise 10% of file size. Under Chromium winnings are  
bigger because long repeated samples of idleness are now compressed into a  
single line.

Tickprocessor will be updated in the next patch.

Review URL: http://codereview.chromium.org/123012


Modified: branches/bleeding_edge/src/log-utils.cc
==============================================================================
--- branches/bleeding_edge/src/log-utils.cc     (original)
+++ branches/bleeding_edge/src/log-utils.cc     Thu Jun 11 07:08:34 2009
@@ -123,15 +123,22 @@
  Log::WritePtr Log::Write = NULL;
  FILE* Log::output_handle_ = NULL;
  LogDynamicBuffer* Log::output_buffer_ = NULL;
-// Must be the same message as in Logger::PauseProfiler
+// Must be the same message as in Logger::PauseProfiler.
  const char* Log::kDynamicBufferSeal = "profiler,\"pause\"\n";
  Mutex* Log::mutex_ = NULL;
  char* Log::message_buffer_ = NULL;
+LogRecordCompressor* Log::record_compressor_ = NULL;
+// Must be the same as compressed tick event name from Logger.
+const char* Log::kCompressedTickEventName = "t,";


  void Log::Init() {
    mutex_ = OS::CreateMutex();
    message_buffer_ = NewArray<char>(kMessageBufferSize);
+  if (FLAG_compress_log) {
+    record_compressor_ = new LogRecordCompressor(
+        kRecordCompressorWindow, strlen(kCompressedTickEventName));
+  }
  }


@@ -173,6 +180,12 @@
    }
    Write = NULL;

+  delete record_compressor_;
+  record_compressor_ = NULL;
+
+  DeleteArray(message_buffer_);
+  message_buffer_ = NULL;
+
    delete mutex_;
    mutex_ = NULL;

@@ -280,6 +293,29 @@
  }


+bool LogMessageBuilder::StoreInCompressor() {
+  if (!FLAG_compress_log) return true;
+  ASSERT(Log::record_compressor_ != NULL);
+  return Log::record_compressor_->Store(
+      Vector<const char>(Log::message_buffer_, pos_));
+}
+
+
+bool LogMessageBuilder::RetrieveCompressedPrevious(const char* prefix) {
+  if (!FLAG_compress_log) return true;
+  ASSERT(Log::record_compressor_ != NULL);
+  pos_ = 0;
+  if (prefix[0] != '\0') Append(prefix);
+  Vector<char> prev_record(Log::message_buffer_ + pos_,
+                           Log::kMessageBufferSize - pos_);
+  const bool has_previous =
+      Log::record_compressor_->RetrievePreviousCompressed(&prev_record);
+  if (!has_previous) return false;
+  pos_ += prev_record.length();
+  return true;
+}
+
+
  void LogMessageBuilder::WriteToLogFile() {
    ASSERT(pos_ <= Log::kMessageBufferSize);
    const int written = Log::Write(Log::message_buffer_, pos_);
@@ -295,6 +331,105 @@
    if (written != len && write_failure_handler != NULL) {
      write_failure_handler();
    }
+}
+
+
+// Formatting string for back references. E.g. "#2:10" means
+// "the second line above, start from char 10 (0-based)".
+const char* LogRecordCompressor::kBackwardReferenceFormat = "#%d:%d";
+
+
+LogRecordCompressor::~LogRecordCompressor() {
+  for (int i = 0; i < buffer_.length(); ++i) {
+    buffer_[i].Dispose();
+  }
+}
+
+
+bool LogRecordCompressor::Store(const Vector<const char>& record) {
+  // Check if the record is the same as the last stored one.
+  if (curr_ != -1) {
+    Vector<const char>& curr = buffer_[curr_];
+    if (record.length() == curr.length()
+        && strncmp(record.start(), curr.start(), record.length()) == 0) {
+      return false;
+    }
+  }
+  // buffer_ is circular.
+  prev_ = curr_++;
+  curr_ %= buffer_.length();
+  Vector<char> record_copy = Vector<char>::New(record.length());
+  memcpy(record_copy.start(), record.start(), record.length());
+  buffer_[curr_].Dispose();
+  buffer_[curr_] =
+      Vector<const char>(record_copy.start(), record_copy.length());
+  return true;
+}
+
+
+bool LogRecordCompressor::RetrievePreviousCompressed(
+    Vector<char>* prev_record) {
+  if (prev_ == -1) return false;
+
+  int index = prev_;
+  // Distance from prev_.
+  int distance = 0;
+  // Best compression result among records in the buffer.
+  struct {
+    intptr_t truncated_len;
+    int distance;
+    int copy_from_pos;
+  } best = {-1, 0, 0};
+  Vector<const char>& prev = buffer_[prev_];
+  const char* const prev_start = prev.start() + start_pos_;
+  const char* const prev_end = prev.start() + prev.length();
+  do {
+    // We're moving backwards until we reach the current record.
+    // Remember that buffer_ is circular.
+    if (--index == -1) index = buffer_.length() - 1;
+    ++distance;
+    if (index == curr_) break;
+
+    Vector<const char>& data = buffer_[index];
+    if (data.start() == NULL) break;
+    const char* const data_end = data.start() + data.length();
+    const char* prev_ptr = prev_end;
+    const char* data_ptr = data_end;
+    // Compare strings backwards, stop on the last matching character.
+    while (prev_ptr != prev_start && data_ptr != data.start()
+          && *(prev_ptr - 1) == *(data_ptr - 1)) {
+      --prev_ptr;
+      --data_ptr;
+    }
+    const intptr_t truncated_len = prev_end - prev_ptr;
+    if (truncated_len < kUncompressibleBound) continue;
+
+    // Record compression results.
+    if (truncated_len > best.truncated_len) {
+      best.truncated_len = truncated_len;
+      best.distance = distance;
+      best.copy_from_pos = data_ptr - data.start();
+    }
+  } while (true);
+
+  if (best.distance == 0) {
+    // Can't compress the previous record. Return as is.
+    ASSERT(prev_record->length() >= prev.length());
+    memcpy(prev_record->start(), prev.start(), prev.length());
+    prev_record->Truncate(prev.length());
+  } else {
+    // Copy the uncompressible part unchanged.
+    const intptr_t unchanged_len = prev.length() - best.truncated_len;
+    ASSERT(prev_record->length() >= unchanged_len + kUncompressibleBound);
+    memcpy(prev_record->start(), prev.start(), unchanged_len);
+    // Append the backward reference.
+    Vector<char> patch(prev_record->start() + unchanged_len,
+                       kUncompressibleBound);
+    OS::SNPrintF(patch, kBackwardReferenceFormat,
+                 best.distance, best.copy_from_pos);
+    prev_record->Truncate(unchanged_len + strlen(patch.start()));
+  }
+  return true;
  }

  #endif  // ENABLE_LOGGING_AND_PROFILING

Modified: branches/bleeding_edge/src/log-utils.h
==============================================================================
--- branches/bleeding_edge/src/log-utils.h      (original)
+++ branches/bleeding_edge/src/log-utils.h      Thu Jun 11 07:08:34 2009
@@ -88,6 +88,48 @@
  };


+// An utility class for performing backward reference compression
+// of string ends. It operates using a window of previous strings.
+class LogRecordCompressor {
+ public:
+  // 'window_size' is the size of backward lookup window.
+  // 'start_pos' is the length of string prefix that must be left  
uncompressed.
+  LogRecordCompressor(int window_size, int start_pos)
+      : buffer_(window_size), start_pos_(start_pos), curr_(-1), prev_(-1) {
+    // Must have place for the current record and the previous.
+    ASSERT(window_size >= 2);
+  }
+  ~LogRecordCompressor();
+
+  // Fills vector with a compressed version of the previous record.
+  // Returns false if there is no previous record.
+  bool RetrievePreviousCompressed(Vector<char>* prev_record);
+
+  // Stores a record if it differs from a previous one (or there's no  
previous).
+  // Returns true, if the record has been stored.
+  bool Store(const Vector<const char>& record);
+
+ private:
+  // Record field separator. It is assumed that it can't appear
+  // inside a field (a simplified version of CSV format).
+  static const char kFieldSeparator = ',';
+
+  // Formatting string for back references.
+  static const char* kBackwardReferenceFormat;
+
+  // A boundary value. If current record only differs in
+  // kUncompressibleBound chars, don't compress it.
+  // The value must be greater than the length of maximum
+  // projected backward reference length.
+  static const intptr_t kUncompressibleBound = 10;
+
+  ScopedVector< Vector<const char> > buffer_;
+  const int start_pos_;
+  int curr_;
+  int prev_;
+};
+
+
  // Functions and data for performing output of log messages.
  class Log : public AllStatic {
   public:
@@ -114,6 +156,9 @@
      return !is_stopped_ && (output_handle_ != NULL || output_buffer_ !=  
NULL);
    }

+  // Size of buffer used for record compression.
+  static const int kRecordCompressorWindow = 6;
+
   private:
    typedef int (*WritePtr)(const char* msg, int length);

@@ -169,6 +214,11 @@
    // mutex_ should be acquired before using it.
    static char* message_buffer_;

+  // An utility object for compressing repeated parts of records.
+  static LogRecordCompressor* record_compressor_;
+
+  static const char* kCompressedTickEventName;
+
    friend class LogMessageBuilder;
  };

@@ -195,6 +245,19 @@
    void Append(String* str);

    void AppendDetailed(String* str, bool show_impl_info);
+
+  // Stores log message into compressor, returns true if the message
+  // was stored (i.e. doesn't repeat the previous one). If log compression  
is
+  // disabled, does nothing and returns true.
+  bool StoreInCompressor();
+
+  // Sets log message to a previous version of compressed message.
+  // Returns false, if there is no previous message. If log compression
+  // is disabled, does nothing and retuns true.
+  bool RetrieveCompressedPrevious() { return  
RetrieveCompressedPrevious(""); }
+
+  // Does the same at the version without arguments, and sets a prefix.
+  bool RetrieveCompressedPrevious(const char* prefix);

    // Write the log message to the log file currently opened.
    void WriteToLogFile();

Modified: branches/bleeding_edge/src/log.cc
==============================================================================
--- branches/bleeding_edge/src/log.cc   (original)
+++ branches/bleeding_edge/src/log.cc   Thu Jun 11 07:08:34 2009
@@ -303,6 +303,7 @@
  VMState Logger::bottom_state_(EXTERNAL);
  SlidingStateWindow* Logger::sliding_state_window_ = NULL;
  const char** Logger::log_events_ = NULL;
+int Logger::tick_repeat_count_ = 0;

  #define DECLARE_LONG_EVENT(ignore1, long_name, ignore2) long_name,
  const char* kLongLogEventsNames[Logger::NUMBER_OF_LOG_EVENTS] = {
@@ -326,6 +327,9 @@
    if (!Log::IsEnabled()) return;
    LogMessageBuilder msg;
    msg.Append("profiler,\"begin\",%d\n", kSamplingIntervalMs);
+  if (FLAG_compress_log) {
+    msg.Append("profiler,\"compression\",%d\n",  
Log::kRecordCompressorWindow);
+  }
    msg.WriteToLogFile();
  }

@@ -848,6 +852,28 @@
        msg.Append(",0x%" V8PRIxPTR, sample->stack[i]);
      }
    }
+  // In case if log compression is disabled, the flow is straightforward,
+  // because both StoreInCompressor and RetrieveCompressedPrevious do  
nothing
+  // and just return 'true'. Otherwise, to perform compression of repeated
+  // tick events, instead of the current event, the previous one is  
written.
+  if (!msg.StoreInCompressor()) {
+    // Current message repeats the previous one, don't write it.
+    ++tick_repeat_count_;
+    return;
+  }
+
+  bool has_previous_message;
+  if (tick_repeat_count_ > 0) {
+    EmbeddedVector<char, 20> prefix;
+    OS::SNPrintF(prefix, "%s,%d,",
+                 log_events_[REPEAT_META_EVENT],
+                 tick_repeat_count_ + 1);
+    tick_repeat_count_ = 0;
+    has_previous_message = msg.RetrieveCompressedPrevious(prefix.start());
+  } else {
+    has_previous_message = msg.RetrieveCompressedPrevious();
+  }
+  if (!has_previous_message) return;
    msg.Append('\n');
    msg.WriteToLogFile();
  }

Modified: branches/bleeding_edge/src/log.h
==============================================================================
--- branches/bleeding_edge/src/log.h    (original)
+++ branches/bleeding_edge/src/log.h    Thu Jun 11 07:08:34 2009
@@ -107,6 +107,7 @@
    V(CODE_MOVE_EVENT,                "code-move",              "cm")       \
    V(CODE_DELETE_EVENT,              "code-delete",            "cd")       \
    V(TICK_EVENT,                     "tick",                   "t")        \
+  V(REPEAT_META_EVENT,              "repeat",                 "r")        \
    V(BUILTIN_TAG,                    "Builtin",                "bi")       \
    V(CALL_DEBUG_BREAK_TAG,           "CallDebugBreak",         "cdb")      \
    V(CALL_DEBUG_PREPARE_STEP_IN_TAG, "CallDebugPrepareStepIn", "cdbsi")    \
@@ -298,6 +299,9 @@

    // An array of log events names.
    static const char** log_events_;
+
+  // Counter for repeated tick events.
+  static int tick_repeat_count_;

    // Internal implementation classes with access to
    // private members.

Modified: branches/bleeding_edge/src/utils.h
==============================================================================
--- branches/bleeding_edge/src/utils.h  (original)
+++ branches/bleeding_edge/src/utils.h  Thu Jun 11 07:08:34 2009
@@ -362,6 +362,11 @@
      Sort(PointerValueCompare<T>);
    }

+  void Truncate(int length) {
+    ASSERT(length <= length_);
+    length_ = length;
+  }
+
    // Releases the array underlying this vector. Once disposed the
    // vector is empty.
    void Dispose() {

Modified: branches/bleeding_edge/test/cctest/test-log-utils.cc
==============================================================================
--- branches/bleeding_edge/test/cctest/test-log-utils.cc        (original)
+++ branches/bleeding_edge/test/cctest/test-log-utils.cc        Thu Jun 11  
07:08:34 2009
@@ -9,8 +9,12 @@
  #include "log-utils.h"
  #include "cctest.h"

+using v8::internal::CStrVector;
  using v8::internal::EmbeddedVector;
  using v8::internal::LogDynamicBuffer;
+using v8::internal::LogRecordCompressor;
+using v8::internal::MutableCStrVector;
+using v8::internal::ScopedVector;
  using v8::internal::Vector;

  // Fills 'ref_buffer' with test data: a sequence of two-digit
@@ -47,9 +51,13 @@
                                       const Vector<V>& value) {
    if (expected.length() != value.length()) {
      V8_Fatal(file, line, "CHECK_EQ(%s, %s) failed\n"
-             "#   Vectors lengths differ: %d expected, %d found",
+             "#   Vectors lengths differ: %d expected, %d found\n"
+             "#   Expected: %.*s\n"
+             "#   Found: %.*s",
               expected_source, value_source,
-             expected.length(), value.length());
+             expected.length(), value.length(),
+             expected.length(), expected.start(),
+             value.length(), value.start());
    }
    if (strncmp(expected.start(), value.start(), expected.length()) != 0) {
      V8_Fatal(file, line, "CHECK_EQ(%s, %s) failed\n"
@@ -124,9 +132,161 @@
    // Check the seal.
    EmbeddedVector<char, 50> seal_buf;
    CHECK_EQ(seal_size, ReadData(&dynabuf, 100, &seal_buf));
-  CHECK_EQ(v8::internal::CStrVector(seal), seal_buf.SubVector(0,  
seal_size));
+  CHECK_EQ(CStrVector(seal), seal_buf.SubVector(0, seal_size));
    // Verify that there's no data beyond the seal.
    CHECK_EQ(0, ReadData(&dynabuf, 100 + seal_size, &buf));
+}
+
+
+TEST(CompressorStore) {
+  LogRecordCompressor comp(2, 0);
+  const Vector<const char> empty = CStrVector("");
+  CHECK(comp.Store(empty));
+  CHECK(!comp.Store(empty));
+  CHECK(!comp.Store(empty));
+  const Vector<const char> aaa = CStrVector("aaa");
+  CHECK(comp.Store(aaa));
+  CHECK(!comp.Store(aaa));
+  CHECK(!comp.Store(aaa));
+  CHECK(comp.Store(empty));
+  CHECK(!comp.Store(empty));
+  CHECK(!comp.Store(empty));
+}
+
+
+void CheckCompression(LogRecordCompressor* comp,
+                      const Vector<const char>& after) {
+  EmbeddedVector<char, 100> result;
+  CHECK(comp->RetrievePreviousCompressed(&result));
+  CHECK_EQ(after, result);
+}
+
+
+void CheckCompression(LogRecordCompressor* comp,
+                      const char* after) {
+  CheckCompression(comp, CStrVector(after));
+}
+
+
+TEST(CompressorNonCompressed) {
+  LogRecordCompressor comp(2, 0);
+  CHECK(!comp.RetrievePreviousCompressed(NULL));
+  const Vector<const char> empty = CStrVector("");
+  CHECK(comp.Store(empty));
+  CHECK(!comp.RetrievePreviousCompressed(NULL));
+  const Vector<const char> a_x_20 = CStrVector("aaaaaaaaaaaaaaaaaaaa");
+  CHECK(comp.Store(a_x_20));
+  CheckCompression(&comp, empty);
+  CheckCompression(&comp, empty);
+  CHECK(comp.Store(empty));
+  CheckCompression(&comp, a_x_20);
+  CheckCompression(&comp, a_x_20);
+}
+
+
+TEST(CompressorSingleLine) {
+  LogRecordCompressor comp(3, strlen("xxx,"));
+  const Vector<const char> string_1 = CStrVector("eee,ddd,ccc,bbb,aaa");
+  CHECK(comp.Store(string_1));
+  const Vector<const char> string_2 = CStrVector("fff,ddd,ccc,bbb,aaa");
+  CHECK(comp.Store(string_2));
+  // string_1 hasn't been compressed.
+  CheckCompression(&comp, string_1);
+  CheckCompression(&comp, string_1);
+  const Vector<const char> string_3 = CStrVector("hhh,ggg,ccc,bbb,aaa");
+  CHECK(comp.Store(string_3));
+  // string_2 compressed using string_1.
+  CheckCompression(&comp, "fff,#1:4");
+  CheckCompression(&comp, "fff,#1:4");
+  CHECK(!comp.Store(string_3));
+  // Expecting no changes.
+  CheckCompression(&comp, "fff,#1:4");
+  CHECK(!comp.Store(string_3));
+  // Expecting no changes.
+  CheckCompression(&comp, "fff,#1:4");
+  const Vector<const char> string_4 =  
CStrVector("iii,hhh,ggg,ccc,bbb,aaa");
+  CHECK(comp.Store(string_4));
+  // string_3 compressed using string_2.
+  CheckCompression(&comp, "hhh,ggg#1:7");
+  const Vector<const char> string_5 = CStrVector("nnn,mmm,lll,kkk,jjj");
+  CHECK(comp.Store(string_5));
+  // string_4 compressed using string_3.
+  CheckCompression(&comp, "iii,#1:0");
+  const Vector<const char> string_6 = CStrVector("nnn,mmmmmm,lll,kkk,jjj");
+  CHECK(comp.Store(string_6));
+  // string_5 hasn't been compressed.
+  CheckCompression(&comp, string_5);
+  CHECK(comp.Store(string_5));
+  // string_6 compressed using string_5.
+  CheckCompression(&comp, "nnn,mmm#1:4");
+  const Vector<const char> string_7 = CStrVector("nnnnnn,mmm,lll,kkk,jjj");
+  CHECK(comp.Store(string_7));
+  // string_5 compressed using string_6.
+  CheckCompression(&comp, "nnn,#1:7");
+  const Vector<const char> string_8 = CStrVector("xxn,mmm,lll,kkk,jjj");
+  CHECK(comp.Store(string_8));
+  // string_7 compressed using string_5.
+  CheckCompression(&comp, "nnnn#1:1");
+  const Vector<const char> string_9 =
+      CStrVector("aaaaaaaaaaaaa,bbbbbbbbbbbbbbbbb");
+  CHECK(comp.Store(string_9));
+  // string_8 compressed using string_7.
+  CheckCompression(&comp, "xxn,#1:7");
+  const Vector<const char> string_10 =
+      CStrVector("aaaaaaaaaaaaa,cccccccbbbbbbbbbb");
+  CHECK(comp.Store(string_10));
+  // string_9 hasn't been compressed.
+  CheckCompression(&comp, string_9);
+  CHECK(comp.Store(string_1));
+  // string_10 compressed using string_9.
+  CheckCompression(&comp, "aaaaaaaaaaaaa,ccccccc#1:21");
+}
+
+
+
+TEST(CompressorMultiLines) {
+  const int kWindowSize = 5;
+  LogRecordCompressor comp(kWindowSize, strlen("xxx,"));
+  const Vector<const char> string_1 = CStrVector("eee,ddd,ccc,bbb,aaa");
+  CHECK(comp.Store(string_1));
+  const Vector<const char> string_2 = CStrVector("iii,hhh,ggg,fff,aaa");
+  CHECK(comp.Store(string_2));
+  const Vector<const char> string_3 = CStrVector("mmm,lll,kkk,jjj,aaa");
+  CHECK(comp.Store(string_3));
+  const Vector<const char> string_4 = CStrVector("nnn,hhh,ggg,fff,aaa");
+  CHECK(comp.Store(string_4));
+  const Vector<const char> string_5 = CStrVector("ooo,lll,kkk,jjj,aaa");
+  CHECK(comp.Store(string_5));
+  // string_4 compressed using string_2.
+  CheckCompression(&comp, "nnn,#2:4");
+  CHECK(comp.Store(string_1));
+  // string_5 compressed using string_3.
+  CheckCompression(&comp, "ooo,#2:4");
+  CHECK(comp.Store(string_4));
+  // string_1 is out of buffer by now, so it shouldn't be compressed.
+  CHECK_GE(5, kWindowSize);
+  CheckCompression(&comp, string_1);
+  CHECK(comp.Store(string_2));
+  // string_4 compressed using itself.
+  CheckCompression(&comp, "nnn,#3:4");
+}
+
+
+TEST(CompressorBestSelection) {
+  LogRecordCompressor comp(5, strlen("xxx,"));
+  const Vector<const char> string_1 = CStrVector("eee,ddd,ccc,bbb,aaa");
+  CHECK(comp.Store(string_1));
+  const Vector<const char> string_2 = CStrVector("ddd,ccc,bbb,aaa");
+  CHECK(comp.Store(string_2));
+  const Vector<const char> string_3 =  
CStrVector("fff,eee,ddd,ccc,bbb,aaa");
+  CHECK(comp.Store(string_3));
+  // string_2 compressed using string_1.
+  CheckCompression(&comp, "ddd,#1:8");
+  const Vector<const char> string_4 = CStrVector("nnn,hhh,ggg,fff,aaa");
+  CHECK(comp.Store(string_4));
+  // Compressing string_3 using string_1 gives a better compression than
+  // using string_2.
+  CheckCompression(&comp, "fff,#2:0");
  }

  #endif  // ENABLE_LOGGING_AND_PROFILING

--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
v8-dev@googlegroups.com
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---

Reply via email to