This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 76ee290123ebe2d7d6aa45c288d626b3b5ad06bb
Author: Alexey Serbin <[email protected]>
AuthorDate: Mon Dec 15 00:48:13 2025 -0800

    [codegen] encode projection key once
    
    There is no need to encode row projection key multiple times: one time
    when scheduling a compilation task, another time when running it, and
    one more time when inserting a new entry into the codegen cache.
    
    The latter two steps were done by the same thread in the codegen
    compilation pool, and there is just a single thread in the pool.
    Apparently, it's better to encode the key only once by the thread which
    schedules the compilation task upon requesting the corresponding row
    projection.
    
    With this update, there isn't a need for JITWrapper::EncodeOwnKey()
    method anymore.
    
    Change-Id: Ief2a1cf184c86c33031f90d92a10755da3e86e0b
    Reviewed-on: http://gerrit.cloudera.org:8080/23819
    Tested-by: Alexey Serbin <[email protected]>
    Reviewed-by: Zoltan Martonka <[email protected]>
    Reviewed-by: Ashwani Raina <[email protected]>
    Reviewed-by: Abhishek Chennaka <[email protected]>
---
 src/kudu/codegen/code_cache.cc          | 23 +++++++++----------
 src/kudu/codegen/code_cache.h           | 17 +++++++-------
 src/kudu/codegen/compilation_manager.cc | 40 +++++++++++++++++++--------------
 src/kudu/codegen/jit_wrapper.h          |  9 --------
 src/kudu/codegen/row_projector.h        |  7 +-----
 5 files changed, 42 insertions(+), 54 deletions(-)

diff --git a/src/kudu/codegen/code_cache.cc b/src/kudu/codegen/code_cache.cc
index babe40941..21681919b 100644
--- a/src/kudu/codegen/code_cache.cc
+++ b/src/kudu/codegen/code_cache.cc
@@ -26,11 +26,12 @@
 #include "kudu/codegen/jit_wrapper.h"
 #include "kudu/gutil/ref_counted.h"
 #include "kudu/util/cache.h"
-#include "kudu/util/faststring.h"
 #include "kudu/util/slice.h"
 #include "kudu/util/status.h"
 
 namespace kudu {
+class faststring;
+
 namespace codegen {
 
 namespace {
@@ -66,19 +67,15 @@ CodeCache::CodeCache(size_t capacity)
 
 CodeCache::~CodeCache() {}
 
-Status CodeCache::AddEntry(const scoped_refptr<JITWrapper>& value) {
-  // Get the key
-  faststring key;
-  RETURN_NOT_OK(value->EncodeOwnKey(&key));
-
-  JITWrapper* val = value.get();
-  size_t val_len = sizeof(val);
-
-  // We CHECK_NOTNULL because this is always a DRAM-based cache, and if 
allocation
+Status CodeCache::AddEntry(const faststring& key,
+                           const scoped_refptr<JITWrapper>& value) {
+  const JITWrapper* val = value.get();
+  constexpr size_t kValLen = sizeof(val); // NOLINT(bugprone-sizeof-expression)
+  // Using CHECK because this is always a DRAM-based cache, and if allocation
   // failed, we'd just crash the process.
-  auto pending(cache_->Allocate(Slice(key), val_len, /*charge = */1));
+  auto pending(cache_->Allocate(key, kValLen, /*charge = */1));
   CHECK(pending);
-  memcpy(cache_->MutableValue(&pending), &val, val_len);
+  memcpy(cache_->MutableValue(&pending), &val, kValLen);
 
   // Because Cache only accepts void* values, we store just the JITWrapper*
   // and increase its ref count.
@@ -90,7 +87,7 @@ Status CodeCache::AddEntry(const scoped_refptr<JITWrapper>& 
value) {
   return Status::OK();
 }
 
-scoped_refptr<JITWrapper> CodeCache::Lookup(const Slice& key) {
+scoped_refptr<JITWrapper> CodeCache::Lookup(const faststring& key) {
   // Look up in Cache after generating key, returning NULL if not found.
   auto found(cache_->Lookup(key, Cache::EXPECT_IN_CACHE));
   if (!found) {
diff --git a/src/kudu/codegen/code_cache.h b/src/kudu/codegen/code_cache.h
index c137ef06f..7955ce865 100644
--- a/src/kudu/codegen/code_cache.h
+++ b/src/kudu/codegen/code_cache.h
@@ -25,8 +25,8 @@
 namespace kudu {
 
 class Cache;
-class Slice;
 class Status;
+class faststring;
 
 namespace codegen {
 
@@ -44,7 +44,7 @@ class JITWrapper;
 // LRU eviction does not guarantee that a JITWrapper is deleted, only that
 // the cache releases its shared ownership (by decrementing the reference
 // count) of the jit code.
-class CodeCache {
+class CodeCache final {
  public:
   // TODO: currently CodeCache is implemented using the Cache in
   // kudu/util/cache.h, which requires some transformation to nongeneric
@@ -65,18 +65,17 @@ class CodeCache {
   explicit CodeCache(size_t capacity);
   ~CodeCache();
 
-  // This function is NOT thread safe (only one writer may call this at
-  // a time). Attempts to add a new entry 'wrapper' to the cache, using
-  // wrapper->EncodeOwnKey() as the key. Overwrites the previous value
-  // if one exists. If insertion results in excess capacity, LRU eviction
-  // occurs. Returns Status::OK() upon success.
-  Status AddEntry(const scoped_refptr<JITWrapper>& wrapper);
+  // This function is NOT thread safe (only one writer may call this at a 
time).
+  // Attempts to add a new entry 'value' to the cache, using the specified key.
+  // Overwrites the previous value if one exists. If insertion results in
+  // excess capacity, LRU eviction occurs. Returns Status::OK() upon success.
+  Status AddEntry(const faststring& key, const scoped_refptr<JITWrapper>& 
value);
 
   // This function may be called from any thread concurrently with other
   // writes and reads to the cache. Looks in the cache for the specified key.
   // Returns a reference to the associated payload, or NULL if no such entry
   // exists in the cache.
-  scoped_refptr<JITWrapper> Lookup(const Slice& key);
+  scoped_refptr<JITWrapper> Lookup(const faststring& key);
 
  private:
   class EvictionCallback;
diff --git a/src/kudu/codegen/compilation_manager.cc 
b/src/kudu/codegen/compilation_manager.cc
index de16d4a82..615273644 100644
--- a/src/kudu/codegen/compilation_manager.cc
+++ b/src/kudu/codegen/compilation_manager.cc
@@ -23,6 +23,7 @@
 #include <ostream>
 #include <string>
 #include <type_traits>
+#include <utility>
 
 #include <gflags/gflags.h>
 #include <glog/logging.h>
@@ -40,7 +41,6 @@
 #include "kudu/util/logging.h"
 #include "kudu/util/metrics.h"
 #include "kudu/util/monotime.h"
-#include "kudu/util/slice.h"
 #include "kudu/util/status.h"
 #include "kudu/util/stopwatch.h"
 #include "kudu/util/threadpool.h"
@@ -85,16 +85,21 @@ namespace {
 // A CompilationTask is a task which, given a pair of schemas and a cache to
 // refer to, will generate code pertaining to the two schemas and store it in
 // the cache when run.
-class CompilationTask {
+class CompilationTask final {
  public:
   // Requires that the cache and generator are valid for the lifetime
   // of this object.
-  CompilationTask(const Schema& base, const Schema& proj, CodeCache* cache,
+  CompilationTask(Schema base,
+                  Schema proj,
+                  faststring key,
+                  CodeCache* cache,
                   CodeGenerator* generator)
-    : base_(base),
-      proj_(proj),
-      cache_(cache),
-      generator_(generator) {}
+      : base_(std::move(base)),
+        proj_(std::move(proj)),
+        key_(std::move(key)),
+        cache_(cache),
+        generator_(generator) {
+  }
 
   // Can only be run once.
   void Run() {
@@ -109,24 +114,24 @@ class CompilationTask {
 
  private:
   Status RunWithStatus() {
-    faststring key;
-    RETURN_NOT_OK(RowProjectorFunctions::EncodeKey(base_, proj_, &key));
-
     // Check again to make sure we didn't compile it already.
     // This can occur if we request the same schema pair while the
     // first one's compiling.
-    if (cache_->Lookup(key)) return Status::OK();
+    if (cache_->Lookup(key_)) {
+      return Status::OK();
+    }
 
     scoped_refptr<RowProjectorFunctions> functions;
     LOG_TIMING_IF(INFO, FLAGS_codegen_time_compilation, "code-generating row 
projector") {
       RETURN_NOT_OK(generator_->CompileRowProjector(base_, proj_, &functions));
     }
 
-    return cache_->AddEntry(functions);
+    return cache_->AddEntry(key_, functions);
   }
 
-  Schema base_;
-  Schema proj_;
+  const Schema base_;
+  const Schema proj_;
+  const faststring key_;
   CodeCache* const cache_;
   CodeGenerator* const generator_;
 
@@ -189,19 +194,20 @@ bool CompilationManager::RequestRowProjector(const 
Schema* base_schema,
                                              unique_ptr<RowProjector>* out) {
   faststring key;
   const auto s = RowProjectorFunctions::EncodeKey(*base_schema, *projection, 
&key);
-  WARN_NOT_OK(s, "RowProjector compilation request encode key failed");
   if (PREDICT_FALSE(!s.ok())) {
+    LOG(WARNING) << "RowProjector compilation request encode key failed: "
+                 << s.ToString();
     return false;
   }
   ++query_counter_;
 
   scoped_refptr<RowProjectorFunctions> cached(
-    down_cast<RowProjectorFunctions*>(cache_.Lookup(key).get()));
+      down_cast<RowProjectorFunctions*>(cache_.Lookup(key).get()));
 
   // If not cached, add a request to compilation pool
   if (!cached) {
     shared_ptr<CompilationTask> task(make_shared<CompilationTask>(
-        *base_schema, *projection, &cache_, &generator_));
+        *base_schema, *projection, std::move(key), &cache_, &generator_));
     WARN_NOT_OK_EVERY_N_SECS(pool_->Submit([task]() { task->Run(); }),
                     "RowProjector compilation request submit failed", 10);
     return false;
diff --git a/src/kudu/codegen/jit_wrapper.h b/src/kudu/codegen/jit_wrapper.h
index bc79a6a14..daec108a6 100644
--- a/src/kudu/codegen/jit_wrapper.h
+++ b/src/kudu/codegen/jit_wrapper.h
@@ -52,15 +52,6 @@ class JITWrapper : public RefCountedThreadSafe<JITWrapper> {
     ROW_PROJECTOR
   };
 
-  // Returns the key encoding (for the code cache) for this upon success.
-  // If two JITWrapper instances of the same type have the same key, then
-  // their codegenned code should be functionally equivalent.
-  // Appends key to 'out' upon success.
-  // The key must be unique amongst all derived types of JITWrapper.
-  // To do this, the type's enum value from JITWrapper::JITWrapperType
-  // should be prefixed to out.
-  virtual Status EncodeOwnKey(faststring* out) = 0;
-
  protected:
   explicit JITWrapper(std::unique_ptr<JITCodeOwner> owner);
   virtual ~JITWrapper();
diff --git a/src/kudu/codegen/row_projector.h b/src/kudu/codegen/row_projector.h
index 942d06f36..5a604f7e4 100644
--- a/src/kudu/codegen/row_projector.h
+++ b/src/kudu/codegen/row_projector.h
@@ -66,12 +66,7 @@ class RowProjectorFunctions : public JITWrapper {
   ProjectionFunction read() const { return read_f_; }
   ProjectionFunction write() const { return write_f_; }
 
-  Status EncodeOwnKey(faststring* out) override {
-    return EncodeKey(base_schema_, projection_, out);
-  }
-
-  static Status EncodeKey(const Schema& base, const Schema& proj,
-                          faststring* out);
+  static Status EncodeKey(const Schema& base, const Schema& proj, faststring* 
out);
 
  private:
   RowProjectorFunctions(const Schema& base_schema, const Schema& projection,

Reply via email to