On Wed, Jan 25, 2012 at 04:14:07PM +0000, Dave Mitchell wrote:
> then the CPU usage of the while loop broke down as follows:
> 
>     7.0% overhead of loop,              i.e. while() {$c++}
>    19.2% handle the outer method call,  i.e. $sth->fetch()
>          of which half is method lookup,
>          half is assemble args ($sth) and pp_entersub
>    12.7% XS_DBI_dispatch() doing DBI stuff
>    20.5% XS_DBI_dispatch() doing method lookup and doing direct
>          XS call to DBD::mysql::fetchrow_arrayref()
>    40.6% DBD::mysql::fetchrow_arrayref() fetching a row
> 
> Given that my hacky cache for fetch() lookup reduced overall execution
> time by 16.5%, that reduces overall XS_DBI_dispatch percentage from 33.2%
> to 16.7%, i.e. nearly halves the dispatch time.

I've now written some working caching code, that reduces CPU usage on
while ($sth->fetch()) {$c++} from 15.23s to 13.10s, which is a 14%
saving!! The test code just fetches millions of rows from a 2-int table
and then doesn't do anything real with the returned data, so a real-world
example will see a smaller saving, but I'm still very pleased with it :-)

It's not ready for use yet; it passes the DBI test suite, but:
 * I haven't tested it under ithreads or early perls yet;
 * I haven't tried it against the DBD::mysql test suite yet;
 * I haven't written any tests yet for the method cache invalidation,
        e.g. if code does *DBD::mysql::st::foo = sub {...},
   will subsequent calls to $sth->foo() call the new function?

As regards that last point, can you recommend where I should place the new
tests? Is there an existing test script that they can be added to, or if a
new script needs adding, what should it be called, and is there a good
existing script to use as a template?

The code itself (see diff below) just attaches some magic to the outer CV
that caches the GV of the corresponding inner method. It also remembers
the inner stash and generation number, so that the cache can be
invalidated if the outer method is called on another class. So for
example, in the following pathological example:

    while ($sth_mysql->fetch) {
        $sth_oracle->fetch;
        ...
    }

the cache in the magic attached to &DBI::st::fetch will oscillate
between pointing to *DBD::mysql::st:fetch and the similar oracle function,
so no speedup will be seen. But normally, when there is a constant mapping
between the outer and inner subs, the cache will speed things up.

The caching code *does* assume that the name of the outer sub never
changes, i.e. that GvNAME(CvGV(cv)) in XS_DBI_dispatch is constant WRT cv;
is this a reasonable assumption? Otherwise, I would have to cache the sub
name too, which will slow things down.

The code makes use of non-API knowledge about PL_generation and
HvMROMETA(stash)->cache_gen, which could conceivably change in a later
perl release.

> PS - I'm specifically being paid only to fix a performance issue on
> non-threaded builds, so I won't be looking at threaded issues. But
> dPERINTERP looks like bad news on threaded builds.

I've since been told that a) I'm allowed to say who's funding me:
it's booking.com (thanks, guys!); and that b) depending on how much time I
burn on the cache issue, I may have time to look at threaded issues too,
specifically the dPERINTERP rewrite using MY_CXT - assuming that no one
beats me to it!

Dave.


Index: DBI.xs
===================================================================
--- DBI.xs      (revision 15099)
+++ DBI.xs      (working copy)
@@ -82,6 +82,8 @@
 static I32      dbi_hash _((const char *string, long i));
 static void     dbih_dumphandle _((pTHX_ SV *h, const char *msg, int level));
 static int      dbih_dumpcom _((pTHX_ imp_xxh_t *imp_xxh, const char *msg, int 
level));
+static int      method_cache_free(pTHX_ SV* sv, MAGIC* mg);
+static GV*      inner_method_lookup(pTHX_ HV *stash, CV *cv, const char 
*meth_name);
 char *neatsvpv _((SV *sv, STRLEN maxlen));
 SV * preparse(SV *dbh, const char *statement, IV ps_return, IV ps_accept, void 
*foo);
 
@@ -161,6 +163,74 @@
 /* 32 bit magic FNV-0 and FNV-1 prime */
 #define FNV_32_PRIME ((UV)0x01000193)
 
+
+
+/* ext magic attached to outer CV methods to quickly locate the
+ * corresponding inner method
+ */
+
+static MGVTBL method_cache_vtbl = { 0, 0, 0, 0, method_cache_free, 0, 0, 0 };
+
+typedef struct {
+    HV *stash;          /* the stash we found the GV in */
+    GV *gv;             /* the GV containing the inner sub */
+    U32 generation;     /* cache invalidation */
+} method_cache_t;
+
+static int method_cache_free(pTHX_ SV* sv, MAGIC* mg)
+{
+    method_cache_t *c = (method_cache_t *)(mg->mg_ptr);
+    SvREFCNT_dec(c->stash);
+    SvREFCNT_dec(c->gv);
+    return 0;
+}
+
+static GV* inner_method_lookup(pTHX_ HV *stash, CV *cv, const char *meth_name)
+{
+    GV *gv;
+    method_cache_t *c;
+    MAGIC *mg = mg_findext((SV*)cv, DBI_MAGIC, &method_cache_vtbl);
+
+    if (mg) {
+        if (  (c=(method_cache_t *)(mg->mg_ptr))
+            && c->stash == stash
+            && c->generation == PL_generation
+#ifdef HvMROMETA /*introduced in 5.9.5 */
+                + HvMROMETA(stash)->cache_gen
+#endif
+        )
+            return c->gv;
+        /* clear stale cache */
+        SvREFCNT_dec(c->stash);
+        SvREFCNT_dec(c->gv);
+        c->stash = NULL;
+        c->gv    = NULL;
+    }
+
+    gv = gv_fetchmethod_autoload(stash, meth_name, FALSE);
+    if (!gv)
+        return NULL;
+
+    /* create new cache entry */
+    if (!mg) {
+        Newx(c, 1, method_cache_t);
+        mg = sv_magicext((SV*)cv, NULL, DBI_MAGIC, &method_cache_vtbl,
+                            (char *)c, 0);
+    }
+    SvREFCNT_inc(stash);
+    SvREFCNT_inc(gv);
+    c->stash = stash;
+    c->gv    = gv;
+    c->generation = PL_generation
+#ifdef HvMROMETA
+            + HvMROMETA(stash)->cache_gen
+    ;
+#endif
+    return gv;
+}
+
+
+
 /* --- make DBI safe for multiple perl interpreters --- */
 /*     Contributed by Murray Nesbitt of ActiveState     */
 typedef struct {
@@ -3000,6 +3070,7 @@
     int call_depth;
     int is_nested_call;
     NV profile_t1 = 0.0;
+    int is_orig_method_name = 1;
 
     const char  *meth_name = GvNAME(CvGV(cv));
     const dbi_ima_t     *ima = (dbi_ima_t*)CvXSUBANY(cv).any_ptr;
@@ -3175,6 +3246,7 @@
                     croak("%s->%s() invalid redirect method name %s",
                             neatsvpv(h,0), meth_name, 
neatsvpv(meth_name_sv,0));
                 meth_name = SvPV_nolen(meth_name_sv);
+                is_orig_method_name = 0;
             }
             if (ima_flags & IMA_KEEP_ERR)
                 keep_error = TRUE;
@@ -3414,7 +3486,12 @@
             }
         }
 
-        imp_msv = (SV*)gv_fetchmethod_autoload(DBIc_IMP_STASH(imp_xxh), 
meth_name, FALSE);
+        if (is_orig_method_name)
+            imp_msv = (SV*)inner_method_lookup(aTHX_ DBIc_IMP_STASH(imp_xxh),
+                                            cv, meth_name);
+        else
+            imp_msv = (SV*)gv_fetchmethod_autoload(DBIc_IMP_STASH(imp_xxh),
+                                            meth_name, FALSE);
 
         /* if method was a 'func' then try falling back to real 'func' method 
*/
         if (!imp_msv && (ima_flags & IMA_FUNC_REDIRECT)) {
-- 
In economics, the exam questions are the same every year.
They just change the answers.

Reply via email to