This is an automated email from the ASF dual-hosted git repository.

kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit 42aa782fb334ee907b99962ac81c53488e35fa41
Author: Taylor Baldwin <tbald...@uber.com>
AuthorDate: Thu Oct 3 10:34:12 2019 +0200

    ARROW-6767: [JS] Lazily bind batches in scan/scanReverse
    
    I noticed some `TODO` comments in the JS client library that expressed 
interest in calling `bind(batch)` lazily. This PR implements that optimization 
and updates related tests.
    
    Happy to make updates per feedback!
    
    Created a JIRA issue 
[here](https://issues.apache.org/jira/browse/ARROW-6767).
    
    Closes #5565 from rolyatmax/tb/lazily-bind-batches and squashes the 
following commits:
    
    8877a8f8f <Taylor Baldwin> ARROW-6767:  lazily bind batches in 
scan/scanReverse
    
    Authored-by: Taylor Baldwin <tbald...@uber.com>
    Signed-off-by: Krisztián Szűcs <szucs.kriszt...@gmail.com>
---
 js/src/compute/dataframe.ts | 30 ++++++++++++++++++++----------
 js/test/unit/table-tests.ts | 28 ++++++++++++++--------------
 2 files changed, 34 insertions(+), 24 deletions(-)

diff --git a/js/src/compute/dataframe.ts b/js/src/compute/dataframe.ts
index e82e65c..db7f3d7 100644
--- a/js/src/compute/dataframe.ts
+++ b/js/src/compute/dataframe.ts
@@ -132,14 +132,19 @@ export class FilteredDataFrame<T extends { [key: string]: 
DataType } = any> exte
         for (let batchIndex = -1; ++batchIndex < numBatches;) {
             // load batches
             const batch = batches[batchIndex];
-            // TODO: bind batches lazily
-            // If predicate doesn't match anything in the batch we don't need
-            // to bind the callback
-            if (bind) { bind(batch); }
             const predicate = this._predicate.bind(batch);
+            let isBound = false;
             // yield all indices
             for (let index = -1, numRows = batch.length; ++index < numRows;) {
-                if (predicate(index, batch)) { next(index, batch); }
+                if (predicate(index, batch)) {
+                    // bind batches lazily - if predicate doesn't match 
anything
+                    // in the batch we don't need to call bind on the batch
+                    if (bind && !isBound) {
+                        bind(batch);
+                        isBound = true;
+                    }
+                    next(index, batch);
+                }
             }
         }
     }
@@ -149,14 +154,19 @@ export class FilteredDataFrame<T extends { [key: string]: 
DataType } = any> exte
         for (let batchIndex = numBatches; --batchIndex >= 0;) {
             // load batches
             const batch = batches[batchIndex];
-            // TODO: bind batches lazily
-            // If predicate doesn't match anything in the batch we don't need
-            // to bind the callback
-            if (bind) { bind(batch); }
             const predicate = this._predicate.bind(batch);
+            let isBound = false;
             // yield all indices
             for (let index = batch.length; --index >= 0;) {
-                if (predicate(index, batch)) { next(index, batch); }
+                if (predicate(index, batch)) {
+                    // bind batches lazily - if predicate doesn't match 
anything
+                    // in the batch we don't need to call bind on the batch
+                    if (bind && !isBound) {
+                        bind(batch);
+                        isBound = true;
+                    }
+                    next(index, batch);
+                }
             }
         }
     }
diff --git a/js/test/unit/table-tests.ts b/js/test/unit/table-tests.ts
index ae2f058..da74b32 100644
--- a/js/test/unit/table-tests.ts
+++ b/js/test/unit/table-tests.ts
@@ -424,6 +424,10 @@ describe(`Table`, () => {
                             get_i32 = col('i32').bind(batch);
                         })),
                     expected: values.filter((row) => (row[F32] as number) * 
(row[I32] as number) > 0)
+                }, {
+                    name: `filter out all records`,
+                    filtered: table.filter(lit(1).eq(0)),
+                    expected: []
                 }
             ];
             for (let this_test of filter_tests) {
@@ -440,15 +444,13 @@ describe(`Table`, () => {
                                 expect(columns.map((c) => 
c.get(idx))).toEqual(expected[expected_idx++]);
                             });
                         });
-                        test(`calls bind function on every batch`, () => {
-                            // Techincally, we only need to call bind on
-                            // batches with data that match the predicate, so
-                            // this test may fail in the future if we change
-                            // that - and that's ok!
+                        test(`calls bind function lazily`, () => {
                             let bind = jest.fn();
                             filtered.scan(() => { }, bind);
-                            for (let batch of table.chunks) {
-                                expect(bind).toHaveBeenCalledWith(batch);
+                            if (expected.length) {
+                                expect(bind).toHaveBeenCalled();
+                            } else {
+                                expect(bind).not.toHaveBeenCalled();
                             }
                         });
                     });
@@ -460,15 +462,13 @@ describe(`Table`, () => {
                                 expect(columns.map((c) => 
c.get(idx))).toEqual(expected[--expected_idx]);
                             });
                         });
-                        test(`calls bind function on every batch`, () => {
-                            // Techincally, we only need to call bind on
-                            // batches with data that match the predicate, so
-                            // this test may fail in the future if we change
-                            // that - and that's ok!
+                        test(`calls bind function lazily`, () => {
                             let bind = jest.fn();
                             filtered.scanReverse(() => { }, bind);
-                            for (let batch of table.chunks) {
-                                expect(bind).toHaveBeenCalledWith(batch);
+                            if (expected.length) {
+                                expect(bind).toHaveBeenCalled();
+                            } else {
+                                expect(bind).not.toHaveBeenCalled();
                             }
                         });
                     });

Reply via email to