This is an automated email from the ASF dual-hosted git repository.

bhulette pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 64d2af6  ARROW-2144: [JS] Don't repeat dictionary lookups in DataFrame 
ops
64d2af6 is described below

commit 64d2af6053271a8eb317cd8d0e78a3ba204b65d8
Author: Brian Hulette <brian.hule...@ccri.com>
AuthorDate: Mon Feb 19 12:44:37 2018 -0500

    ARROW-2144: [JS] Don't repeat dictionary lookups in DataFrame ops
    
    The optimized `Equals` predicate now caches its reverse dictionary lookup 
for subsequent `bind` calls. Also adds `DictionaryVector.reverseLookup(value)` 
and `Vector.indexOf(value)`.
    
    Author: Brian Hulette <brian.hule...@ccri.com>
    
    Closes #1599 from TheNeuralBit/cache-dictionary and squashes the following 
commits:
    
    06be136 [Brian Hulette] throw not implemented error in NestedView.indexOf
    e5eaae2 [Brian Hulette] Add basic typedoc script
    6bb0876 [Brian Hulette] caching should check for -1
    ec990e3 [Brian Hulette] indexOf now returns -1 rather than null...
    e2ae901 [Brian Hulette] Rename find -> indexOf
    e6f34a1 [Brian Hulette] Add basic Utf8 tests
    74de6bf [Brian Hulette] Add unit tests for find in numeric vectors
    15947fc [Brian Hulette] Add Vector.find, use in reverseLookup
    249d070 [Brian Hulette] cache dictionary reverse lookup in member var
---
 js/package.json              |  5 +--
 js/src/predicate.ts          | 27 ++++++-------
 js/src/vector.ts             |  5 +++
 js/src/vector/chunked.ts     | 10 +++++
 js/src/vector/dictionary.ts  |  8 ++++
 js/src/vector/flat.ts        | 31 +++++++++++++++
 js/src/vector/list.ts        |  9 +++++
 js/src/vector/nested.ts      |  3 ++
 js/test/unit/vector-tests.ts | 93 +++++++++++++++++++++++++++++++++++++++++---
 9 files changed, 169 insertions(+), 22 deletions(-)

diff --git a/js/package.json b/js/package.json
index d36e872..1c22fc1 100644
--- a/js/package.json
+++ b/js/package.json
@@ -19,7 +19,7 @@
     "clean:testdata": "gulp clean:testdata",
     "create:testdata": "gulp create:testdata",
     "test:coverage": "gulp test -t ts --coverage",
-    "doc": "shx rm -rf ./doc && esdoc",
+    "doc": "shx rm -rf ./doc && typedoc --mode file --out doc src/Arrow.ts",
     "lint": "run-p lint:*",
     "lint:src": "tslint --fix --project -p tsconfig.json -c tslint.json 
\"src/**/*.ts\"",
     "lint:test": "tslint --fix --project -p test/tsconfig.json -c tslint.json 
\"test/**/*.ts\"",
@@ -70,8 +70,6 @@
     "benchmark": "2.1.4",
     "coveralls": "3.0.0",
     "del": "3.0.0",
-    "esdoc": "1.0.4",
-    "esdoc-standard-plugin": "1.0.0",
     "glob": "7.1.2",
     "google-closure-compiler": "20180101.0.0",
     "gulp": "github:gulpjs/gulp#6d71a658c61edb3090221579d8f97dbe086ba2ed",
@@ -97,6 +95,7 @@
     "trash": "4.2.1",
     "ts-jest": "22.0.1",
     "tslint": "5.9.1",
+    "typedoc": "0.10.0",
     "typescript": "2.7.1",
     "uglifyjs-webpack-plugin": "1.1.6",
     "webpack": "3.10.0",
diff --git a/js/src/predicate.ts b/js/src/predicate.ts
index 9d55274..981ffb1 100644
--- a/js/src/predicate.ts
+++ b/js/src/predicate.ts
@@ -125,6 +125,10 @@ export class Or extends CombinationPredicate {
 }
 
 export class Equals extends ComparisonPredicate {
+    // Helpers used to cache dictionary reverse lookups between calls to bind
+    private lastDictionary: Vector|undefined;
+    private lastKey: number|undefined;
+
     protected _bindLitLit(_batch: RecordBatch, left: Literal, right: Literal): 
PredicateFunc {
         const rtrn: boolean = left.v == right.v;
         return () => rtrn;
@@ -139,20 +143,17 @@ export class Equals extends ComparisonPredicate {
     protected _bindColLit(batch: RecordBatch, col: Col, lit: Literal): 
PredicateFunc {
         const col_func = col.bind(batch);
         if (col.vector instanceof DictionaryVector) {
-            // Assume that there is only one key with the value `lit.v`
-            // TODO: add lazily-computed reverse dictionary lookups, associated
-            // with col.vector.data so that we only have to do this once per
-            // dictionary
-            let key = -1;
-            let dict = col.vector;
-            let data = dict.dictionary!;
-            for (let len = data.length; ++key < len;) {
-                if (data.get(key) === lit.v) {
-                    break;
-                }
+            let key: any;
+            const vector = col.vector as DictionaryVector;
+            if (vector.dictionary !== this.lastDictionary) {
+                key = vector.reverseLookup(lit.v);
+                this.lastDictionary = vector.dictionary;
+                this.lastKey = key;
+            } else {
+                key = this.lastKey;
             }
 
-            if (key == data.length) {
+            if (key === -1) {
                 // the value doesn't exist in the dictionary - always return
                 // false
                 // TODO: special-case of PredicateFunc that encapsulates this
@@ -161,7 +162,7 @@ export class Equals extends ComparisonPredicate {
                 return () => false;
             } else {
                 return (idx: number) => {
-                    return dict.getKey(idx) === key;
+                    return vector.getKey(idx) === key;
                 };
             }
         } else {
diff --git a/js/src/vector.ts b/js/src/vector.ts
index d9ca97b..fa1d16e 100644
--- a/js/src/vector.ts
+++ b/js/src/vector.ts
@@ -28,6 +28,7 @@ export interface View<T extends DataType> {
     get(index: number): T['TValue'] | null;
     set(index: number, value: T['TValue']): void;
     toArray(): IterableArrayLike<T['TValue'] | null>;
+    indexOf(search: T['TValue']): number;
     [Symbol.iterator](): IterableIterator<T['TValue'] | null>;
 }
 
@@ -77,6 +78,9 @@ export class Vector<T extends DataType = any> implements 
VectorLike, View<T>, Vi
     public toArray(): IterableArrayLike<T['TValue'] | null> {
         return this.view.toArray();
     }
+    public indexOf(value: T['TValue']) {
+        return this.view.indexOf(value);
+    }
     public [Symbol.iterator](): IterableIterator<T['TValue'] | null> {
         return this.view[Symbol.iterator]();
     }
@@ -414,6 +418,7 @@ export class DictionaryVector<T extends DataType = 
DataType> extends Vector<Dict
     }
     public getKey(index: number) { return this.indicies.get(index); }
     public getValue(key: number) { return this.dictionary.get(key); }
+    public reverseLookup(value: T) { return this.dictionary.indexOf(value); }
 }
 
 export const createVector = ((VectorLoader: new <T extends DataType>(data: 
Data<T>) => TypeVisitor) => (
diff --git a/js/src/vector/chunked.ts b/js/src/vector/chunked.ts
index 2eaf99c..7876bba 100644
--- a/js/src/vector/chunked.ts
+++ b/js/src/vector/chunked.ts
@@ -103,6 +103,16 @@ export class ChunkedView<T extends DataType> implements 
View<T> {
         }
         return target;
     }
+    public indexOf(search: T['TValue']) {
+        let offset = 0, result;
+        for (const vector of this.chunkVectors) {
+            result = vector.indexOf(search);
+            if (result !== -1) { return result + offset; }
+            offset += vector.length;
+        }
+
+        return -1;
+    }
 }
 
 function typedArraySet(source: TypedArray, target: TypedArray, index: number) {
diff --git a/js/src/vector/dictionary.ts b/js/src/vector/dictionary.ts
index 3857298..f4de810 100644
--- a/js/src/vector/dictionary.ts
+++ b/js/src/vector/dictionary.ts
@@ -47,4 +47,12 @@ export class DictionaryView<T extends DataType> implements 
View<T> {
             yield values.get(indicies.get(index));
         }
     }
+    public indexOf(search: T['TValue']) {
+        // First find the dictionary key for the desired value...
+        const key = this.dictionary.indexOf(search);
+        if (key === -1) { return key; }
+
+        // ... then find the first occurence of that key in indicies
+        return this.indicies.indexOf(key!);
+    }
 }
diff --git a/js/src/vector/flat.ts b/js/src/vector/flat.ts
index a32bd9d..acc2f1a 100644
--- a/js/src/vector/flat.ts
+++ b/js/src/vector/flat.ts
@@ -43,6 +43,15 @@ export class FlatView<T extends FlatType> implements View<T> 
{
     public toArray(): IterableArrayLike<T['TValue']> {
         return this.values.subarray(0, this.length);
     }
+    public indexOf(search: T['TValue']) {
+        let index = 0;
+        for (let value of this) {
+            if (value === search) { return index; }
+            ++index;
+        }
+
+        return -1;
+    }
     public [Symbol.iterator](): IterableIterator<T['TValue']> {
         return this.values.subarray(0, this.length)[Symbol.iterator]() as 
IterableIterator<T['TValue']>;
     }
@@ -64,6 +73,10 @@ export class NullView implements View<Null> {
     public toArray(): IterableArrayLike<null> {
         return [...this];
     }
+    public indexOf(search: any) {
+        // if you're looking for nulls and the view isn't empty, we've got 'em!
+        return search === null && this.length > 0 ? 0 : -1;
+    }
     public *[Symbol.iterator](): IterableIterator<null> {
         for (let index = -1, length = this.length; ++index < length;) {
             yield null;
@@ -107,6 +120,15 @@ export class ValidityView<T extends DataType> implements 
View<T> {
     public toArray(): IterableArrayLike<T['TValue'] | null> {
         return [...this];
     }
+    public indexOf(search: T['TValue']) {
+        let index = 0;
+        for (let value of this) {
+            if (value === search) { return index; }
+            ++index;
+        }
+
+        return -1;
+    }
     public isValid(index: number): boolean {
         const nullBitIndex = this.offset + index;
         return getBool(null, index, this.nullBitmap[nullBitIndex >> 3], 
nullBitIndex % 8);
@@ -169,6 +191,15 @@ export class FixedSizeView<T extends PrimitiveType> 
extends PrimitiveView<T> {
     public toArray(): IterableArrayLike<T['TValue']> {
         return this.values;
     }
+    public indexOf(search: T['TValue']) {
+        let index = 0;
+        for (let value of this) {
+            if (value.every((d: number, i: number) => d === search[i])) { 
return index; }
+            ++index;
+        }
+
+        return -1;
+    }
     protected getValue(values: T['TArray'], index: number, size: number): 
T['TValue'] {
         return values.subarray(index * size, index * size + size);
     }
diff --git a/js/src/vector/list.ts b/js/src/vector/list.ts
index 3d365ce..8561c66 100644
--- a/js/src/vector/list.ts
+++ b/js/src/vector/list.ts
@@ -59,6 +59,15 @@ export abstract class ListViewBase<T extends (ListType | 
FlatListType | FixedSiz
             yield get(values, index, valueOffsets);
         }
     }
+    public indexOf(search: T['TValue']) {
+        let index = 0;
+        for (let value of this) {
+            if (value === search) { return index; }
+            ++index;
+        }
+
+        return -1;
+    }
     protected abstract getList(values: T['TArray'], index: number, 
valueOffsets?: Int32Array): T['TValue'];
     protected abstract setList(values: T['TArray'], index: number, value: 
T['TValue'], valueOffsets?: Int32Array): void;
 }
diff --git a/js/src/vector/nested.ts b/js/src/vector/nested.ts
index d0fb24c..a45a912 100644
--- a/js/src/vector/nested.ts
+++ b/js/src/vector/nested.ts
@@ -40,6 +40,9 @@ export abstract class NestedView<T extends NestedType> 
implements View<T> {
     public toArray(): IterableArrayLike<T['TValue']> {
         return [...this];
     }
+    public indexOf(_: T['TValue']): number {
+        throw new Error(`Not implemented yet`);
+    }
     public toJSON(): any { return this.toArray(); }
     public toString() {
         return [...this].map((x) => stringify(x)).join(', ');
diff --git a/js/test/unit/vector-tests.ts b/js/test/unit/vector-tests.ts
index 81676b0..e2be229 100644
--- a/js/test/unit/vector-tests.ts
+++ b/js/test/unit/vector-tests.ts
@@ -15,13 +15,16 @@
 // specific language governing permissions and limitations
 // under the License.
 
+import { TextEncoder } from 'text-encoding-utf-8';
 import Arrow from '../Arrow';
 import { type, TypedArray, TypedArrayConstructor } from '../../src/Arrow';
 
-const { BoolData, FlatData } = Arrow.data;
-const { IntVector, FloatVector, BoolVector } = Arrow.vector;
+const utf8Encoder = new TextEncoder('utf-8');
+
+const { BoolData, FlatData, FlatListData } = Arrow.data;
+const { IntVector, FloatVector, BoolVector, Utf8Vector } = Arrow.vector;
 const {
-    Bool,
+    Utf8, Bool,
     Float16, Float32, Float64,
     Int8, Int16, Int32, Int64,
     Uint8, Uint16, Uint32, Uint64,
@@ -45,11 +48,14 @@ const FixedWidthVectors = {
 
 const fixedSizeVectors = toMap(FixedSizeVectors, 
Object.keys(FixedSizeVectors));
 const fixedWidthVectors = toMap(FixedWidthVectors, 
Object.keys(FixedWidthVectors));
+const randomBytes = (n: number) => Uint8Array.from(
+    { length: n },
+    () => Math.random() * 255 | 0
+);
 const bytes = Array.from(
     { length: 5 },
-    () => Uint8Array.from(
-        { length: 64 },
-        () => Math.random() * 255 | 0));
+    () => randomBytes(64)
+);
 
 describe(`BoolVector`, () => {
     const values = [true, true, false, true, true, false, false, false], n = 
values.length;
@@ -67,6 +73,16 @@ describe(`BoolVector`, () => {
             expect(v).toEqual(values[i]);
         }
     });
+    test(`indexOf returns expected values`, () => {
+        for (let test_value of [true, false]) {
+            const expected = values.indexOf(test_value);
+            expect(vector.indexOf(test_value)).toEqual(expected);
+        }
+    });
+    test(`indexOf returns -1 when value not found`, () => {
+        const v = new BoolVector(new BoolData(new Bool(), 3, null, new 
Uint8Array([0xFF])));
+        expect(v.indexOf(false)).toEqual(-1);
+    });
     test(`can set values to true and false`, () => {
         const v = new BoolVector(new BoolData(new Bool(), n, null, new 
Uint8Array([27, 0, 0, 0, 0, 0, 0, 0])));
         const expected1 = [true, true, false, true, true, false, false, false];
@@ -145,6 +161,13 @@ describe('Float16Vector', () => {
             expect(v).toEqual(clamp(values[i]));
         }
     });
+    test(`indexOf returns expected values`, () => {
+        const randomValues = new Uint16Array(randomBytes(64).buffer);
+        for (let value of [...values, ...randomValues]) {
+            const expected = values.indexOf(value);
+            expect(vector.indexOf(clamp(value))).toEqual(expected);
+        }
+    });
     test(`slices the entire array`, () => {
         expect(vector.slice().toArray()).toEqual(float16s);
     });
@@ -187,6 +210,21 @@ for (const [VectorName, [VectorType, DataType]] of 
fixedSizeVectors) {
                 expect(v).toEqual(values.slice(2 * i, 2 * (i + 1)));
             }
         });
+        test(`indexOf returns expected values`, () => {
+            // Create a set of test data composed of all of the actual values
+            // and a few random values
+            let testValues = concatTyped(
+                type.ArrayType,
+                ...bytes,
+                ...[randomBytes(8 * 2 * type.ArrayType.BYTES_PER_ELEMENT)]
+            );
+
+            for (let i = -1, n = testValues.length / 2 | 0; ++i < n;) {
+                const value = testValues.slice(2 * i, 2 * (i + 1));
+                const expected = values.findIndex((d, i) => i % 2 === 0 && d 
=== value[0] && testValues[i + 1] === value[1]);
+                expect(vector.indexOf(value)).toEqual(expected >= 0 ? expected 
/ 2 : -1);
+            }
+        });
         test(`slices the entire array`, () => {
             expect(vector.slice().toArray()).toEqual(values);
         });
@@ -232,6 +270,20 @@ for (const [VectorName, [VectorType, DataType]] of 
fixedWidthVectors) {
                 expect(v).toEqual(values[i]);
             }
         });
+        test(`indexOf returns expected values`, () => {
+            // Create a set of test data composed of all of the actual values
+            // and a few random values
+            let testValues = concatTyped(
+                type.ArrayType,
+                ...bytes,
+                ...[randomBytes(8 * type.ArrayType.BYTES_PER_ELEMENT)]
+            );
+
+            for (const value of testValues) {
+                const expected = values.indexOf(value);
+                expect(vector.indexOf(value)).toEqual(expected);
+            }
+        });
         test(`slices the entire array`, () => {
             expect(vector.slice().toArray()).toEqual(values);
         });
@@ -253,6 +305,35 @@ for (const [VectorName, [VectorType, DataType]] of 
fixedWidthVectors) {
     });
 }
 
+describe(`Utf8Vector`, () => {
+    const values = ['foo', 'bar', 'baz', 'foo bar', 'bar'], n = values.length;
+    let offset = 0;
+    const offsets = Uint32Array.of(0, ...values.map((d) => { offset += 
d.length; return offset; }));
+    const vector = new Utf8Vector(new FlatListData(new Utf8(), n, null, 
offsets, utf8Encoder.encode(values.join(''))));
+    test(`gets expected values`, () => {
+        let i = -1;
+        while (++i < n) {
+            expect(vector.get(i)).toEqual(values[i]);
+        }
+    });
+    test(`iterates expected values`, () => {
+        expect.hasAssertions();
+        let i = -1;
+        for (let v of vector) {
+            expect(++i).toBeLessThan(n);
+            expect(v).toEqual(values[i]);
+        }
+    });
+    test(`indexOf returns expected values`, () => {
+        let testValues = values.concat(['abc', '12345']);
+
+        for (const value of testValues) {
+            const expected = values.indexOf(value);
+            expect(vector.indexOf(value)).toEqual(expected);
+        }
+    });
+});
+
 function toMap<T>(entries: Record<string, T>, keys: string[]) {
     return keys.reduce((map, key) => {
         map.set(key, entries[key] as T);

-- 
To stop receiving notification emails like this one, please contact
bhule...@apache.org.

Reply via email to