This is an automated email from the ASF dual-hosted git repository.
jonkeane pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 9039770 ARROW-13266: [JS] Improve benchmark names & include suite name in json
9039770 is described below
commit 903977061194786699d1824c4e6cb977184351d1
Author: Diana Clarke <[email protected]>
AuthorDate: Wed Jul 7 10:02:41 2021 -0500
ARROW-13266: [JS] Improve benchmark names & include suite name in json
Before:
```
Prepare Data: 635.29ms
Running "Parse "tracks"" suite...
Table.from 6,513 ops/s ±23%, 0.12 ms, 75 samples
readBatches 6,689 ops/s ±9.0%, 0.12 ms, 70 samples
serialize 325 ops/s ±11%, 2.7 ms, 69 samples
Running "Get "tracks" values by index" suite...
name: 'lat', length: 1,000,000, type: Float32 27.4 ops/s ±8.1%, 33 ms, 49 samples
name: 'lng', length: 1,000,000, type: Float32 28.5 ops/s ±3.8%, 33 ms, 50 samples
name: 'origin', length: 1,000,000, type: Dictionary<Int8, Utf8> 0.181 ops/s ±27%, 5,070 ms, 5 samples
name: 'destination', length: 1,000,000, type: Dictionary<Int8, Utf8> 0.175 ops/s ±28%, 5,619 ms, 5 samples
Running "Iterate "tracks" vectors" suite...
name: 'lat', length: 1,000,000, type: Float32 46 ops/s ±9.8%, 19 ms, 63 samples
name: 'lng', length: 1,000,000, type: Float32 52 ops/s ±1.6%, 19 ms, 67 samples
name: 'origin', length: 1,000,000, type: Dictionary<Int8, Utf8> 0.255 ops/s ±2.7%, 3,903 ms, 5 samples
name: 'destination', length: 1,000,000, type: Dictionary<Int8, Utf8> 0.225 ops/s ±14%, 4,328 ms, 5 samples
Running "Slice toArray "tracks" vectors" suite...
name: 'lat', length: 1,000,000, type: Float32 1,069 ops/s ±7.6%, 0.84 ms, 75 samples
name: 'lng', length: 1,000,000, type: Float32 892 ops/s ±7.3%, 1.1 ms, 61 samples
name: 'origin', length: 1,000,000, type: Dictionary<Int8, Utf8> 0.151 ops/s ±21%, 6,730 ms, 5 samples
name: 'destination', length: 1,000,000, type: Dictionary<Int8, Utf8> 0.189 ops/s ±24%, 5,173 ms, 5 samples
Running "Slice "tracks" vectors" suite...
name: 'lat', length: 1,000,000, type: Float32 1,815,763 ops/s ±4.9%, 0.001 ms, 86 samples
name: 'lng', length: 1,000,000, type: Float32 1,355,757 ops/s ±8.3%, 0.001 ms, 63 samples
name: 'origin', length: 1,000,000, type: Dictionary<Int8, Utf8> 1,678,921 ops/s ±15%, 0 ms, 71 samples
name: 'destination', length: 1,000,000, type: Dictionary<Int8, Utf8> 2,214,331 ops/s ±3.0%, 0 ms, 85 samples
Running "DataFrame Iterate "tracks"" suite...
length: 1,000,000 19.6 ops/s ±8.8%, 46 ms, 36 samples
Running "DataFrame Direct Count "tracks"" suite...
name: 'lat', length: 1,000,000, type: Float32, test: gt, value: 0 113 ops/s ±5.3%, 8.2 ms, 72 samples
name: 'lng', length: 1,000,000, type: Float32, test: gt, value: 0 89 ops/s ±13%, 9 ms, 59 samples
name: 'origin', length: 1,000,000, type: Dictionary<Int8, Utf8>, test: eq, value: Seattle 0.304 ops/s ±9.1%, 3,305 ms, 5 samples
```
After:
```
Prepare Data: 4.638ms
Running "Parse" suite...
dataset: tracks, function: Table.from 22,176 ops/s ±16%, 0.037 ms, 84 samples
dataset: tracks, function: readBatches 23,428 ops/s ±5.0%, 0.038 ms, 80 samples
dataset: tracks, function: serialize 14,132 ops/s ±5.8%, 0.063 ms, 74 samples
Running "Get values by index" suite...
dataset: tracks, column: lat, length: 200, type: Float32 450,042 ops/s ±5.9%, 0.002 ms, 91 samples
dataset: tracks, column: lng, length: 200, type: Float32 411,900 ops/s ±6.1%, 0.002 ms, 78 samples
dataset: tracks, column: origin, length: 200, type: Dictionary<Int8, Utf8> 991 ops/s ±11%, 0.96 ms, 60 samples
dataset: tracks, column: destination, length: 200, type: Dictionary<Int8, Utf8> 1,171 ops/s ±7.2%, 0.72 ms, 70 samples
Running "Iterate vectors" suite...
dataset: tracks, column: lat, length: 200, type: Float32 225,008 ops/s ±3.6%, 0.004 ms, 89 samples
dataset: tracks, column: lng, length: 200, type: Float32 240,921 ops/s ±0.51%, 0.004 ms, 97 samples
dataset: tracks, column: origin, length: 200, type: Dictionary<Int8, Utf8> 1,161 ops/s ±5.5%, 0.78 ms, 65 samples
dataset: tracks, column: destination, length: 200, type: Dictionary<Int8, Utf8> 1,088 ops/s ±5.6%, 0.96 ms, 42 samples
Running "Slice toArray vectors" suite...
dataset: tracks, column: lat, length: 200, type: Float32 651,617 ops/s ±3.8%, 0.002 ms, 83 samples
dataset: tracks, column: lng, length: 200, type: Float32 673,205 ops/s ±3.0%, 0.001 ms, 63 samples
dataset: tracks, column: origin, length: 200, type: Dictionary<Int8, Utf8> 1,145 ops/s ±5.7%, 0.77 ms, 69 samples
dataset: tracks, column: destination, length: 200, type: Dictionary<Int8, Utf8> 1,142 ops/s ±5.6%, 0.77 ms, 65 samples
Running "Slice vectors" suite...
dataset: tracks, column: lat, length: 200, type: Float32 5,699,749 ops/s ±1.8%, 0 ms, 93 samples
dataset: tracks, column: lng, length: 200, type: Float32 5,768,406 ops/s ±0.75%, 0 ms, 97 samples
dataset: tracks, column: origin, length: 200, type: Dictionary<Int8, Utf8> 6,112,340 ops/s ±0.49%, 0 ms, 96 samples
dataset: tracks, column: destination, length: 200, type: Dictionary<Int8, Utf8> 6,033,494 ops/s ±1.3%, 0 ms, 94 samples
Running "DataFrame Iterate" suite...
dataset: tracks, length: 200 106,752 ops/s ±3.1%, 0.009 ms, 86 samples
Running "DataFrame Direct Count" suite...
dataset: tracks, column: lat, length: 200, type: Float32, test: gt, value: 0 1,277,478 ops/s ±2.3%, 0.001 ms, 83 samples
dataset: tracks, column: lng, length: 200, type: Float32, test: gt, value: 0 1,163,103 ops/s ±3.9%, 0.001 ms, 74 samples
dataset: tracks, column: origin, length: 200, type: Dictionary<Int8, Utf8>, test: eq, value: Seattle 1,209 ops/s ±7.1%, 0.76 ms, 63 samples
```
Closes #10665 from dianaclarke/ARROW-13266
Authored-by: Diana Clarke <[email protected]>
Signed-off-by: Jonathan Keane <[email protected]>
---
js/perf/index.ts | 43 ++++++++++++++++++++++---------------------
1 file changed, 22 insertions(+), 21 deletions(-)
diff --git a/js/perf/index.ts b/js/perf/index.ts
index 506f16b..fc2cb8b 100644
--- a/js/perf/index.ts
+++ b/js/perf/index.ts
@@ -46,6 +46,7 @@ const results: CaseResult[] = [];
function cycle(result: CaseResult, _summary: Summary) {
const duration = result.details.median * 1000;
if (json) {
+ result.suite = _summary.name
results.push(result);
}
console.log(
@@ -55,17 +56,17 @@ function cycle(result: CaseResult, _summary: Summary) {
for (const { name, ipc, df } of config) {
b.suite(
- `Parse "${name}"`,
+ `Parse`,
- b.add(`Table.from`, () => {
+ b.add(`dataset: ${name}, function: Table.from`, () => {
Table.from(ipc);
}),
- b.add(`readBatches`, () => {
+ b.add(`dataset: ${name}, function: readBatches`, () => {
for (const _recordBatch of RecordBatchReader.from(ipc)) {}
}),
- b.add(`serialize`, () => {
+ b.add(`dataset: ${name}, function: serialize`, () => {
df.serialize();
}),
@@ -75,30 +76,30 @@ for (const { name, ipc, df } of config) {
const schema = df.schema;
const suites = [{
- name: `Get "${name}" values by index`,
+ suite_name: `Get values by index`,
fn(vector: Arrow.Column<any>) {
for (let i = -1, n = vector.length; ++i < n;) {
vector.get(i);
}
}
}, {
- name: `Iterate "${name}" vectors`,
+ suite_name: `Iterate vectors`,
fn(vector: Arrow.Column<any>) { for (const _value of vector) {} }
}, {
- name: `Slice toArray "${name}" vectors`,
+ suite_name: `Slice toArray vectors`,
fn(vector: Arrow.Column<any>) { vector.slice().toArray(); }
}, {
- name: `Slice "${name}" vectors`,
+ suite_name: `Slice vectors`,
fn(vector: Arrow.Column<any>) { vector.slice(); }
}];
- for (const {name, fn} of suites) {
+ for (const {suite_name, fn} of suites) {
b.suite(
- name,
+ suite_name,
...schema.fields.map((f, i) => {
const vector = df.getColumnAt(i)!;
- return b.add(`name: '${f.name}', length:
${formatNumber(vector.length)}, type: ${vector.type}`, () => {
+ return b.add(`dataset: ${name}, column: ${f.name}, length:
${formatNumber(vector.length)}, type: ${vector.type}`, () => {
fn(vector);
});
}),
@@ -111,9 +112,9 @@ for (const { name, ipc, df } of config) {
for (const { name, df, countBys, counts } of config) {
b.suite(
- `DataFrame Iterate "${name}"`,
+ `DataFrame Iterate`,
- b.add(`length: ${formatNumber(df.length)}`, () => {
+ b.add(`dataset: ${name}, length: ${formatNumber(df.length)}`, () => {
for (const _value of df) {}
}),
@@ -121,10 +122,10 @@ for (const { name, df, countBys, counts } of config) {
);
b.suite(
- `DataFrame Count By "${name}"`,
+ `DataFrame Count By`,
...countBys.map((column: string) => b.add(
- `name: '${column}', length: ${formatNumber(df.length)}, type:
${df.schema.fields.find((c)=> c.name === column)!.type}`,
+ `dataset: ${name}, column: ${column}, length:
${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name ===
column)!.type}`,
() => df.countBy(column)
)),
@@ -132,10 +133,10 @@ for (const { name, df, countBys, counts } of config) {
);
b.suite(
- `DataFrame Filter-Scan Count "${name}"`,
+ `DataFrame Filter-Scan Count`,
...counts.map(({ column, test, value }: {column: string; test: 'gt' |
'eq'; value: number | string}) => b.add(
- `name: '${column}', length: ${formatNumber(df.length)}, type:
${df.schema.fields.find((c)=> c.name === column)!.type}, test: ${test}, value:
${value}`,
+ `dataset: ${name}, column: ${column}, length:
${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name ===
column)!.type}, test: ${test}, value: ${value}`,
() => {
let filteredDf: Arrow.FilteredDataFrame;
if (test == 'gt') {
@@ -154,10 +155,10 @@ for (const { name, df, countBys, counts } of config) {
);
b.suite(
- `DataFrame Filter-Iterate "${name}"`,
+ `DataFrame Filter-Iterate`,
...counts.map(({ column, test, value }: {column: string; test: 'gt' |
'eq'; value: number | string}) => b.add(
- `name: '${column}', length: ${formatNumber(df.length)}, type:
${df.schema.fields.find((c)=> c.name === column)!.type}, test: ${test}, value:
${value}`,
+ `dataset: ${name}, column: ${column}, length:
${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name ===
column)!.type}, test: ${test}, value: ${value}`,
() => {
let filteredDf: Arrow.FilteredDataFrame;
if (test == 'gt') {
@@ -178,10 +179,10 @@ for (const { name, df, countBys, counts } of config) {
);
b.suite(
- `DataFrame Direct Count "${name}"`,
+ `DataFrame Direct Count`,
...counts.map(({ column, test, value }: {column: string; test: 'gt' |
'eq'; value: number | string}) => b.add(
- `name: '${column}', length: ${formatNumber(df.length)}, type:
${df.schema.fields.find((c)=> c.name === column)!.type}, test: ${test}, value:
${value}`,
+ `dataset: ${name}, column: ${column}, length:
${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name ===
column)!.type}, test: ${test}, value: ${value}`,
() => {
const colidx = df.schema.fields.findIndex((c)=> c.name ===
column);