This is an automated email from the ASF dual-hosted git repository. raulcd pushed a commit to branch maint-15.0.x in repository https://gitbox.apache.org/repos/asf/arrow.git
commit a1ada92d17569d4fac6d6ddfc8f6d9b8018a9f14 Author: Antoine Pitrou <[email protected]> AuthorDate: Mon Jan 29 17:27:36 2024 +0100 GH-39740: [C++] Fix filter and take kernel for month_day_nano intervals (#39795) ### Rationale for this change The filter and take functions were not correctly supported on month_day_nano intervals. ### What changes are included in this PR? * Expand the primitive filter implementation to handle all possible fixed-width primitive types (including fixed-size binary) * Expand the take filter implementation to handle all well-known fixed-width primitive types (including month_day_nano, decimal128 and decimal256) * Add benchmarks for taking and filtering fixed-size binary These changes allow for very significant performance improvements filtering and taking fixed-size binary data: ``` ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Non-regressions: (90) ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- benchmark baseline contender change % counters FilterFixedSizeBinaryFilterNoNulls/524288/0/8 1.716 GiB/sec 33.814 GiB/sec 1870.862 {'family_index': 0, 'per_family_instance_index': 0, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/0/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2462, 'byte_width': 8.0, 'data null%': 0.0, 'mask null%': 0.0, 'select%': 99.9} TakeFixedSizeBinaryRandomIndicesWithNulls/524288/1/8 380.056M items/sec 7.098G items/sec 1767.491 {'family_index': 3, 'per_family_instance_index': 6, 'run_name': 'TakeFixedSizeBinaryRandomIndicesWithNulls/524288/1/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 505, 'byte_width': 8.0, 'null_percent': 100.0} FilterFixedSizeBinaryFilterNoNulls/524288/0/9 1.916 GiB/sec 33.721 GiB/sec 1659.766 {'family_index': 0, 'per_family_instance_index': 1, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/0/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2750, 'byte_width': 9.0, 'data null%': 0.0, 'mask null%': 0.0, 'select%': 99.9} FilterFixedSizeBinaryFilterNoNulls/524288/9/8 917.713 MiB/sec 9.193 GiB/sec 925.719 {'family_index': 0, 'per_family_instance_index': 18, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/9/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1271, 'byte_width': 8.0, 'data null%': 10.0, 'mask null%': 0.0, 'select%': 99.9} FilterFixedSizeBinaryFilterNoNulls/524288/12/8 1.004 GiB/sec 9.374 GiB/sec 833.673 {'family_index': 0, 'per_family_instance_index': 24, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/12/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1440, 'byte_width': 8.0, 'data null%': 90.0, 'mask null%': 0.0, 'select%': 99.9} FilterFixedSizeBinaryFilterNoNulls/524288/3/8 1.625 GiB/sec 15.009 GiB/sec 823.442 {'family_index': 0, 'per_family_instance_index': 6, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/3/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2328, 'byte_width': 8.0, 'data null%': 0.1, 'mask null%': 0.0, 'select%': 99.9} FilterFixedSizeBinaryFilterNoNulls/524288/9/9 1021.638 MiB/sec 9.126 GiB/sec 814.670 {'family_index': 0, 'per_family_instance_index': 19, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/9/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1428, 'byte_width': 9.0, 'data null%': 10.0, 'mask null%': 0.0, 'select%': 99.9} FilterFixedSizeBinaryFilterNoNulls/524288/6/8 1.235 GiB/sec 10.814 GiB/sec 775.869 {'family_index': 0, 'per_family_instance_index': 12, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/6/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1762, 'byte_width': 8.0, 'data null%': 1.0, 'mask null%': 0.0, 'select%': 99.9} FilterFixedSizeBinaryFilterNoNulls/524288/12/9 1.123 GiB/sec 9.120 GiB/sec 712.196 {'family_index': 0, 'per_family_instance_index': 25, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/12/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1598, 'byte_width': 9.0, 'data null%': 90.0, 'mask null%': 0.0, 'select%': 99.9} FilterFixedSizeBinaryFilterNoNulls/524288/6/9 1.370 GiB/sec 10.499 GiB/sec 666.348 {'family_index': 0, 'per_family_instance_index': 13, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/6/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1958, 'byte_width': 9.0, 'data null%': 1.0, 'mask null%': 0.0, 'select%': 99.9} FilterFixedSizeBinaryFilterNoNulls/524288/3/9 1.814 GiB/sec 13.394 GiB/sec 638.343 {'family_index': 0, 'per_family_instance_index': 7, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/3/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2600, 'byte_width': 9.0, 'data null%': 0.1, 'mask null%': 0.0, 'select%': 99.9} FilterFixedSizeBinaryFilterNoNulls/524288/2/8 12.155 GiB/sec 77.799 GiB/sec 540.051 {'family_index': 0, 'per_family_instance_index': 4, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/2/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 17222, 'byte_width': 8.0, 'data null%': 0.0, 'mask null%': 0.0, 'select%': 1.0} FilterFixedSizeBinaryFilterNoNulls/524288/2/9 13.507 GiB/sec 84.361 GiB/sec 524.592 {'family_index': 0, 'per_family_instance_index': 5, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/2/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 19469, 'byte_width': 9.0, 'data null%': 0.0, 'mask null%': 0.0, 'select%': 1.0} TakeFixedSizeBinaryMonotonicIndices/524288/1/8 194.493M items/sec 732.378M items/sec 276.557 {'family_index': 4, 'per_family_instance_index': 6, 'run_name': 'TakeFixedSizeBinaryMonotonicIndices/524288/1/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 259, 'byte_width': 8.0, 'null_percent': 100.0} TakeFixedSizeBinaryRandomIndicesNoNulls/524288/1/8 200.981M items/sec 747.628M items/sec 271.989 {'family_index': 2, 'per_family_instance_index': 6, 'run_name': 'TakeFixedSizeBinaryRandomIndicesNoNulls/524288/1/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 268, 'byte_width': 8.0, 'null_percent': 100.0} FilterFixedSizeBinaryFilterWithNulls/524288/0/8 947.631 MiB/sec 3.318 GiB/sec 258.565 {'family_index': 1, 'per_family_instance_index': 0, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/0/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1329, 'byte_width': 8.0, 'data null%': 0.0, 'mask null%': 5.0, 'select%': 99.9} FilterFixedSizeBinaryFilterWithNulls/524288/3/8 911.406 MiB/sec 3.121 GiB/sec 250.677 {'family_index': 1, 'per_family_instance_index': 6, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/3/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1275, 'byte_width': 8.0, 'data null%': 0.1, 'mask null%': 5.0, 'select%': 99.9} FilterFixedSizeBinaryFilterNoNulls/524288/1/8 1.045 GiB/sec 3.535 GiB/sec 238.406 {'family_index': 0, 'per_family_instance_index': 2, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/1/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1496, 'byte_width': 8.0, 'data null%': 0.0, 'mask null%': 0.0, 'select%': 50.0} FilterFixedSizeBinaryFilterWithNulls/524288/6/8 899.161 MiB/sec 2.915 GiB/sec 232.029 {'family_index': 1, 'per_family_instance_index': 12, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/6/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1260, 'byte_width': 8.0, 'data null%': 1.0, 'mask null%': 5.0, 'select%': 99.9} FilterFixedSizeBinaryFilterWithNulls/524288/9/8 829.852 MiB/sec 2.617 GiB/sec 222.914 {'family_index': 1, 'per_family_instance_index': 18, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/9/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1157, 'byte_width': 8.0, 'data null%': 10.0, 'mask null%': 5.0, 'select%': 99.9} TakeFixedSizeBinaryRandomIndicesNoNulls/524288/0/8 234.268M items/sec 752.809M items/sec 221.345 {'family_index': 2, 'per_family_instance_index': 8, 'run_name': 'TakeFixedSizeBinaryRandomIndicesNoNulls/524288/0/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 312, 'byte_width': 8.0, 'null_percent': 0.0} FilterFixedSizeBinaryFilterNoNulls/524288/1/9 1.171 GiB/sec 3.711 GiB/sec 216.957 {'family_index': 0, 'per_family_instance_index': 3, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/1/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1674, 'byte_width': 9.0, 'data null%': 0.0, 'mask null%': 0.0, 'select%': 50.0} TakeFixedSizeBinaryMonotonicIndices/524288/0/8 249.393M items/sec 787.274M items/sec 215.676 {'family_index': 4, 'per_family_instance_index': 8, 'run_name': 'TakeFixedSizeBinaryMonotonicIndices/524288/0/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 333, 'byte_width': 8.0, 'null_percent': 0.0} TakeFixedSizeBinaryRandomIndicesWithNulls/524288/0/8 234.268M items/sec 736.727M items/sec 214.481 {'family_index': 3, 'per_family_instance_index': 8, 'run_name': 'TakeFixedSizeBinaryRandomIndicesWithNulls/524288/0/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 313, 'byte_width': 8.0, 'null_percent': 0.0} TakeFixedSizeBinaryMonotonicIndices/524288/1000/8 134.852M items/sec 423.748M items/sec 214.231 {'family_index': 4, 'per_family_instance_index': 0, 'run_name': 'TakeFixedSizeBinaryMonotonicIndices/524288/1000/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 202, 'byte_width': 8.0, 'null_percent': 0.1} FilterFixedSizeBinaryFilterWithNulls/524288/12/8 913.734 MiB/sec 2.599 GiB/sec 191.245 {'family_index': 1, 'per_family_instance_index': 24, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/12/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1292, 'byte_width': 8.0, 'data null%': 90.0, 'mask null%': 5.0, 'select%': 99.9} TakeFixedSizeBinaryRandomIndicesNoNulls/524288/1000/8 138.218M items/sec 309.307M items/sec 123.783 {'family_index': 2, 'per_family_instance_index': 0, 'run_name': 'TakeFixedSizeBinaryRandomIndicesNoNulls/524288/1000/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 184, 'byte_width': 8.0, 'null_percent': 0.1} TakeFixedSizeBinaryRandomIndicesWithNulls/524288/1000/8 132.755M items/sec 293.027M items/sec 120.727 {'family_index': 3, 'per_family_instance_index': 0, 'run_name': 'TakeFixedSizeBinaryRandomIndicesWithNulls/524288/1000/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 179, 'byte_width': 8.0, 'null_percent': 0.1} TakeFixedSizeBinaryRandomIndicesNoNulls/524288/10/8 125.492M items/sec 272.996M items/sec 117.540 {'family_index': 2, 'per_family_instance_index': 2, 'run_name': 'TakeFixedSizeBinaryRandomIndicesNoNulls/524288/10/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 174, 'byte_width': 8.0, 'null_percent': 10.0} FilterFixedSizeBinaryFilterWithNulls/524288/9/9 926.938 MiB/sec 1.904 GiB/sec 110.379 {'family_index': 1, 'per_family_instance_index': 19, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/9/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1295, 'byte_width': 9.0, 'data null%': 10.0, 'mask null%': 5.0, 'select%': 99.9} TakeFixedSizeBinaryMonotonicIndices/524288/10/8 158.754M items/sec 331.106M items/sec 108.565 {'family_index': 4, 'per_family_instance_index': 2, 'run_name': 'TakeFixedSizeBinaryMonotonicIndices/524288/10/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 167, 'byte_width': 8.0, 'null_percent': 10.0} FilterFixedSizeBinaryFilterWithNulls/524288/0/9 1.031 GiB/sec 2.129 GiB/sec 106.621 {'family_index': 1, 'per_family_instance_index': 1, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/0/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1477, 'byte_width': 9.0, 'data null%': 0.0, 'mask null%': 5.0, 'select%': 99.9} FilterFixedSizeBinaryFilterWithNulls/524288/3/9 1020.776 MiB/sec 2.056 GiB/sec 106.293 {'family_index': 1, 'per_family_instance_index': 7, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/3/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1430, 'byte_width': 9.0, 'data null%': 0.1, 'mask null%': 5.0, 'select%': 99.9} FilterFixedSizeBinaryFilterWithNulls/524288/4/8 890.785 MiB/sec 1.768 GiB/sec 103.293 {'family_index': 1, 'per_family_instance_index': 8, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/4/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1242, 'byte_width': 8.0, 'data null%': 0.1, 'mask null%': 5.0, 'select%': 50.0} FilterFixedSizeBinaryFilterWithNulls/524288/6/9 1005.839 MiB/sec 1.984 GiB/sec 102.023 {'family_index': 1, 'per_family_instance_index': 13, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/6/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1407, 'byte_width': 9.0, 'data null%': 1.0, 'mask null%': 5.0, 'select%': 99.9} FilterFixedSizeBinaryFilterWithNulls/524288/1/8 916.810 MiB/sec 1.762 GiB/sec 96.757 {'family_index': 1, 'per_family_instance_index': 2, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/1/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1270, 'byte_width': 8.0, 'data null%': 0.0, 'mask null%': 5.0, 'select%': 50.0} FilterFixedSizeBinaryFilterWithNulls/524288/7/8 890.211 MiB/sec 1.694 GiB/sec 94.853 {'family_index': 1, 'per_family_instance_index': 14, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/7/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1235, 'byte_width': 8.0, 'data null%': 1.0, 'mask null%': 5.0, 'select%': 50.0} TakeFixedSizeBinaryRandomIndicesNoNulls/524288/2/8 95.788M items/sec 184.004M items/sec 92.095 {'family_index': 2, 'per_family_instance_index': 4, 'run_name': 'TakeFixedSizeBinaryRandomIndicesNoNulls/524288/2/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 124, 'byte_width': 8.0, 'null_percent': 50.0} FilterFixedSizeBinaryFilterWithNulls/524288/10/8 862.497 MiB/sec 1.616 GiB/sec 91.823 {'family_index': 1, 'per_family_instance_index': 20, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/10/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1200, 'byte_width': 8.0, 'data null%': 10.0, 'mask null%': 5.0, 'select%': 50.0} FilterFixedSizeBinaryFilterWithNulls/524288/12/9 1.005 GiB/sec 1.904 GiB/sec 89.431 {'family_index': 1, 'per_family_instance_index': 25, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/12/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1442, 'byte_width': 9.0, 'data null%': 90.0, 'mask null%': 5.0, 'select%': 99.9} TakeFixedSizeBinaryMonotonicIndices/524288/2/8 123.065M items/sec 228.755M items/sec 85.881 {'family_index': 4, 'per_family_instance_index': 4, 'run_name': 'TakeFixedSizeBinaryMonotonicIndices/524288/2/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 164, 'byte_width': 8.0, 'null_percent': 50.0} FilterFixedSizeBinaryFilterNoNulls/524288/10/8 930.637 MiB/sec 1.669 GiB/sec 83.659 {'family_index': 0, 'per_family_instance_index': 20, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/10/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1293, 'byte_width': 8.0, 'data null%': 10.0, 'mask null%': 0.0, 'select%': 50.0} FilterFixedSizeBinaryFilterNoNulls/524288/4/8 1.034 GiB/sec 1.871 GiB/sec 81.019 {'family_index': 0, 'per_family_instance_index': 8, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/4/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1482, 'byte_width': 8.0, 'data null%': 0.1, 'mask null%': 0.0, 'select%': 50.0} FilterFixedSizeBinaryFilterNoNulls/524288/7/8 1004.789 MiB/sec 1.772 GiB/sec 80.538 {'family_index': 0, 'per_family_instance_index': 14, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/7/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1404, 'byte_width': 8.0, 'data null%': 1.0, 'mask null%': 0.0, 'select%': 50.0} FilterFixedSizeBinaryFilterWithNulls/524288/13/8 920.819 MiB/sec 1.616 GiB/sec 79.686 {'family_index': 1, 'per_family_instance_index': 26, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/13/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1285, 'byte_width': 8.0, 'data null%': 90.0, 'mask null%': 5.0, 'select%': 50.0} FilterFixedSizeBinaryFilterNoNulls/524288/13/8 974.713 MiB/sec 1.669 GiB/sec 75.388 {'family_index': 0, 'per_family_instance_index': 26, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/13/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1363, 'byte_width': 8.0, 'data null%': 90.0, 'mask null%': 0.0, 'select%': 50.0} TakeFixedSizeBinaryRandomIndicesWithNulls/524288/10/8 107.165M items/sec 187.372M items/sec 74.845 {'family_index': 3, 'per_family_instance_index': 2, 'run_name': 'TakeFixedSizeBinaryRandomIndicesWithNulls/524288/10/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 143, 'byte_width': 8.0, 'null_percent': 10.0} TakeFixedSizeBinaryRandomIndicesWithNulls/524288/2/8 72.662M items/sec 114.781M items/sec 57.965 {'family_index': 3, 'per_family_instance_index': 4, 'run_name': 'TakeFixedSizeBinaryRandomIndicesWithNulls/524288/2/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 96, 'byte_width': 8.0, 'null_percent': 50.0} FilterFixedSizeBinaryFilterWithNulls/524288/10/9 976.180 MiB/sec 1.480 GiB/sec 55.260 {'family_index': 1, 'per_family_instance_index': 21, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/10/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1358, 'byte_width': 9.0, 'data null%': 10.0, 'mask null%': 5.0, 'select%': 50.0} FilterFixedSizeBinaryFilterNoNulls/524288/10/9 1.023 GiB/sec 1.581 GiB/sec 54.502 {'family_index': 0, 'per_family_instance_index': 21, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/10/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1466, 'byte_width': 9.0, 'data null%': 10.0, 'mask null%': 0.0, 'select%': 50.0} FilterFixedSizeBinaryFilterWithNulls/524288/4/9 992.477 MiB/sec 1.453 GiB/sec 49.957 {'family_index': 1, 'per_family_instance_index': 9, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/4/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1400, 'byte_width': 9.0, 'data null%': 0.1, 'mask null%': 5.0, 'select%': 50.0} FilterFixedSizeBinaryFilterWithNulls/524288/7/9 997.679 MiB/sec 1.450 GiB/sec 48.846 {'family_index': 1, 'per_family_instance_index': 15, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/7/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1389, 'byte_width': 9.0, 'data null%': 1.0, 'mask null%': 5.0, 'select%': 50.0} FilterFixedSizeBinaryFilterNoNulls/524288/13/9 1.071 GiB/sec 1.581 GiB/sec 47.526 {'family_index': 0, 'per_family_instance_index': 27, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/13/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1538, 'byte_width': 9.0, 'data null%': 90.0, 'mask null%': 0.0, 'select%': 50.0} FilterFixedSizeBinaryFilterWithNulls/524288/13/9 1.008 GiB/sec 1.485 GiB/sec 47.328 {'family_index': 1, 'per_family_instance_index': 27, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/13/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1446, 'byte_width': 9.0, 'data null%': 90.0, 'mask null%': 5.0, 'select%': 50.0} FilterFixedSizeBinaryFilterWithNulls/524288/1/9 1.003 GiB/sec 1.452 GiB/sec 44.708 {'family_index': 1, 'per_family_instance_index': 3, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/1/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1437, 'byte_width': 9.0, 'data null%': 0.0, 'mask null%': 5.0, 'select%': 50.0} FilterFixedSizeBinaryFilterNoNulls/524288/7/9 1.105 GiB/sec 1.568 GiB/sec 41.954 {'family_index': 0, 'per_family_instance_index': 15, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/7/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1587, 'byte_width': 9.0, 'data null%': 1.0, 'mask null%': 0.0, 'select%': 50.0} FilterFixedSizeBinaryFilterNoNulls/524288/4/9 1.163 GiB/sec 1.613 GiB/sec 38.639 {'family_index': 0, 'per_family_instance_index': 9, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/4/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1662, 'byte_width': 9.0, 'data null%': 0.1, 'mask null%': 0.0, 'select%': 50.0} FilterFixedSizeBinaryFilterWithNulls/524288/14/9 8.884 GiB/sec 12.117 GiB/sec 36.381 {'family_index': 1, 'per_family_instance_index': 29, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/14/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 12508, 'byte_width': 9.0, 'data null%': 90.0, 'mask null%': 5.0, 'select%': 1.0} FilterFixedSizeBinaryFilterWithNulls/524288/11/9 8.886 GiB/sec 12.075 GiB/sec 35.892 {'family_index': 1, 'per_family_instance_index': 23, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/11/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 12716, 'byte_width': 9.0, 'data null%': 10.0, 'mask null%': 5.0, 'select%': 1.0} TakeFixedSizeBinaryMonotonicIndices/524288/1000/9 134.765M items/sec 182.868M items/sec 35.694 {'family_index': 4, 'per_family_instance_index': 1, 'run_name': 'TakeFixedSizeBinaryMonotonicIndices/524288/1000/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 206, 'byte_width': 9.0, 'null_percent': 0.1} FilterFixedSizeBinaryFilterNoNulls/524288/5/8 11.393 GiB/sec 15.091 GiB/sec 32.453 {'family_index': 0, 'per_family_instance_index': 10, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/5/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 16510, 'byte_width': 8.0, 'data null%': 0.1, 'mask null%': 0.0, 'select%': 1.0} FilterFixedSizeBinaryFilterNoNulls/524288/8/8 11.573 GiB/sec 15.102 GiB/sec 30.496 {'family_index': 0, 'per_family_instance_index': 16, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/8/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 16684, 'byte_width': 8.0, 'data null%': 1.0, 'mask null%': 0.0, 'select%': 1.0} FilterFixedSizeBinaryFilterWithNulls/524288/11/8 7.740 GiB/sec 10.059 GiB/sec 29.956 {'family_index': 1, 'per_family_instance_index': 22, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/11/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 10972, 'byte_width': 8.0, 'data null%': 10.0, 'mask null%': 5.0, 'select%': 1.0} FilterFixedSizeBinaryFilterWithNulls/524288/14/8 7.733 GiB/sec 9.915 GiB/sec 28.213 {'family_index': 1, 'per_family_instance_index': 28, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/14/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 10991, 'byte_width': 8.0, 'data null%': 90.0, 'mask null%': 5.0, 'select%': 1.0} FilterFixedSizeBinaryFilterWithNulls/524288/5/8 7.682 GiB/sec 9.765 GiB/sec 27.109 {'family_index': 1, 'per_family_instance_index': 10, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/5/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 10991, 'byte_width': 8.0, 'data null%': 0.1, 'mask null%': 5.0, 'select%': 1.0} FilterFixedSizeBinaryFilterWithNulls/524288/8/9 8.856 GiB/sec 11.180 GiB/sec 26.241 {'family_index': 1, 'per_family_instance_index': 17, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/8/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 12571, 'byte_width': 9.0, 'data null%': 1.0, 'mask null%': 5.0, 'select%': 1.0} FilterFixedSizeBinaryFilterWithNulls/524288/8/8 7.735 GiB/sec 9.710 GiB/sec 25.530 {'family_index': 1, 'per_family_instance_index': 16, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/8/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 11069, 'byte_width': 8.0, 'data null%': 1.0, 'mask null%': 5.0, 'select%': 1.0} TakeFixedSizeBinaryMonotonicIndices/524288/10/9 128.606M items/sec 160.249M items/sec 24.604 {'family_index': 4, 'per_family_instance_index': 3, 'run_name': 'TakeFixedSizeBinaryMonotonicIndices/524288/10/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 209, 'byte_width': 9.0, 'null_percent': 10.0} FilterFixedSizeBinaryFilterNoNulls/524288/11/8 12.033 GiB/sec 14.737 GiB/sec 22.478 {'family_index': 0, 'per_family_instance_index': 22, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/11/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 17220, 'byte_width': 8.0, 'data null%': 10.0, 'mask null%': 0.0, 'select%': 1.0} FilterFixedSizeBinaryFilterNoNulls/524288/14/8 12.141 GiB/sec 14.761 GiB/sec 21.579 {'family_index': 0, 'per_family_instance_index': 28, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/14/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 17343, 'byte_width': 8.0, 'data null%': 90.0, 'mask null%': 0.0, 'select%': 1.0} FilterFixedSizeBinaryFilterWithNulls/524288/5/9 8.825 GiB/sec 10.633 GiB/sec 20.489 {'family_index': 1, 'per_family_instance_index': 11, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/5/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 12543, 'byte_width': 9.0, 'data null%': 0.1, 'mask null%': 5.0, 'select%': 1.0} FilterFixedSizeBinaryFilterWithNulls/524288/2/8 8.300 GiB/sec 9.969 GiB/sec 20.117 {'family_index': 1, 'per_family_instance_index': 4, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/2/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 11819, 'byte_width': 8.0, 'data null%': 0.0, 'mask null%': 5.0, 'select%': 1.0} FilterFixedSizeBinaryFilterNoNulls/524288/5/9 12.954 GiB/sec 15.192 GiB/sec 17.273 {'family_index': 0, 'per_family_instance_index': 11, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/5/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 18572, 'byte_width': 9.0, 'data null%': 0.1, 'mask null%': 0.0, 'select%': 1.0} FilterFixedSizeBinaryFilterNoNulls/524288/8/9 13.181 GiB/sec 15.222 GiB/sec 15.490 {'family_index': 0, 'per_family_instance_index': 17, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/8/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 18904, 'byte_width': 9.0, 'data null%': 1.0, 'mask null%': 0.0, 'select%': 1.0} FilterFixedSizeBinaryFilterWithNulls/524288/2/9 9.344 GiB/sec 10.632 GiB/sec 13.784 {'family_index': 1, 'per_family_instance_index': 5, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/2/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 13291, 'byte_width': 9.0, 'data null%': 0.0, 'mask null%': 5.0, 'select%': 1.0} FilterFixedSizeBinaryFilterNoNulls/524288/11/9 13.566 GiB/sec 14.894 GiB/sec 9.789 {'family_index': 0, 'per_family_instance_index': 23, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/11/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 19349, 'byte_width': 9.0, 'data null%': 10.0, 'mask null%': 0.0, 'select%': 1.0} FilterFixedSizeBinaryFilterNoNulls/524288/14/9 13.603 GiB/sec 14.863 GiB/sec 9.265 {'family_index': 0, 'per_family_instance_index': 29, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/14/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 19490, 'byte_width': 9.0, 'data null%': 90.0, 'mask null%': 0.0, 'select%': 1.0} TakeFixedSizeBinaryRandomIndicesNoNulls/524288/10/9 124.390M items/sec 133.566M items/sec 7.377 {'family_index': 2, 'per_family_instance_index': 3, 'run_name': 'TakeFixedSizeBinaryRandomIndicesNoNulls/524288/10/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 164, 'byte_width': 9.0, 'null_percent': 10.0} TakeFixedSizeBinaryMonotonicIndices/524288/2/9 116.792M items/sec 124.182M items/sec 6.328 {'family_index': 4, 'per_family_instance_index': 5, 'run_name': 'TakeFixedSizeBinaryMonotonicIndices/524288/2/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 161, 'byte_width': 9.0, 'null_percent': 50.0} TakeFixedSizeBinaryRandomIndicesNoNulls/524288/1000/9 135.860M items/sec 142.524M items/sec 4.905 {'family_index': 2, 'per_family_instance_index': 1, 'run_name': 'TakeFixedSizeBinaryRandomIndicesNoNulls/524288/1000/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 180, 'byte_width': 9.0, 'null_percent': 0.1} TakeFixedSizeBinaryRandomIndicesWithNulls/524288/1000/9 131.123M items/sec 137.400M items/sec 4.788 {'family_index': 3, 'per_family_instance_index': 1, 'run_name': 'TakeFixedSizeBinaryRandomIndicesWithNulls/524288/1000/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 176, 'byte_width': 9.0, 'null_percent': 0.1} TakeFixedSizeBinaryRandomIndicesNoNulls/524288/0/9 220.634M items/sec 230.872M items/sec 4.640 {'family_index': 2, 'per_family_instance_index': 9, 'run_name': 'TakeFixedSizeBinaryRandomIndicesNoNulls/524288/0/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 295, 'byte_width': 9.0, 'null_percent': 0.0} TakeFixedSizeBinaryRandomIndicesNoNulls/524288/2/9 97.425M items/sec 101.477M items/sec 4.159 {'family_index': 2, 'per_family_instance_index': 5, 'run_name': 'TakeFixedSizeBinaryRandomIndicesNoNulls/524288/2/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 130, 'byte_width': 9.0, 'null_percent': 50.0} TakeFixedSizeBinaryRandomIndicesWithNulls/524288/10/9 104.830M items/sec 108.346M items/sec 3.354 {'family_index': 3, 'per_family_instance_index': 3, 'run_name': 'TakeFixedSizeBinaryRandomIndicesWithNulls/524288/10/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 100, 'byte_width': 9.0, 'null_percent': 10.0} TakeFixedSizeBinaryRandomIndicesWithNulls/524288/1/9 378.858M items/sec 387.322M items/sec 2.234 {'family_index': 3, 'per_family_instance_index': 7, 'run_name': 'TakeFixedSizeBinaryRandomIndicesWithNulls/524288/1/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 506, 'byte_width': 9.0, 'null_percent': 100.0} TakeFixedSizeBinaryRandomIndicesWithNulls/524288/0/9 221.900M items/sec 226.450M items/sec 2.050 {'family_index': 3, 'per_family_instance_index': 9, 'run_name': 'TakeFixedSizeBinaryRandomIndicesWithNulls/524288/0/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 295, 'byte_width': 9.0, 'null_percent': 0.0} TakeFixedSizeBinaryMonotonicIndices/524288/0/9 248.664M items/sec 253.037M items/sec 1.758 {'family_index': 4, 'per_family_instance_index': 9, 'run_name': 'TakeFixedSizeBinaryMonotonicIndices/524288/0/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 332, 'byte_width': 9.0, 'null_percent': 0.0} TakeFixedSizeBinaryRandomIndicesNoNulls/524288/1/9 197.730M items/sec 201.173M items/sec 1.741 {'family_index': 2, 'per_family_instance_index': 7, 'run_name': 'TakeFixedSizeBinaryRandomIndicesNoNulls/524288/1/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 264, 'byte_width': 9.0, 'null_percent': 100.0} TakeFixedSizeBinaryRandomIndicesWithNulls/524288/2/9 73.196M items/sec 74.167M items/sec 1.327 {'family_index': 3, 'per_family_instance_index': 5, 'run_name': 'TakeFixedSizeBinaryRandomIndicesWithNulls/524288/2/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 96, 'byte_width': 9.0, 'null_percent': 50.0} TakeFixedSizeBinaryMonotonicIndices/524288/1/9 192.545M items/sec 188.138M items/sec -2.289 {'family_index': 4, 'per_family_instance_index': 7, 'run_name': 'TakeFixedSizeBinaryMonotonicIndices/524288/1/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 257, 'byte_width': 9.0, 'null_percent': 100.0} ``` ### Are these changes tested? Yes. ### Are there any user-facing changes? No. * Closes: #39740 Authored-by: Antoine Pitrou <[email protected]> Signed-off-by: Antoine Pitrou <[email protected]> --- .../compute/kernels/vector_selection_benchmark.cc | 72 ++++++++- .../kernels/vector_selection_filter_internal.cc | 167 ++++++++++++--------- .../compute/kernels/vector_selection_internal.cc | 22 ++- .../compute/kernels/vector_selection_internal.h | 2 +- .../kernels/vector_selection_take_internal.cc | 76 +++++++--- .../arrow/compute/kernels/vector_selection_test.cc | 150 +++++++++++++----- 6 files changed, 348 insertions(+), 141 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/vector_selection_benchmark.cc b/cpp/src/arrow/compute/kernels/vector_selection_benchmark.cc index 25e30e65a3..e65d5dbcab 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_benchmark.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_benchmark.cc @@ -128,6 +128,13 @@ struct TakeBenchmark { Bench(values); } + void FixedSizeBinary() { + const int32_t byte_width = static_cast<int32_t>(state.range(2)); + auto values = rand.FixedSizeBinary(args.size, byte_width, args.null_proportion); + Bench(values); + state.counters["byte_width"] = byte_width; + } + void String() { int32_t string_min_length = 0, string_max_length = 32; auto values = std::static_pointer_cast<StringArray>(rand.String( @@ -149,6 +156,7 @@ struct TakeBenchmark { for (auto _ : state) { ABORT_NOT_OK(Take(values, indices).status()); } + state.SetItemsProcessed(state.iterations() * values->length()); } }; @@ -166,8 +174,7 @@ struct FilterBenchmark { void Int64() { const int64_t array_size = args.size / sizeof(int64_t); - auto values = std::static_pointer_cast<NumericArray<Int64Type>>( - rand.Int64(array_size, -100, 100, args.values_null_proportion)); + auto values = rand.Int64(array_size, -100, 100, args.values_null_proportion); Bench(values); } @@ -181,6 +188,15 @@ struct FilterBenchmark { Bench(values); } + void FixedSizeBinary() { + const int32_t byte_width = static_cast<int32_t>(state.range(2)); + const int64_t array_size = args.size / byte_width; + auto values = + rand.FixedSizeBinary(array_size, byte_width, args.values_null_proportion); + Bench(values); + state.counters["byte_width"] = byte_width; + } + void String() { int32_t string_min_length = 0, string_max_length = 32; int32_t string_mean_length = (string_max_length + string_min_length) / 2; @@ -202,6 +218,7 @@ struct FilterBenchmark { for (auto _ : state) { ABORT_NOT_OK(Filter(values, filter).status()); } + state.SetItemsProcessed(state.iterations() * values->length()); } void BenchRecordBatch() { @@ -236,6 +253,7 @@ struct FilterBenchmark { for (auto _ : state) { ABORT_NOT_OK(Filter(batch, filter).status()); } + state.SetItemsProcessed(state.iterations() * num_rows); } }; @@ -255,6 +273,14 @@ static void FilterFSLInt64FilterWithNulls(benchmark::State& state) { FilterBenchmark(state, true).FSLInt64(); } +static void FilterFixedSizeBinaryFilterNoNulls(benchmark::State& state) { + FilterBenchmark(state, false).FixedSizeBinary(); +} + +static void FilterFixedSizeBinaryFilterWithNulls(benchmark::State& state) { + FilterBenchmark(state, true).FixedSizeBinary(); +} + static void FilterStringFilterNoNulls(benchmark::State& state) { FilterBenchmark(state, false).String(); } @@ -283,6 +309,19 @@ static void TakeInt64MonotonicIndices(benchmark::State& state) { TakeBenchmark(state, /*indices_with_nulls=*/false, /*monotonic=*/true).Int64(); } +static void TakeFixedSizeBinaryRandomIndicesNoNulls(benchmark::State& state) { + TakeBenchmark(state, false).FixedSizeBinary(); +} + +static void TakeFixedSizeBinaryRandomIndicesWithNulls(benchmark::State& state) { + TakeBenchmark(state, true).FixedSizeBinary(); +} + +static void TakeFixedSizeBinaryMonotonicIndices(benchmark::State& state) { + TakeBenchmark(state, /*indices_with_nulls=*/false, /*monotonic=*/true) + .FixedSizeBinary(); +} + static void TakeFSLInt64RandomIndicesNoNulls(benchmark::State& state) { TakeBenchmark(state, false).FSLInt64(); } @@ -315,8 +354,22 @@ void FilterSetArgs(benchmark::internal::Benchmark* bench) { } } +void FilterFSBSetArgs(benchmark::internal::Benchmark* bench) { + for (int64_t size : g_data_sizes) { + for (int i = 0; i < static_cast<int>(g_filter_params.size()); ++i) { + // FixedSizeBinary of primitive sizes (powers of two up to 32) + // have a faster path. + for (int32_t byte_width : {8, 9}) { + bench->Args({static_cast<ArgsType>(size), i, byte_width}); + } + } + } +} + BENCHMARK(FilterInt64FilterNoNulls)->Apply(FilterSetArgs); BENCHMARK(FilterInt64FilterWithNulls)->Apply(FilterSetArgs); +BENCHMARK(FilterFixedSizeBinaryFilterNoNulls)->Apply(FilterFSBSetArgs); +BENCHMARK(FilterFixedSizeBinaryFilterWithNulls)->Apply(FilterFSBSetArgs); BENCHMARK(FilterFSLInt64FilterNoNulls)->Apply(FilterSetArgs); BENCHMARK(FilterFSLInt64FilterWithNulls)->Apply(FilterSetArgs); BENCHMARK(FilterStringFilterNoNulls)->Apply(FilterSetArgs); @@ -340,9 +393,24 @@ void TakeSetArgs(benchmark::internal::Benchmark* bench) { } } +void TakeFSBSetArgs(benchmark::internal::Benchmark* bench) { + for (int64_t size : g_data_sizes) { + for (auto nulls : std::vector<ArgsType>({1000, 10, 2, 1, 0})) { + // FixedSizeBinary of primitive sizes (powers of two up to 32) + // have a faster path. + for (int32_t byte_width : {8, 9}) { + bench->Args({static_cast<ArgsType>(size), nulls, byte_width}); + } + } + } +} + BENCHMARK(TakeInt64RandomIndicesNoNulls)->Apply(TakeSetArgs); BENCHMARK(TakeInt64RandomIndicesWithNulls)->Apply(TakeSetArgs); BENCHMARK(TakeInt64MonotonicIndices)->Apply(TakeSetArgs); +BENCHMARK(TakeFixedSizeBinaryRandomIndicesNoNulls)->Apply(TakeFSBSetArgs); +BENCHMARK(TakeFixedSizeBinaryRandomIndicesWithNulls)->Apply(TakeFSBSetArgs); +BENCHMARK(TakeFixedSizeBinaryMonotonicIndices)->Apply(TakeFSBSetArgs); BENCHMARK(TakeFSLInt64RandomIndicesNoNulls)->Apply(TakeSetArgs); BENCHMARK(TakeFSLInt64RandomIndicesWithNulls)->Apply(TakeSetArgs); BENCHMARK(TakeFSLInt64MonotonicIndices)->Apply(TakeSetArgs); diff --git a/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc b/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc index a25b04ae4f..8825d697fd 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc @@ -146,36 +146,40 @@ class DropNullCounter { /// \brief The Filter implementation for primitive (fixed-width) types does not /// use the logical Arrow type but rather the physical C type. This way we only -/// generate one take function for each byte width. We use the same -/// implementation here for boolean and fixed-byte-size inputs with some -/// template specialization. -template <typename ArrowType> +/// generate one take function for each byte width. +/// +/// We use compile-time specialization for two variations: +/// - operating on boolean data (using kIsBoolean = true) +/// - operating on fixed-width data of arbitrary width (using kByteWidth = -1), +/// with the actual width only known at runtime +template <int32_t kByteWidth, bool kIsBoolean = false> class PrimitiveFilterImpl { public: - using T = typename std::conditional<std::is_same<ArrowType, BooleanType>::value, - uint8_t, typename ArrowType::c_type>::type; - PrimitiveFilterImpl(const ArraySpan& values, const ArraySpan& filter, FilterOptions::NullSelectionBehavior null_selection, ArrayData* out_arr) - : values_is_valid_(values.buffers[0].data), - values_data_(reinterpret_cast<const T*>(values.buffers[1].data)), + : byte_width_(values.type->byte_width()), + values_is_valid_(values.buffers[0].data), + values_data_(values.buffers[1].data), values_null_count_(values.null_count), values_offset_(values.offset), values_length_(values.length), filter_(filter), null_selection_(null_selection) { - if (values.type->id() != Type::BOOL) { + if constexpr (kByteWidth >= 0 && !kIsBoolean) { + DCHECK_EQ(kByteWidth, byte_width_); + } + if constexpr (!kIsBoolean) { // No offset applied for boolean because it's a bitmap - values_data_ += values.offset; + values_data_ += values.offset * byte_width(); } if (out_arr->buffers[0] != nullptr) { // May be unallocated if neither filter nor values contain nulls out_is_valid_ = out_arr->buffers[0]->mutable_data(); } - out_data_ = reinterpret_cast<T*>(out_arr->buffers[1]->mutable_data()); - out_offset_ = out_arr->offset; + out_data_ = out_arr->buffers[1]->mutable_data(); + DCHECK_EQ(out_arr->offset, 0); out_length_ = out_arr->length; out_position_ = 0; } @@ -201,14 +205,11 @@ class PrimitiveFilterImpl { [&](int64_t position, int64_t segment_length, bool filter_valid) { if (filter_valid) { CopyBitmap(values_is_valid_, values_offset_ + position, segment_length, - out_is_valid_, out_offset_ + out_position_); + out_is_valid_, out_position_); WriteValueSegment(position, segment_length); } else { - bit_util::SetBitsTo(out_is_valid_, out_offset_ + out_position_, - segment_length, false); - memset(out_data_ + out_offset_ + out_position_, 0, - segment_length * sizeof(T)); - out_position_ += segment_length; + bit_util::SetBitsTo(out_is_valid_, out_position_, segment_length, false); + WriteNullSegment(segment_length); } return true; }); @@ -218,7 +219,7 @@ class PrimitiveFilterImpl { if (out_is_valid_) { // Set all to valid, so only if nulls are produced by EMIT_NULL, we need // to set out_is_valid[i] to false. - bit_util::SetBitsTo(out_is_valid_, out_offset_, out_length_, true); + bit_util::SetBitsTo(out_is_valid_, 0, out_length_, true); } return VisitPlainxREEFilterOutputSegments( filter_, /*filter_may_have_nulls=*/true, null_selection_, @@ -226,11 +227,8 @@ class PrimitiveFilterImpl { if (filter_valid) { WriteValueSegment(position, segment_length); } else { - bit_util::SetBitsTo(out_is_valid_, out_offset_ + out_position_, - segment_length, false); - memset(out_data_ + out_offset_ + out_position_, 0, - segment_length * sizeof(T)); - out_position_ += segment_length; + bit_util::SetBitsTo(out_is_valid_, out_position_, segment_length, false); + WriteNullSegment(segment_length); } return true; }); @@ -260,13 +258,13 @@ class PrimitiveFilterImpl { values_length_); auto WriteNotNull = [&](int64_t index) { - bit_util::SetBit(out_is_valid_, out_offset_ + out_position_); + bit_util::SetBit(out_is_valid_, out_position_); // Increments out_position_ WriteValue(index); }; auto WriteMaybeNull = [&](int64_t index) { - bit_util::SetBitTo(out_is_valid_, out_offset_ + out_position_, + bit_util::SetBitTo(out_is_valid_, out_position_, bit_util::GetBit(values_is_valid_, values_offset_ + index)); // Increments out_position_ WriteValue(index); @@ -279,15 +277,14 @@ class PrimitiveFilterImpl { BitBlockCount data_block = data_counter.NextWord(); if (filter_block.AllSet() && data_block.AllSet()) { // Fastest path: all values in block are included and not null - bit_util::SetBitsTo(out_is_valid_, out_offset_ + out_position_, - filter_block.length, true); + bit_util::SetBitsTo(out_is_valid_, out_position_, filter_block.length, true); WriteValueSegment(in_position, filter_block.length); in_position += filter_block.length; } else if (filter_block.AllSet()) { // Faster: all values are selected, but some values are null // Batch copy bits from values validity bitmap to output validity bitmap CopyBitmap(values_is_valid_, values_offset_ + in_position, filter_block.length, - out_is_valid_, out_offset_ + out_position_); + out_is_valid_, out_position_); WriteValueSegment(in_position, filter_block.length); in_position += filter_block.length; } else if (filter_block.NoneSet() && null_selection_ == FilterOptions::DROP) { @@ -326,7 +323,7 @@ class PrimitiveFilterImpl { WriteNotNull(in_position); } else if (!is_valid) { // Filter slot is null, so we have a null in the output - bit_util::ClearBit(out_is_valid_, out_offset_ + out_position_); + bit_util::ClearBit(out_is_valid_, out_position_); WriteNull(); } ++in_position; @@ -362,7 +359,7 @@ class PrimitiveFilterImpl { WriteMaybeNull(in_position); } else if (!is_valid) { // Filter slot is null, so we have a null in the output - bit_util::ClearBit(out_is_valid_, out_offset_ + out_position_); + bit_util::ClearBit(out_is_valid_, out_position_); WriteNull(); } ++in_position; @@ -376,54 +373,72 @@ class PrimitiveFilterImpl { // Write the next out_position given the selected in_position for the input // data and advance out_position void WriteValue(int64_t in_position) { - out_data_[out_offset_ + out_position_++] = values_data_[in_position]; + if constexpr (kIsBoolean) { + bit_util::SetBitTo(out_data_, out_position_, + bit_util::GetBit(values_data_, values_offset_ + in_position)); + } else { + memcpy(out_data_ + out_position_ * byte_width(), + values_data_ + in_position * byte_width(), byte_width()); + } + ++out_position_; } void WriteValueSegment(int64_t in_start, int64_t length) { - std::memcpy(out_data_ + out_position_, values_data_ + in_start, length * sizeof(T)); + if constexpr (kIsBoolean) { + CopyBitmap(values_data_, values_offset_ + in_start, length, out_data_, + out_position_); + } else { + memcpy(out_data_ + out_position_ * byte_width(), + values_data_ + in_start * byte_width(), length * byte_width()); + } out_position_ += length; } void WriteNull() { - // Zero the memory - out_data_[out_offset_ + out_position_++] = T{}; + if constexpr (kIsBoolean) { + // Zero the bit + bit_util::ClearBit(out_data_, out_position_); + } else { + // Zero the memory + memset(out_data_ + out_position_ * byte_width(), 0, byte_width()); + } + ++out_position_; + } + + void WriteNullSegment(int64_t length) { + if constexpr (kIsBoolean) { + // Zero the bits + bit_util::SetBitsTo(out_data_, out_position_, length, false); + } else { + // Zero the memory + memset(out_data_ + out_position_ * byte_width(), 0, length * byte_width()); + } + out_position_ += length; + } + + constexpr int32_t byte_width() const { + if constexpr (kByteWidth >= 0) { + return kByteWidth; + } else { + return byte_width_; + } } private: + int32_t byte_width_; const uint8_t* values_is_valid_; - const T* values_data_; + const uint8_t* values_data_; int64_t values_null_count_; int64_t values_offset_; int64_t values_length_; const ArraySpan& filter_; FilterOptions::NullSelectionBehavior null_selection_; uint8_t* out_is_valid_ = NULLPTR; - T* out_data_; - int64_t out_offset_; + uint8_t* out_data_; int64_t out_length_; int64_t out_position_; }; -template <> -inline void PrimitiveFilterImpl<BooleanType>::WriteValue(int64_t in_position) { - bit_util::SetBitTo(out_data_, out_offset_ + out_position_++, - bit_util::GetBit(values_data_, values_offset_ + in_position)); -} - -template <> -inline void PrimitiveFilterImpl<BooleanType>::WriteValueSegment(int64_t in_start, - int64_t length) { - CopyBitmap(values_data_, values_offset_ + in_start, length, out_data_, - out_offset_ + out_position_); - out_position_ += length; -} - -template <> -inline void PrimitiveFilterImpl<BooleanType>::WriteNull() { - // Zero the bit - bit_util::ClearBit(out_data_, out_offset_ + out_position_++); -} - Status PrimitiveFilterExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { const ArraySpan& values = batch[0].array; const ArraySpan& filter = batch[1].array; @@ -459,22 +474,32 @@ Status PrimitiveFilterExec(KernelContext* ctx, const ExecSpan& batch, ExecResult switch (bit_width) { case 1: - PrimitiveFilterImpl<BooleanType>(values, filter, null_selection, out_arr).Exec(); + PrimitiveFilterImpl<1, /*kIsBoolean=*/true>(values, filter, null_selection, out_arr) + .Exec(); break; case 8: - PrimitiveFilterImpl<UInt8Type>(values, filter, null_selection, out_arr).Exec(); + PrimitiveFilterImpl<1>(values, filter, null_selection, out_arr).Exec(); break; case 16: - PrimitiveFilterImpl<UInt16Type>(values, filter, null_selection, out_arr).Exec(); + PrimitiveFilterImpl<2>(values, filter, null_selection, out_arr).Exec(); break; case 32: - PrimitiveFilterImpl<UInt32Type>(values, filter, null_selection, out_arr).Exec(); + PrimitiveFilterImpl<4>(values, filter, null_selection, out_arr).Exec(); break; case 64: - PrimitiveFilterImpl<UInt64Type>(values, filter, null_selection, out_arr).Exec(); + PrimitiveFilterImpl<8>(values, filter, null_selection, out_arr).Exec(); + break; + case 128: + // For INTERVAL_MONTH_DAY_NANO, DECIMAL128 + PrimitiveFilterImpl<16>(values, filter, null_selection, out_arr).Exec(); + break; + case 256: + // For DECIMAL256 + PrimitiveFilterImpl<32>(values, filter, null_selection, out_arr).Exec(); break; default: - DCHECK(false) << "Invalid values bit width"; + // Non-specializing on byte width + PrimitiveFilterImpl<-1>(values, filter, null_selection, out_arr).Exec(); break; } return Status::OK(); @@ -1050,10 +1075,10 @@ void PopulateFilterKernels(std::vector<SelectionKernelData>* out) { {InputType(match::Primitive()), plain_filter, PrimitiveFilterExec}, {InputType(match::BinaryLike()), plain_filter, BinaryFilterExec}, {InputType(match::LargeBinaryLike()), plain_filter, BinaryFilterExec}, - {InputType(Type::FIXED_SIZE_BINARY), plain_filter, FSBFilterExec}, {InputType(null()), plain_filter, NullFilterExec}, - {InputType(Type::DECIMAL128), plain_filter, FSBFilterExec}, - {InputType(Type::DECIMAL256), plain_filter, FSBFilterExec}, + {InputType(Type::FIXED_SIZE_BINARY), plain_filter, PrimitiveFilterExec}, + {InputType(Type::DECIMAL128), plain_filter, PrimitiveFilterExec}, + {InputType(Type::DECIMAL256), plain_filter, PrimitiveFilterExec}, {InputType(Type::DICTIONARY), plain_filter, DictionaryFilterExec}, {InputType(Type::EXTENSION), plain_filter, ExtensionFilterExec}, {InputType(Type::LIST), plain_filter, ListFilterExec}, @@ -1068,10 +1093,10 @@ void PopulateFilterKernels(std::vector<SelectionKernelData>* out) { {InputType(match::Primitive()), ree_filter, PrimitiveFilterExec}, {InputType(match::BinaryLike()), ree_filter, BinaryFilterExec}, {InputType(match::LargeBinaryLike()), ree_filter, BinaryFilterExec}, - {InputType(Type::FIXED_SIZE_BINARY), ree_filter, FSBFilterExec}, {InputType(null()), ree_filter, NullFilterExec}, - {InputType(Type::DECIMAL128), ree_filter, FSBFilterExec}, - {InputType(Type::DECIMAL256), ree_filter, FSBFilterExec}, + {InputType(Type::FIXED_SIZE_BINARY), ree_filter, PrimitiveFilterExec}, + {InputType(Type::DECIMAL128), ree_filter, PrimitiveFilterExec}, + {InputType(Type::DECIMAL256), ree_filter, PrimitiveFilterExec}, {InputType(Type::DICTIONARY), ree_filter, DictionaryFilterExec}, {InputType(Type::EXTENSION), ree_filter, ExtensionFilterExec}, {InputType(Type::LIST), ree_filter, ListFilterExec}, diff --git a/cpp/src/arrow/compute/kernels/vector_selection_internal.cc b/cpp/src/arrow/compute/kernels/vector_selection_internal.cc index 98eb37e9c5..a0fe2808e3 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_internal.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_internal.cc @@ -77,7 +77,8 @@ Status PreallocatePrimitiveArrayData(KernelContext* ctx, int64_t length, int bit if (bit_width == 1) { ARROW_ASSIGN_OR_RAISE(out->buffers[1], ctx->AllocateBitmap(length)); } else { - ARROW_ASSIGN_OR_RAISE(out->buffers[1], ctx->Allocate(length * bit_width / 8)); + ARROW_ASSIGN_OR_RAISE(out->buffers[1], + ctx->Allocate(bit_util::BytesForBits(length * bit_width))); } return Status::OK(); } @@ -899,10 +900,6 @@ Status FilterExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { } // namespace -Status FSBFilterExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { - return FilterExec<FSBSelectionImpl>(ctx, batch, out); -} - Status ListFilterExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { return FilterExec<ListSelectionImpl<ListType>>(ctx, batch, out); } @@ -946,7 +943,20 @@ Status LargeVarBinaryTakeExec(KernelContext* ctx, const ExecSpan& batch, } Status FSBTakeExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { - return TakeExec<FSBSelectionImpl>(ctx, batch, out); + const ArraySpan& values = batch[0].array; + const auto byte_width = values.type->byte_width(); + // Use primitive Take implementation (presumably faster) for some byte widths + switch (byte_width) { + case 1: + case 2: + case 4: + case 8: + case 16: + case 32: + return PrimitiveTakeExec(ctx, batch, out); + default: + return TakeExec<FSBSelectionImpl>(ctx, batch, out); + } } Status ListTakeExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { diff --git a/cpp/src/arrow/compute/kernels/vector_selection_internal.h b/cpp/src/arrow/compute/kernels/vector_selection_internal.h index b9eba6ea66..95f3e51cd6 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_internal.h +++ b/cpp/src/arrow/compute/kernels/vector_selection_internal.h @@ -70,7 +70,6 @@ void VisitPlainxREEFilterOutputSegments( FilterOptions::NullSelectionBehavior null_selection, const EmitREEFilterSegment& emit_segment); -Status FSBFilterExec(KernelContext*, const ExecSpan&, ExecResult*); Status ListFilterExec(KernelContext*, const ExecSpan&, ExecResult*); Status LargeListFilterExec(KernelContext*, const ExecSpan&, ExecResult*); Status FSLFilterExec(KernelContext*, const ExecSpan&, ExecResult*); @@ -79,6 +78,7 @@ Status MapFilterExec(KernelContext*, const ExecSpan&, ExecResult*); Status VarBinaryTakeExec(KernelContext*, const ExecSpan&, ExecResult*); Status LargeVarBinaryTakeExec(KernelContext*, const ExecSpan&, ExecResult*); +Status PrimitiveTakeExec(KernelContext*, const ExecSpan&, ExecResult*); Status FSBTakeExec(KernelContext*, const ExecSpan&, ExecResult*); Status ListTakeExec(KernelContext*, const ExecSpan&, ExecResult*); Status LargeListTakeExec(KernelContext*, const ExecSpan&, ExecResult*); diff --git a/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc b/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc index 612de8505d..89b3f7d0d3 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc @@ -334,11 +334,15 @@ using TakeState = OptionsWrapper<TakeOptions>; /// only generate one take function for each byte width. /// /// This function assumes that the indices have been boundschecked. -template <typename IndexCType, typename ValueCType> +template <typename IndexCType, typename ValueWidthConstant> struct PrimitiveTakeImpl { + static constexpr int kValueWidth = ValueWidthConstant::value; + static void Exec(const ArraySpan& values, const ArraySpan& indices, ArrayData* out_arr) { - const auto* values_data = values.GetValues<ValueCType>(1); + DCHECK_EQ(values.type->byte_width(), kValueWidth); + const auto* values_data = + values.GetValues<uint8_t>(1, 0) + kValueWidth * values.offset; const uint8_t* values_is_valid = values.buffers[0].data; auto values_offset = values.offset; @@ -346,9 +350,10 @@ struct PrimitiveTakeImpl { const uint8_t* indices_is_valid = indices.buffers[0].data; auto indices_offset = indices.offset; - auto out = out_arr->GetMutableValues<ValueCType>(1); + auto out = out_arr->GetMutableValues<uint8_t>(1, 0) + kValueWidth * out_arr->offset; auto out_is_valid = out_arr->buffers[0]->mutable_data(); auto out_offset = out_arr->offset; + DCHECK_EQ(out_offset, 0); // If either the values or indices have nulls, we preemptively zero out the // out validity bitmap so that we don't have to use ClearBit in each @@ -357,6 +362,19 @@ struct PrimitiveTakeImpl { bit_util::SetBitsTo(out_is_valid, out_offset, indices.length, false); } + auto WriteValue = [&](int64_t position) { + memcpy(out + position * kValueWidth, + values_data + indices_data[position] * kValueWidth, kValueWidth); + }; + + auto WriteZero = [&](int64_t position) { + memset(out + position * kValueWidth, 0, kValueWidth); + }; + + auto WriteZeroSegment = [&](int64_t position, int64_t length) { + memset(out + position * kValueWidth, 0, kValueWidth * length); + }; + OptionalBitBlockCounter indices_bit_counter(indices_is_valid, indices_offset, indices.length); int64_t position = 0; @@ -370,7 +388,7 @@ struct PrimitiveTakeImpl { // Fastest path: neither values nor index nulls bit_util::SetBitsTo(out_is_valid, out_offset + position, block.length, true); for (int64_t i = 0; i < block.length; ++i) { - out[position] = values_data[indices_data[position]]; + WriteValue(position); ++position; } } else if (block.popcount > 0) { @@ -379,14 +397,14 @@ struct PrimitiveTakeImpl { if (bit_util::GetBit(indices_is_valid, indices_offset + position)) { // index is not null bit_util::SetBit(out_is_valid, out_offset + position); - out[position] = values_data[indices_data[position]]; + WriteValue(position); } else { - out[position] = ValueCType{}; + WriteZero(position); } ++position; } } else { - memset(out + position, 0, sizeof(ValueCType) * block.length); + WriteZeroSegment(position, block.length); position += block.length; } } else { @@ -397,11 +415,11 @@ struct PrimitiveTakeImpl { if (bit_util::GetBit(values_is_valid, values_offset + indices_data[position])) { // value is not null - out[position] = values_data[indices_data[position]]; + WriteValue(position); bit_util::SetBit(out_is_valid, out_offset + position); ++valid_count; } else { - out[position] = ValueCType{}; + WriteZero(position); } ++position; } @@ -414,16 +432,16 @@ struct PrimitiveTakeImpl { bit_util::GetBit(values_is_valid, values_offset + indices_data[position])) { // index is not null && value is not null - out[position] = values_data[indices_data[position]]; + WriteValue(position); bit_util::SetBit(out_is_valid, out_offset + position); ++valid_count; } else { - out[position] = ValueCType{}; + WriteZero(position); } ++position; } } else { - memset(out + position, 0, sizeof(ValueCType) * block.length); + WriteZeroSegment(position, block.length); position += block.length; } } @@ -554,6 +572,8 @@ void TakeIndexDispatch(const ArraySpan& values, const ArraySpan& indices, } } +} // namespace + Status PrimitiveTakeExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { const ArraySpan& values = batch[0].array; const ArraySpan& indices = batch[1].array; @@ -577,24 +597,40 @@ Status PrimitiveTakeExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* TakeIndexDispatch<BooleanTakeImpl>(values, indices, out_arr); break; case 8: - TakeIndexDispatch<PrimitiveTakeImpl, int8_t>(values, indices, out_arr); + TakeIndexDispatch<PrimitiveTakeImpl, std::integral_constant<int, 1>>( + values, indices, out_arr); break; case 16: - TakeIndexDispatch<PrimitiveTakeImpl, int16_t>(values, indices, out_arr); + TakeIndexDispatch<PrimitiveTakeImpl, std::integral_constant<int, 2>>( + values, indices, out_arr); break; case 32: - TakeIndexDispatch<PrimitiveTakeImpl, int32_t>(values, indices, out_arr); + TakeIndexDispatch<PrimitiveTakeImpl, std::integral_constant<int, 4>>( + values, indices, out_arr); break; case 64: - TakeIndexDispatch<PrimitiveTakeImpl, int64_t>(values, indices, out_arr); + TakeIndexDispatch<PrimitiveTakeImpl, std::integral_constant<int, 8>>( + values, indices, out_arr); break; - default: - DCHECK(false) << "Invalid values byte width"; + case 128: + // For INTERVAL_MONTH_DAY_NANO, DECIMAL128 + TakeIndexDispatch<PrimitiveTakeImpl, std::integral_constant<int, 16>>( + values, indices, out_arr); + break; + case 256: + // For DECIMAL256 + TakeIndexDispatch<PrimitiveTakeImpl, std::integral_constant<int, 32>>( + values, indices, out_arr); break; + default: + return Status::NotImplemented("Unsupported primitive type for take: ", + *values.type); } return Status::OK(); } +namespace { + // ---------------------------------------------------------------------- // Null take @@ -836,8 +872,8 @@ void PopulateTakeKernels(std::vector<SelectionKernelData>* out) { {InputType(match::LargeBinaryLike()), take_indices, LargeVarBinaryTakeExec}, {InputType(Type::FIXED_SIZE_BINARY), take_indices, FSBTakeExec}, {InputType(null()), take_indices, NullTakeExec}, - {InputType(Type::DECIMAL128), take_indices, FSBTakeExec}, - {InputType(Type::DECIMAL256), take_indices, FSBTakeExec}, + {InputType(Type::DECIMAL128), take_indices, PrimitiveTakeExec}, + {InputType(Type::DECIMAL256), take_indices, PrimitiveTakeExec}, {InputType(Type::DICTIONARY), take_indices, DictionaryTake}, {InputType(Type::EXTENSION), take_indices, ExtensionTake}, {InputType(Type::LIST), take_indices, ListTakeExec}, diff --git a/cpp/src/arrow/compute/kernels/vector_selection_test.cc b/cpp/src/arrow/compute/kernels/vector_selection_test.cc index bdf9f5454f..ec94b328ea 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_test.cc @@ -309,6 +309,33 @@ class TestFilterKernel : public ::testing::Test { AssertFilter(values_array, ree_filter, expected_array); } + void TestNumericBasics(const std::shared_ptr<DataType>& type) { + ARROW_SCOPED_TRACE("type = ", *type); + AssertFilter(type, "[]", "[]", "[]"); + + AssertFilter(type, "[9]", "[0]", "[]"); + AssertFilter(type, "[9]", "[1]", "[9]"); + AssertFilter(type, "[9]", "[null]", "[null]"); + AssertFilter(type, "[null]", "[0]", "[]"); + AssertFilter(type, "[null]", "[1]", "[null]"); + AssertFilter(type, "[null]", "[null]", "[null]"); + + AssertFilter(type, "[7, 8, 9]", "[0, 1, 0]", "[8]"); + AssertFilter(type, "[7, 8, 9]", "[1, 0, 1]", "[7, 9]"); + AssertFilter(type, "[null, 8, 9]", "[0, 1, 0]", "[8]"); + AssertFilter(type, "[7, 8, 9]", "[null, 1, 0]", "[null, 8]"); + AssertFilter(type, "[7, 8, 9]", "[1, null, 1]", "[7, null, 9]"); + + AssertFilter(ArrayFromJSON(type, "[7, 8, 9]"), + ArrayFromJSON(boolean(), "[0, 1, 1, 1, 0, 1]")->Slice(3, 3), + ArrayFromJSON(type, "[7, 9]")); + + ASSERT_RAISES(Invalid, Filter(ArrayFromJSON(type, "[7, 8, 9]"), + ArrayFromJSON(boolean(), "[]"), emit_null_)); + ASSERT_RAISES(Invalid, Filter(ArrayFromJSON(type, "[7, 8, 9]"), + ArrayFromJSON(boolean(), "[]"), drop_)); + } + const FilterOptions emit_null_, drop_; }; @@ -342,6 +369,33 @@ void ValidateFilter(const std::shared_ptr<Array>& values, /*verbose=*/true); } +TEST_F(TestFilterKernel, Temporal) { + this->TestNumericBasics(time32(TimeUnit::MILLI)); + this->TestNumericBasics(time64(TimeUnit::MICRO)); + this->TestNumericBasics(timestamp(TimeUnit::NANO, "Europe/Paris")); + this->TestNumericBasics(duration(TimeUnit::SECOND)); + this->TestNumericBasics(date32()); + this->AssertFilter(date64(), "[0, 86400000, null]", "[null, 1, 0]", "[null, 86400000]"); +} + +TEST_F(TestFilterKernel, Duration) { + for (auto type : DurationTypes()) { + this->TestNumericBasics(type); + } +} + +TEST_F(TestFilterKernel, Interval) { + this->TestNumericBasics(month_interval()); + + auto type = day_time_interval(); + this->AssertFilter(type, "[[1, -600], [2, 3000], null]", "[null, 1, 0]", + "[null, [2, 3000]]"); + type = month_day_nano_interval(); + this->AssertFilter(type, + "[[1, -2, 34567890123456789], [2, 3, -34567890123456789], null]", + "[null, 1, 0]", "[null, [2, 3, -34567890123456789]]"); +} + class TestFilterKernelWithNull : public TestFilterKernel { protected: void AssertFilter(const std::string& values, const std::string& filter, @@ -401,30 +455,7 @@ class TestFilterKernelWithNumeric : public TestFilterKernel { TYPED_TEST_SUITE(TestFilterKernelWithNumeric, NumericArrowTypes); TYPED_TEST(TestFilterKernelWithNumeric, FilterNumeric) { - auto type = this->type_singleton(); - this->AssertFilter(type, "[]", "[]", "[]"); - - this->AssertFilter(type, "[9]", "[0]", "[]"); - this->AssertFilter(type, "[9]", "[1]", "[9]"); - this->AssertFilter(type, "[9]", "[null]", "[null]"); - this->AssertFilter(type, "[null]", "[0]", "[]"); - this->AssertFilter(type, "[null]", "[1]", "[null]"); - this->AssertFilter(type, "[null]", "[null]", "[null]"); - - this->AssertFilter(type, "[7, 8, 9]", "[0, 1, 0]", "[8]"); - this->AssertFilter(type, "[7, 8, 9]", "[1, 0, 1]", "[7, 9]"); - this->AssertFilter(type, "[null, 8, 9]", "[0, 1, 0]", "[8]"); - this->AssertFilter(type, "[7, 8, 9]", "[null, 1, 0]", "[null, 8]"); - this->AssertFilter(type, "[7, 8, 9]", "[1, null, 1]", "[7, null, 9]"); - - this->AssertFilter(ArrayFromJSON(type, "[7, 8, 9]"), - ArrayFromJSON(boolean(), "[0, 1, 1, 1, 0, 1]")->Slice(3, 3), - ArrayFromJSON(type, "[7, 9]")); - - ASSERT_RAISES(Invalid, Filter(ArrayFromJSON(type, "[7, 8, 9]"), - ArrayFromJSON(boolean(), "[]"), this->emit_null_)); - ASSERT_RAISES(Invalid, Filter(ArrayFromJSON(type, "[7, 8, 9]"), - ArrayFromJSON(boolean(), "[]"), this->drop_)); + this->TestNumericBasics(this->type_singleton()); } template <typename CType> @@ -588,7 +619,7 @@ TYPED_TEST(TestFilterKernelWithDecimal, FilterNumeric) { ArrayFromJSON(boolean(), "[]"), this->drop_)); } -TEST(TestFilterKernel, NoValidityBitmapButUnknownNullCount) { +TEST_F(TestFilterKernel, NoValidityBitmapButUnknownNullCount) { auto values = ArrayFromJSON(int32(), "[1, 2, 3, 4]"); auto filter = ArrayFromJSON(boolean(), "[true, true, false, true]"); @@ -1136,6 +1167,20 @@ class TestTakeKernel : public ::testing::Test { TestNoValidityBitmapButUnknownNullCount(ArrayFromJSON(type, values), ArrayFromJSON(int16(), indices)); } + + void TestNumericBasics(const std::shared_ptr<DataType>& type) { + ARROW_SCOPED_TRACE("type = ", *type); + CheckTake(type, "[7, 8, 9]", "[]", "[]"); + CheckTake(type, "[7, 8, 9]", "[0, 1, 0]", "[7, 8, 7]"); + CheckTake(type, "[null, 8, 9]", "[0, 1, 0]", "[null, 8, null]"); + CheckTake(type, "[7, 8, 9]", "[null, 1, 0]", "[null, 8, 7]"); + CheckTake(type, "[null, 8, 9]", "[]", "[]"); + CheckTake(type, "[7, 8, 9]", "[0, 0, 0, 0, 0, 0, 2]", "[7, 7, 7, 7, 7, 7, 9]"); + + std::shared_ptr<Array> arr; + ASSERT_RAISES(IndexError, TakeJSON(type, "[7, 8, 9]", int8(), "[0, 9, 0]", &arr)); + ASSERT_RAISES(IndexError, TakeJSON(type, "[7, 8, 9]", int8(), "[0, -1, 0]", &arr)); + } }; template <typename ArrowType> @@ -1201,6 +1246,34 @@ TEST_F(TestTakeKernel, TakeBoolean) { TakeJSON(boolean(), "[true, false, true]", int8(), "[0, -1, 0]", &arr)); } +TEST_F(TestTakeKernel, Temporal) { + this->TestNumericBasics(time32(TimeUnit::MILLI)); + this->TestNumericBasics(time64(TimeUnit::MICRO)); + this->TestNumericBasics(timestamp(TimeUnit::NANO, "Europe/Paris")); + this->TestNumericBasics(duration(TimeUnit::SECOND)); + this->TestNumericBasics(date32()); + CheckTake(date64(), "[0, 86400000, null]", "[null, 1, 1, 0]", + "[null, 86400000, 86400000, 0]"); +} + +TEST_F(TestTakeKernel, Duration) { + for (auto type : DurationTypes()) { + this->TestNumericBasics(type); + } +} + +TEST_F(TestTakeKernel, Interval) { + this->TestNumericBasics(month_interval()); + + auto type = day_time_interval(); + CheckTake(type, "[[1, -600], [2, 3000], null]", "[0, null, 2, 1]", + "[[1, -600], null, null, [2, 3000]]"); + type = month_day_nano_interval(); + CheckTake(type, "[[1, -2, 34567890123456789], [2, 3, -34567890123456789], null]", + "[0, null, 2, 1]", + "[[1, -2, 34567890123456789], null, null, [2, 3, -34567890123456789]]"); +} + template <typename ArrowType> class TestTakeKernelWithNumeric : public TestTakeKernelTyped<ArrowType> { protected: @@ -1216,18 +1289,7 @@ class TestTakeKernelWithNumeric : public TestTakeKernelTyped<ArrowType> { TYPED_TEST_SUITE(TestTakeKernelWithNumeric, NumericArrowTypes); TYPED_TEST(TestTakeKernelWithNumeric, TakeNumeric) { - this->AssertTake("[7, 8, 9]", "[]", "[]"); - this->AssertTake("[7, 8, 9]", "[0, 1, 0]", "[7, 8, 7]"); - this->AssertTake("[null, 8, 9]", "[0, 1, 0]", "[null, 8, null]"); - this->AssertTake("[7, 8, 9]", "[null, 1, 0]", "[null, 8, 7]"); - this->AssertTake("[null, 8, 9]", "[]", "[]"); - this->AssertTake("[7, 8, 9]", "[0, 0, 0, 0, 0, 0, 2]", "[7, 7, 7, 7, 7, 7, 9]"); - - std::shared_ptr<Array> arr; - ASSERT_RAISES(IndexError, - TakeJSON(this->type_singleton(), "[7, 8, 9]", int8(), "[0, 9, 0]", &arr)); - ASSERT_RAISES(IndexError, TakeJSON(this->type_singleton(), "[7, 8, 9]", int8(), - "[0, -1, 0]", &arr)); + this->TestNumericBasics(this->type_singleton()); } template <typename TypeClass> @@ -1816,6 +1878,7 @@ TEST(TestTakeMetaFunction, ArityChecking) { template <typename Unused = void> struct FilterRandomTest { static void Test(const std::shared_ptr<DataType>& type) { + ARROW_SCOPED_TRACE("type = ", *type); auto rand = random::RandomArrayGenerator(kRandomSeed); const int64_t length = static_cast<int64_t>(1ULL << 10); for (auto null_probability : {0.0, 0.01, 0.1, 0.999, 1.0}) { @@ -1856,6 +1919,7 @@ void CheckTakeRandom(const std::shared_ptr<Array>& values, int64_t indices_lengt template <typename ValuesType> struct TakeRandomTest { static void Test(const std::shared_ptr<DataType>& type) { + ARROW_SCOPED_TRACE("type = ", *type); auto rand = random::RandomArrayGenerator(kRandomSeed); const int64_t values_length = 64 * 16 + 1; const int64_t indices_length = 64 * 4 + 1; @@ -1897,8 +1961,10 @@ TEST(TestFilter, RandomString) { } TEST(TestFilter, RandomFixedSizeBinary) { - FilterRandomTest<>::Test(fixed_size_binary(0)); - FilterRandomTest<>::Test(fixed_size_binary(16)); + // FixedSizeBinary filter is special-cased for some widths + for (int32_t width : {0, 1, 16, 32, 35}) { + FilterRandomTest<>::Test(fixed_size_binary(width)); + } } TEST(TestTake, PrimitiveRandom) { TestRandomPrimitiveCTypes<TakeRandomTest>(); } @@ -1911,8 +1977,10 @@ TEST(TestTake, RandomString) { } TEST(TestTake, RandomFixedSizeBinary) { - TakeRandomTest<FixedSizeBinaryType>::Test(fixed_size_binary(0)); - TakeRandomTest<FixedSizeBinaryType>::Test(fixed_size_binary(16)); + // FixedSizeBinary take is special-cased for some widths + for (int32_t width : {0, 1, 16, 32, 35}) { + TakeRandomTest<FixedSizeBinaryType>::Test(fixed_size_binary(width)); + } } // ----------------------------------------------------------------------
