alamb commented on code in PR #20823:
URL: https://github.com/apache/datafusion/pull/20823#discussion_r3046452282


##########
datafusion/core/tests/datasource/object_store_access.rs:
##########
@@ -397,6 +400,348 @@ async fn query_partitioned_csv_file() {
     );
 }
 
+// =====================================================================
+// JSON (NDJSON) tests — mirrors the CSV tests above
+// =====================================================================
+
+#[tokio::test]
+async fn create_single_json_file() {
+    let test = Test::new().with_single_file_json().await;
+    assert_snapshot!(
+        test.requests(),
+        @r"
+    RequestCountingObjectStore()
+    Total Requests: 2
+    - GET  (opts) path=json_table.json head=true
+    - GET  (opts) path=json_table.json
+    "
+    );
+}
+
+#[tokio::test]
+async fn query_single_json_file() {
+    let test = Test::new().with_single_file_json().await;
+    assert_snapshot!(
+        test.query("select * from json_table").await,
+        @r"
+    ------- Query Output (2 rows) -------
+    +---------+-------+-------+
+    | c1      | c2    | c3    |
+    +---------+-------+-------+
+    | 0.00001 | 5e-12 | true  |
+    | 0.00002 | 4e-12 | false |
+    +---------+-------+-------+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 2
+    - GET  (opts) path=json_table.json head=true
+    - GET  (opts) path=json_table.json
+    "
+    );
+}
+
+#[tokio::test]
+async fn create_multi_file_json() {
+    let test = Test::new().with_multi_file_json().await;
+    assert_snapshot!(
+        test.requests(),
+        @r"
+    RequestCountingObjectStore()
+    Total Requests: 4
+    - LIST prefix=data
+    - GET  (opts) path=data/file_0.json
+    - GET  (opts) path=data/file_1.json
+    - GET  (opts) path=data/file_2.json
+    "
+    );
+}
+
+#[tokio::test]
+async fn multi_query_multi_file_json() {
+    let test = Test::new().with_multi_file_json().await;
+    assert_snapshot!(
+        test.query("select * from json_table").await,
+        @r"
+    ------- Query Output (6 rows) -------
+    +---------+-------+-------+
+    | c1      | c2    | c3    |
+    +---------+-------+-------+
+    | 0.0     | 0.0   | true  |
+    | 0.00003 | 5e-12 | false |
+    | 0.00001 | 1e-12 | true  |
+    | 0.00003 | 5e-12 | false |
+    | 0.00002 | 2e-12 | true  |
+    | 0.00003 | 5e-12 | false |
+    +---------+-------+-------+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 3
+    - GET  (opts) path=data/file_0.json
+    - GET  (opts) path=data/file_1.json
+    - GET  (opts) path=data/file_2.json
+    "
+    );
+
+    // Force a cache eviction by removing the data limit for the cache
+    assert_snapshot!(
+        test.query("set 
datafusion.runtime.list_files_cache_limit=\"0K\"").await,
+        @r"
+    ------- Query Output (0 rows) -------
+    ++
+    ++
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 0
+    "
+    );
+
+    // Then re-enable the cache
+    assert_snapshot!(
+        test.query("set 
datafusion.runtime.list_files_cache_limit=\"1M\"").await,
+        @r"
+    ------- Query Output (0 rows) -------
+    ++
+    ++
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 0
+    "
+    );
+
+    // this query should list the table since the cache entries were evicted
+    assert_snapshot!(
+        test.query("select * from json_table").await,
+        @r"
+    ------- Query Output (6 rows) -------
+    +---------+-------+-------+
+    | c1      | c2    | c3    |
+    +---------+-------+-------+
+    | 0.0     | 0.0   | true  |
+    | 0.00003 | 5e-12 | false |
+    | 0.00001 | 1e-12 | true  |
+    | 0.00003 | 5e-12 | false |
+    | 0.00002 | 2e-12 | true  |
+    | 0.00003 | 5e-12 | false |
+    +---------+-------+-------+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 4
+    - LIST prefix=data
+    - GET  (opts) path=data/file_0.json
+    - GET  (opts) path=data/file_1.json
+    - GET  (opts) path=data/file_2.json
+    "
+    );
+
+    // this query should not list the table since the entries were added in 
the previous query
+    assert_snapshot!(
+        test.query("select * from json_table").await,
+        @r"
+    ------- Query Output (6 rows) -------
+    +---------+-------+-------+
+    | c1      | c2    | c3    |
+    +---------+-------+-------+
+    | 0.0     | 0.0   | true  |
+    | 0.00003 | 5e-12 | false |
+    | 0.00001 | 1e-12 | true  |
+    | 0.00003 | 5e-12 | false |
+    | 0.00002 | 2e-12 | true  |
+    | 0.00003 | 5e-12 | false |
+    +---------+-------+-------+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 3
+    - GET  (opts) path=data/file_0.json
+    - GET  (opts) path=data/file_1.json
+    - GET  (opts) path=data/file_2.json
+    "
+    );
+}
+
+#[tokio::test]
+async fn query_multi_json_file() {
+    let test = Test::new().with_multi_file_json().await;
+    assert_snapshot!(
+        test.query("select * from json_table").await,
+        @r"
+    ------- Query Output (6 rows) -------
+    +---------+-------+-------+
+    | c1      | c2    | c3    |
+    +---------+-------+-------+
+    | 0.0     | 0.0   | true  |
+    | 0.00003 | 5e-12 | false |
+    | 0.00001 | 1e-12 | true  |
+    | 0.00003 | 5e-12 | false |
+    | 0.00002 | 2e-12 | true  |
+    | 0.00003 | 5e-12 | false |
+    +---------+-------+-------+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 3
+    - GET  (opts) path=data/file_0.json
+    - GET  (opts) path=data/file_1.json
+    - GET  (opts) path=data/file_2.json
+    "
+    );
+}
+
+#[tokio::test]
+async fn query_partitioned_json_file() {
+    let test = Test::new().with_partitioned_json().await;
+    assert_snapshot!(
+        test.query("select * from json_table_partitioned").await,
+        @r"
+    ------- Query Output (6 rows) -------
+    +---------+-------+-------+---+----+-----+
+    | d1      | d2    | d3    | a | b  | c   |
+    +---------+-------+-------+---+----+-----+
+    | 0.00001 | 1e-12 | true  | 1 | 10 | 100 |
+    | 0.00003 | 5e-12 | false | 1 | 10 | 100 |
+    | 0.00002 | 2e-12 | true  | 2 | 20 | 200 |
+    | 0.00003 | 5e-12 | false | 2 | 20 | 200 |
+    | 0.00003 | 3e-12 | true  | 3 | 30 | 300 |
+    | 0.00003 | 5e-12 | false | 3 | 30 | 300 |
+    +---------+-------+-------+---+----+-----+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 3
+    - GET  (opts) path=data/a=1/b=10/c=100/file_1.json
+    - GET  (opts) path=data/a=2/b=20/c=200/file_2.json
+    - GET  (opts) path=data/a=3/b=30/c=300/file_3.json
+    "
+    );
+
+    assert_snapshot!(
+        test.query("select * from json_table_partitioned WHERE a=2").await,
+        @r"
+    ------- Query Output (2 rows) -------
+    +---------+-------+-------+---+----+-----+
+    | d1      | d2    | d3    | a | b  | c   |
+    +---------+-------+-------+---+----+-----+
+    | 0.00002 | 2e-12 | true  | 2 | 20 | 200 |
+    | 0.00003 | 5e-12 | false | 2 | 20 | 200 |
+    +---------+-------+-------+---+----+-----+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 1
+    - GET  (opts) path=data/a=2/b=20/c=200/file_2.json
+    "
+    );
+
+    assert_snapshot!(
+        test.query("select * from json_table_partitioned WHERE b=20").await,
+        @r"
+    ------- Query Output (2 rows) -------
+    +---------+-------+-------+---+----+-----+
+    | d1      | d2    | d3    | a | b  | c   |
+    +---------+-------+-------+---+----+-----+
+    | 0.00002 | 2e-12 | true  | 2 | 20 | 200 |
+    | 0.00003 | 5e-12 | false | 2 | 20 | 200 |
+    +---------+-------+-------+---+----+-----+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 1
+    - GET  (opts) path=data/a=2/b=20/c=200/file_2.json
+    "
+    );
+
+    assert_snapshot!(
+        test.query("select * from json_table_partitioned WHERE c=200").await,
+        @r"
+    ------- Query Output (2 rows) -------
+    +---------+-------+-------+---+----+-----+
+    | d1      | d2    | d3    | a | b  | c   |
+    +---------+-------+-------+---+----+-----+
+    | 0.00002 | 2e-12 | true  | 2 | 20 | 200 |
+    | 0.00003 | 5e-12 | false | 2 | 20 | 200 |
+    +---------+-------+-------+---+----+-----+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 1
+    - GET  (opts) path=data/a=2/b=20/c=200/file_2.json
+    "
+    );
+
+    assert_snapshot!(
+        test.query("select * from json_table_partitioned WHERE a=2 AND 
b=20").await,
+        @r"
+    ------- Query Output (2 rows) -------
+    +---------+-------+-------+---+----+-----+
+    | d1      | d2    | d3    | a | b  | c   |
+    +---------+-------+-------+---+----+-----+
+    | 0.00002 | 2e-12 | true  | 2 | 20 | 200 |
+    | 0.00003 | 5e-12 | false | 2 | 20 | 200 |
+    +---------+-------+-------+---+----+-----+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 1
+    - GET  (opts) path=data/a=2/b=20/c=200/file_2.json
+    "
+    );
+
+    assert_snapshot!(
+        test.query("select * from json_table_partitioned WHERE a<2 AND b=10 
AND c=100").await,
+        @r"
+    ------- Query Output (2 rows) -------
+    +---------+-------+-------+---+----+-----+
+    | d1      | d2    | d3    | a | b  | c   |
+    +---------+-------+-------+---+----+-----+
+    | 0.00001 | 1e-12 | true  | 1 | 10 | 100 |
+    | 0.00003 | 5e-12 | false | 1 | 10 | 100 |
+    +---------+-------+-------+---+----+-----+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 1
+    - GET  (opts) path=data/a=1/b=10/c=100/file_1.json
+    "
+    );
+}
+
+/// Test that a JSON file split into byte ranges via repartitioning produces
+/// exactly one GET request per byte range — no extra requests for boundary 
seeking.
+///
+/// With a single file and `target_partitions=3`, the repartitioner produces
+/// exactly 3 ranges.  Each range is served by a single 
[`AlignedBoundaryStream`]
+/// which issues exactly one bounded `get_opts` call, so there are 3 data GETs
+/// plus 1 HEAD (to determine file size) = **4 total**.
+///
+/// This differs from the CSV reader, which needs multiple GETs per range.

Review Comment:
   This would be a nice follow-on PR (add CSV tests to document the current 
behavior). Maybe someone wants to do that.



##########
datafusion/core/tests/datasource/object_store_access.rs:
##########
@@ -397,6 +400,348 @@ async fn query_partitioned_csv_file() {
     );
 }
 
+// =====================================================================
+// JSON (NDJSON) tests — mirrors the CSV tests above
+// =====================================================================
+
+#[tokio::test]
+async fn create_single_json_file() {
+    let test = Test::new().with_single_file_json().await;
+    assert_snapshot!(
+        test.requests(),
+        @r"
+    RequestCountingObjectStore()
+    Total Requests: 2
+    - GET  (opts) path=json_table.json head=true
+    - GET  (opts) path=json_table.json
+    "
+    );
+}
+
+#[tokio::test]
+async fn query_single_json_file() {
+    let test = Test::new().with_single_file_json().await;
+    assert_snapshot!(
+        test.query("select * from json_table").await,
+        @r"
+    ------- Query Output (2 rows) -------
+    +---------+-------+-------+
+    | c1      | c2    | c3    |
+    +---------+-------+-------+
+    | 0.00001 | 5e-12 | true  |
+    | 0.00002 | 4e-12 | false |
+    +---------+-------+-------+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 2
+    - GET  (opts) path=json_table.json head=true
+    - GET  (opts) path=json_table.json
+    "
+    );
+}
+
+#[tokio::test]
+async fn create_multi_file_json() {
+    let test = Test::new().with_multi_file_json().await;
+    assert_snapshot!(
+        test.requests(),
+        @r"
+    RequestCountingObjectStore()
+    Total Requests: 4
+    - LIST prefix=data
+    - GET  (opts) path=data/file_0.json
+    - GET  (opts) path=data/file_1.json
+    - GET  (opts) path=data/file_2.json
+    "
+    );
+}
+
+#[tokio::test]
+async fn multi_query_multi_file_json() {
+    let test = Test::new().with_multi_file_json().await;
+    assert_snapshot!(
+        test.query("select * from json_table").await,
+        @r"
+    ------- Query Output (6 rows) -------
+    +---------+-------+-------+
+    | c1      | c2    | c3    |
+    +---------+-------+-------+
+    | 0.0     | 0.0   | true  |
+    | 0.00003 | 5e-12 | false |
+    | 0.00001 | 1e-12 | true  |
+    | 0.00003 | 5e-12 | false |
+    | 0.00002 | 2e-12 | true  |
+    | 0.00003 | 5e-12 | false |
+    +---------+-------+-------+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 3
+    - GET  (opts) path=data/file_0.json
+    - GET  (opts) path=data/file_1.json
+    - GET  (opts) path=data/file_2.json
+    "
+    );
+
+    // Force a cache eviction by removing the data limit for the cache
+    assert_snapshot!(
+        test.query("set 
datafusion.runtime.list_files_cache_limit=\"0K\"").await,
+        @r"
+    ------- Query Output (0 rows) -------
+    ++
+    ++
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 0
+    "
+    );
+
+    // Then re-enable the cache
+    assert_snapshot!(
+        test.query("set 
datafusion.runtime.list_files_cache_limit=\"1M\"").await,
+        @r"
+    ------- Query Output (0 rows) -------
+    ++
+    ++
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 0
+    "
+    );
+
+    // this query should list the table since the cache entries were evicted
+    assert_snapshot!(
+        test.query("select * from json_table").await,
+        @r"
+    ------- Query Output (6 rows) -------
+    +---------+-------+-------+
+    | c1      | c2    | c3    |
+    +---------+-------+-------+
+    | 0.0     | 0.0   | true  |
+    | 0.00003 | 5e-12 | false |
+    | 0.00001 | 1e-12 | true  |
+    | 0.00003 | 5e-12 | false |
+    | 0.00002 | 2e-12 | true  |
+    | 0.00003 | 5e-12 | false |
+    +---------+-------+-------+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 4
+    - LIST prefix=data
+    - GET  (opts) path=data/file_0.json
+    - GET  (opts) path=data/file_1.json
+    - GET  (opts) path=data/file_2.json
+    "
+    );
+
+    // this query should not list the table since the entries were added in 
the previous query
+    assert_snapshot!(
+        test.query("select * from json_table").await,
+        @r"
+    ------- Query Output (6 rows) -------
+    +---------+-------+-------+
+    | c1      | c2    | c3    |
+    +---------+-------+-------+
+    | 0.0     | 0.0   | true  |
+    | 0.00003 | 5e-12 | false |
+    | 0.00001 | 1e-12 | true  |
+    | 0.00003 | 5e-12 | false |
+    | 0.00002 | 2e-12 | true  |
+    | 0.00003 | 5e-12 | false |
+    +---------+-------+-------+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 3
+    - GET  (opts) path=data/file_0.json
+    - GET  (opts) path=data/file_1.json
+    - GET  (opts) path=data/file_2.json
+    "
+    );
+}
+
+#[tokio::test]
+async fn query_multi_json_file() {
+    let test = Test::new().with_multi_file_json().await;
+    assert_snapshot!(
+        test.query("select * from json_table").await,
+        @r"
+    ------- Query Output (6 rows) -------
+    +---------+-------+-------+
+    | c1      | c2    | c3    |
+    +---------+-------+-------+
+    | 0.0     | 0.0   | true  |
+    | 0.00003 | 5e-12 | false |
+    | 0.00001 | 1e-12 | true  |
+    | 0.00003 | 5e-12 | false |
+    | 0.00002 | 2e-12 | true  |
+    | 0.00003 | 5e-12 | false |
+    +---------+-------+-------+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 3
+    - GET  (opts) path=data/file_0.json
+    - GET  (opts) path=data/file_1.json
+    - GET  (opts) path=data/file_2.json
+    "
+    );
+}
+
+#[tokio::test]
+async fn query_partitioned_json_file() {
+    let test = Test::new().with_partitioned_json().await;
+    assert_snapshot!(
+        test.query("select * from json_table_partitioned").await,
+        @r"
+    ------- Query Output (6 rows) -------
+    +---------+-------+-------+---+----+-----+
+    | d1      | d2    | d3    | a | b  | c   |
+    +---------+-------+-------+---+----+-----+
+    | 0.00001 | 1e-12 | true  | 1 | 10 | 100 |
+    | 0.00003 | 5e-12 | false | 1 | 10 | 100 |
+    | 0.00002 | 2e-12 | true  | 2 | 20 | 200 |
+    | 0.00003 | 5e-12 | false | 2 | 20 | 200 |
+    | 0.00003 | 3e-12 | true  | 3 | 30 | 300 |
+    | 0.00003 | 5e-12 | false | 3 | 30 | 300 |
+    +---------+-------+-------+---+----+-----+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 3
+    - GET  (opts) path=data/a=1/b=10/c=100/file_1.json
+    - GET  (opts) path=data/a=2/b=20/c=200/file_2.json
+    - GET  (opts) path=data/a=3/b=30/c=300/file_3.json
+    "
+    );
+
+    assert_snapshot!(
+        test.query("select * from json_table_partitioned WHERE a=2").await,
+        @r"
+    ------- Query Output (2 rows) -------
+    +---------+-------+-------+---+----+-----+
+    | d1      | d2    | d3    | a | b  | c   |
+    +---------+-------+-------+---+----+-----+
+    | 0.00002 | 2e-12 | true  | 2 | 20 | 200 |
+    | 0.00003 | 5e-12 | false | 2 | 20 | 200 |
+    +---------+-------+-------+---+----+-----+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 1
+    - GET  (opts) path=data/a=2/b=20/c=200/file_2.json
+    "
+    );
+
+    assert_snapshot!(
+        test.query("select * from json_table_partitioned WHERE b=20").await,
+        @r"
+    ------- Query Output (2 rows) -------
+    +---------+-------+-------+---+----+-----+
+    | d1      | d2    | d3    | a | b  | c   |
+    +---------+-------+-------+---+----+-----+
+    | 0.00002 | 2e-12 | true  | 2 | 20 | 200 |
+    | 0.00003 | 5e-12 | false | 2 | 20 | 200 |
+    +---------+-------+-------+---+----+-----+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 1
+    - GET  (opts) path=data/a=2/b=20/c=200/file_2.json
+    "
+    );
+
+    assert_snapshot!(
+        test.query("select * from json_table_partitioned WHERE c=200").await,
+        @r"
+    ------- Query Output (2 rows) -------
+    +---------+-------+-------+---+----+-----+
+    | d1      | d2    | d3    | a | b  | c   |
+    +---------+-------+-------+---+----+-----+
+    | 0.00002 | 2e-12 | true  | 2 | 20 | 200 |
+    | 0.00003 | 5e-12 | false | 2 | 20 | 200 |
+    +---------+-------+-------+---+----+-----+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 1
+    - GET  (opts) path=data/a=2/b=20/c=200/file_2.json
+    "
+    );
+
+    assert_snapshot!(
+        test.query("select * from json_table_partitioned WHERE a=2 AND 
b=20").await,
+        @r"
+    ------- Query Output (2 rows) -------
+    +---------+-------+-------+---+----+-----+
+    | d1      | d2    | d3    | a | b  | c   |
+    +---------+-------+-------+---+----+-----+
+    | 0.00002 | 2e-12 | true  | 2 | 20 | 200 |
+    | 0.00003 | 5e-12 | false | 2 | 20 | 200 |
+    +---------+-------+-------+---+----+-----+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 1
+    - GET  (opts) path=data/a=2/b=20/c=200/file_2.json
+    "
+    );
+
+    assert_snapshot!(
+        test.query("select * from json_table_partitioned WHERE a<2 AND b=10 
AND c=100").await,
+        @r"
+    ------- Query Output (2 rows) -------
+    +---------+-------+-------+---+----+-----+
+    | d1      | d2    | d3    | a | b  | c   |
+    +---------+-------+-------+---+----+-----+
+    | 0.00001 | 1e-12 | true  | 1 | 10 | 100 |
+    | 0.00003 | 5e-12 | false | 1 | 10 | 100 |
+    +---------+-------+-------+---+----+-----+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 1
+    - GET  (opts) path=data/a=1/b=10/c=100/file_1.json
+    "
+    );
+}
+
+/// Test that a JSON file split into byte ranges via repartitioning produces
+/// exactly one GET request per byte range — no extra requests for boundary 
seeking.

Review Comment:
   👍 



##########
datafusion/core/tests/datasource/object_store_access.rs:
##########
@@ -397,6 +400,348 @@ async fn query_partitioned_csv_file() {
     );
 }
 
+// =====================================================================
+// JSON (NDJSON) tests — mirrors the CSV tests above
+// =====================================================================
+
+#[tokio::test]
+async fn create_single_json_file() {
+    let test = Test::new().with_single_file_json().await;
+    assert_snapshot!(
+        test.requests(),
+        @r"
+    RequestCountingObjectStore()
+    Total Requests: 2
+    - GET  (opts) path=json_table.json head=true
+    - GET  (opts) path=json_table.json
+    "
+    );
+}
+
+#[tokio::test]
+async fn query_single_json_file() {
+    let test = Test::new().with_single_file_json().await;
+    assert_snapshot!(
+        test.query("select * from json_table").await,
+        @r"
+    ------- Query Output (2 rows) -------
+    +---------+-------+-------+
+    | c1      | c2    | c3    |
+    +---------+-------+-------+
+    | 0.00001 | 5e-12 | true  |
+    | 0.00002 | 4e-12 | false |
+    +---------+-------+-------+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 2
+    - GET  (opts) path=json_table.json head=true
+    - GET  (opts) path=json_table.json
+    "
+    );
+}
+
+#[tokio::test]
+async fn create_multi_file_json() {
+    let test = Test::new().with_multi_file_json().await;
+    assert_snapshot!(
+        test.requests(),
+        @r"
+    RequestCountingObjectStore()
+    Total Requests: 4
+    - LIST prefix=data
+    - GET  (opts) path=data/file_0.json
+    - GET  (opts) path=data/file_1.json
+    - GET  (opts) path=data/file_2.json
+    "
+    );
+}
+
+#[tokio::test]
+async fn multi_query_multi_file_json() {
+    let test = Test::new().with_multi_file_json().await;
+    assert_snapshot!(
+        test.query("select * from json_table").await,
+        @r"
+    ------- Query Output (6 rows) -------
+    +---------+-------+-------+
+    | c1      | c2    | c3    |
+    +---------+-------+-------+
+    | 0.0     | 0.0   | true  |
+    | 0.00003 | 5e-12 | false |
+    | 0.00001 | 1e-12 | true  |
+    | 0.00003 | 5e-12 | false |
+    | 0.00002 | 2e-12 | true  |
+    | 0.00003 | 5e-12 | false |
+    +---------+-------+-------+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 3
+    - GET  (opts) path=data/file_0.json
+    - GET  (opts) path=data/file_1.json
+    - GET  (opts) path=data/file_2.json
+    "
+    );
+
+    // Force a cache eviction by removing the data limit for the cache
+    assert_snapshot!(
+        test.query("set 
datafusion.runtime.list_files_cache_limit=\"0K\"").await,
+        @r"
+    ------- Query Output (0 rows) -------
+    ++
+    ++
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 0
+    "
+    );
+
+    // Then re-enable the cache
+    assert_snapshot!(
+        test.query("set 
datafusion.runtime.list_files_cache_limit=\"1M\"").await,
+        @r"
+    ------- Query Output (0 rows) -------
+    ++
+    ++
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 0
+    "
+    );
+
+    // this query should list the table since the cache entries were evicted
+    assert_snapshot!(
+        test.query("select * from json_table").await,
+        @r"
+    ------- Query Output (6 rows) -------
+    +---------+-------+-------+
+    | c1      | c2    | c3    |
+    +---------+-------+-------+
+    | 0.0     | 0.0   | true  |
+    | 0.00003 | 5e-12 | false |
+    | 0.00001 | 1e-12 | true  |
+    | 0.00003 | 5e-12 | false |
+    | 0.00002 | 2e-12 | true  |
+    | 0.00003 | 5e-12 | false |
+    +---------+-------+-------+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 4
+    - LIST prefix=data
+    - GET  (opts) path=data/file_0.json
+    - GET  (opts) path=data/file_1.json
+    - GET  (opts) path=data/file_2.json
+    "
+    );
+
+    // this query should not list the table since the entries were added in 
the previous query
+    assert_snapshot!(
+        test.query("select * from json_table").await,
+        @r"
+    ------- Query Output (6 rows) -------
+    +---------+-------+-------+
+    | c1      | c2    | c3    |
+    +---------+-------+-------+
+    | 0.0     | 0.0   | true  |
+    | 0.00003 | 5e-12 | false |
+    | 0.00001 | 1e-12 | true  |
+    | 0.00003 | 5e-12 | false |
+    | 0.00002 | 2e-12 | true  |
+    | 0.00003 | 5e-12 | false |
+    +---------+-------+-------+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 3
+    - GET  (opts) path=data/file_0.json
+    - GET  (opts) path=data/file_1.json
+    - GET  (opts) path=data/file_2.json
+    "
+    );
+}
+
+#[tokio::test]
+async fn query_multi_json_file() {
+    let test = Test::new().with_multi_file_json().await;
+    assert_snapshot!(
+        test.query("select * from json_table").await,
+        @r"
+    ------- Query Output (6 rows) -------
+    +---------+-------+-------+
+    | c1      | c2    | c3    |
+    +---------+-------+-------+
+    | 0.0     | 0.0   | true  |
+    | 0.00003 | 5e-12 | false |
+    | 0.00001 | 1e-12 | true  |
+    | 0.00003 | 5e-12 | false |
+    | 0.00002 | 2e-12 | true  |
+    | 0.00003 | 5e-12 | false |
+    +---------+-------+-------+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 3
+    - GET  (opts) path=data/file_0.json
+    - GET  (opts) path=data/file_1.json
+    - GET  (opts) path=data/file_2.json
+    "
+    );
+}
+
+#[tokio::test]
+async fn query_partitioned_json_file() {
+    let test = Test::new().with_partitioned_json().await;
+    assert_snapshot!(
+        test.query("select * from json_table_partitioned").await,
+        @r"
+    ------- Query Output (6 rows) -------
+    +---------+-------+-------+---+----+-----+
+    | d1      | d2    | d3    | a | b  | c   |
+    +---------+-------+-------+---+----+-----+
+    | 0.00001 | 1e-12 | true  | 1 | 10 | 100 |
+    | 0.00003 | 5e-12 | false | 1 | 10 | 100 |
+    | 0.00002 | 2e-12 | true  | 2 | 20 | 200 |
+    | 0.00003 | 5e-12 | false | 2 | 20 | 200 |
+    | 0.00003 | 3e-12 | true  | 3 | 30 | 300 |
+    | 0.00003 | 5e-12 | false | 3 | 30 | 300 |
+    +---------+-------+-------+---+----+-----+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 3
+    - GET  (opts) path=data/a=1/b=10/c=100/file_1.json
+    - GET  (opts) path=data/a=2/b=20/c=200/file_2.json
+    - GET  (opts) path=data/a=3/b=30/c=300/file_3.json
+    "
+    );
+
+    assert_snapshot!(
+        test.query("select * from json_table_partitioned WHERE a=2").await,
+        @r"
+    ------- Query Output (2 rows) -------
+    +---------+-------+-------+---+----+-----+
+    | d1      | d2    | d3    | a | b  | c   |
+    +---------+-------+-------+---+----+-----+
+    | 0.00002 | 2e-12 | true  | 2 | 20 | 200 |
+    | 0.00003 | 5e-12 | false | 2 | 20 | 200 |
+    +---------+-------+-------+---+----+-----+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 1
+    - GET  (opts) path=data/a=2/b=20/c=200/file_2.json
+    "
+    );
+
+    assert_snapshot!(
+        test.query("select * from json_table_partitioned WHERE b=20").await,
+        @r"
+    ------- Query Output (2 rows) -------
+    +---------+-------+-------+---+----+-----+
+    | d1      | d2    | d3    | a | b  | c   |
+    +---------+-------+-------+---+----+-----+
+    | 0.00002 | 2e-12 | true  | 2 | 20 | 200 |
+    | 0.00003 | 5e-12 | false | 2 | 20 | 200 |
+    +---------+-------+-------+---+----+-----+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 1
+    - GET  (opts) path=data/a=2/b=20/c=200/file_2.json
+    "
+    );
+
+    assert_snapshot!(
+        test.query("select * from json_table_partitioned WHERE c=200").await,
+        @r"
+    ------- Query Output (2 rows) -------
+    +---------+-------+-------+---+----+-----+
+    | d1      | d2    | d3    | a | b  | c   |
+    +---------+-------+-------+---+----+-----+
+    | 0.00002 | 2e-12 | true  | 2 | 20 | 200 |
+    | 0.00003 | 5e-12 | false | 2 | 20 | 200 |
+    +---------+-------+-------+---+----+-----+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 1
+    - GET  (opts) path=data/a=2/b=20/c=200/file_2.json
+    "
+    );
+
+    assert_snapshot!(
+        test.query("select * from json_table_partitioned WHERE a=2 AND 
b=20").await,
+        @r"
+    ------- Query Output (2 rows) -------
+    +---------+-------+-------+---+----+-----+
+    | d1      | d2    | d3    | a | b  | c   |
+    +---------+-------+-------+---+----+-----+
+    | 0.00002 | 2e-12 | true  | 2 | 20 | 200 |
+    | 0.00003 | 5e-12 | false | 2 | 20 | 200 |
+    +---------+-------+-------+---+----+-----+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 1
+    - GET  (opts) path=data/a=2/b=20/c=200/file_2.json
+    "
+    );
+
+    assert_snapshot!(
+        test.query("select * from json_table_partitioned WHERE a<2 AND b=10 
AND c=100").await,
+        @r"
+    ------- Query Output (2 rows) -------
+    +---------+-------+-------+---+----+-----+
+    | d1      | d2    | d3    | a | b  | c   |
+    +---------+-------+-------+---+----+-----+
+    | 0.00001 | 1e-12 | true  | 1 | 10 | 100 |
+    | 0.00003 | 5e-12 | false | 1 | 10 | 100 |
+    +---------+-------+-------+---+----+-----+
+    ------- Object Store Request Summary -------
+    RequestCountingObjectStore()
+    Total Requests: 1
+    - GET  (opts) path=data/a=1/b=10/c=100/file_1.json
+    "
+    );
+}
+
+/// Test that a JSON file split into byte ranges via repartitioning produces
+/// exactly one GET request per byte range — no extra requests for boundary 
seeking.
+///
+/// With a single file and `target_partitions=3`, the repartitioner produces
+/// exactly 3 ranges.  Each range is served by a single 
[`AlignedBoundaryStream`]
+/// which issues exactly one bounded `get_opts` call, so there are 3 data GETs
+/// plus 1 HEAD (to determine file size) = **4 total**.
+///
+/// This differs from the CSV reader, which needs multiple GETs per range.
+///
+/// This test documents the current request pattern to catch regressions.
+#[tokio::test]
+async fn query_json_file_with_byte_range_partitions() {

Review Comment:
   I ran this test locally without the code changes in this PR and it fails 
like this (as expected):
   
   ```
   • === query_json_file_with_byte_range_partitions ===
         Finished `test` profile [unoptimized + debuginfo] target(s) in 0.15s
          Running tests/core_integration.rs 
(target/debug/deps/core_integration-13d29de1dea6b31c)
   
     running 1 test
     test 
datasource::object_store_access::query_json_file_with_byte_range_partitions ... 
FAILED
   
     failures:
   
     ---- 
datasource::object_store_access::query_json_file_with_byte_range_partitions 
stdout ----
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Snapshot Summary 
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
     Snapshot: query_json_file_with_byte_range_partitions
     Source: datafusion/core/tests/datasource/object_store_access.rs:720
     
────────────────────────────────────────────────────────────────────────────────
     Expression: test.query("select * from json_range_table").await
     
────────────────────────────────────────────────────────────────────────────────
     -old snapshot
     +new results
     
────────────┬───────────────────────────────────────────────────────────────────
        10    10 │ | 0.00006 | 6e-12 | true |
        11    11 │ +---------+-------+------+
        12    12 │ ------- Object Store Request Summary -------
        13    13 │ RequestCountingObjectStore()
        14       │-Total Requests: 4
              14 │+Total Requests: 8
        15    15 │ - GET  (opts) path=json_range_table.json head=true
        16       │-- GET  (opts) path=json_range_table.json range=0-216
        17    16 │ - GET  (opts) path=json_range_table.json range=71-216
        18       │-- GET  (opts) path=json_range_table.json range=143-216
              17 │+- GET  (opts) path=json_range_table.json range=0-72
              18 │+- GET  (opts) path=json_range_table.json range=71-216
              19 │+- GET  (opts) path=json_range_table.json range=143-216
              20 │+- GET  (opts) path=json_range_table.json range=72-144
              21 │+- GET  (opts) path=json_range_table.json range=143-216
              22 │+- GET  (opts) path=json_range_table.json range=144-216
     
────────────┴───────────────────────────────────────────────────────────────────
     To update snapshots run `cargo insta review`
     Stopped on the first failure. Run `cargo insta test` to run all snapshots.
   
     thread 
'datasource::object_store_access::query_json_file_with_byte_range_partitions' 
(24436221) panicked at /Users/andrewlamb/.cargo/registry/src/index.crates.io-
     1949cf8c6b5b557f/insta-1.47.2/src/runtime.rs:719:13:
     snapshot assertion for 'query_json_file_with_byte_range_partitions' failed 
in line 720
     note: run with `RUST_BACKTRACE=1` environment variable to display a 
backtrace
   
   
     failures:
         
datasource::object_store_access::query_json_file_with_byte_range_partitions
   
     test result: FAILED. 0 passed; 1 failed; 0 ignored; 0 measured; 934 
filtered out; finished in 0.13s
   
     error: test failed, to rerun pass `-p datafusion --test core_integration`
   ```
   
   
   Codex summarized this nicely:
   
     - PR expectation: 4 requests total
         - head=true
         - range=0-216
         - range=71-216
         - range=143-216
     - main actual: 8 requests total
         - head=true
         - range=71-216
         - range=0-72
         - range=71-216
         - range=143-216
         - range=72-144
         - range=143-216
         - range=144-216



##########
datafusion/core/tests/datasource/object_store_access.rs:
##########
@@ -397,6 +400,348 @@ async fn query_partitioned_csv_file() {
     );
 }
 
+// =====================================================================
+// JSON (NDJSON) tests — mirrors the CSV tests above
+// =====================================================================
+
+#[tokio::test]
+async fn create_single_json_file() {
+    let test = Test::new().with_single_file_json().await;
+    assert_snapshot!(
+        test.requests(),
+        @r"
+    RequestCountingObjectStore()
+    Total Requests: 2
+    - GET  (opts) path=json_table.json head=true
+    - GET  (opts) path=json_table.json
+    "
+    );
+}
+
+#[tokio::test]

Review Comment:
   These tests are probably not strictly necessary, as they cover the same 
behavior as is tested in the Parquet opener. However, I think having the 
additional coverage is a good idea in case we ever specialize the opening 
process more.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to