Hisoka-X commented on code in PR #5561: URL: https://github.com/apache/seatunnel/pull/5561#discussion_r1354531606
########## seatunnel-e2e/seatunnel-connector-v2-e2e/connector-http-e2e/src/test/resources/http_page_increase_page_num.conf: ########## @@ -0,0 +1,85 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +env { + execution.parallelism = 1 + job.mode = "BATCH" +} + +source { + Http { + result_table_name = "http" + url = "http://mockserver:1080/query/pages" + method = "GET" + format = "json" + json_field = { + name = "$.data[*].name" + age = "$.data[*].age" + } + pageing={ + total_page_size=2 + page_field=page + } + schema = { + fields { + name = string + age = int + } + } Review Comment: Please fix format. ########## seatunnel-connectors-v2/connector-http/connector-http-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/http/config/HttpConfig.java: ########## @@ -29,6 +29,38 @@ public class HttpConfig { public static final boolean DEFAULT_ENABLE_MULTI_LINES = false; public static final Option<String> URL = Options.key("url").stringType().noDefaultValue().withDescription("Http request url"); + public static final Option<Long> TOTAL_PAGE_SIZE = + Options.key("total_page_size") + .longType() + .defaultValue(0L) + .withDescription("total page size"); + public static final Option<String> JSON_VERIFY_EXPRESSION = + Options.key("json_verify_expression") + .stringType() + .noDefaultValue() + .withDescription("json verify expression "); + public static final Option<String> JSON_VERIFY_VALUE = + Options.key("json_verify_value") + .stringType() + .noDefaultValue() + .withDescription("json verify value "); Review Comment: ditto ########## seatunnel-e2e/seatunnel-connector-v2-e2e/connector-http-e2e/src/test/resources/mockserver-config.json: ########## @@ -4447,5 +4447,117 @@ ] } } + }, + { + "httpRequest": { + "method" : "GET", + "path": "/query/pages", + "queryStringParameters": { + "page": "1" + } + }, + "httpResponse": { + "body": + { + "status": null, + "msg": null, + "data": [ + { + "name": "name1", + "age": 69 + }, + { + "name": "name2", + "age": 51 + } + ], + "currentPageIndex": 1, + "totalPage": 2 + } + } + }, + { + "httpRequest": { + "method" : "GET", + "path": "/query/pages", + "queryStringParameters": { + "page": "2" + } + }, + "httpResponse": { + "body": + { + "status": null, + "msg": null, + "data": [ + { + "name": "name1", + "age": 69 + }, + { + "name": "name2", + "age": 51 + } + ], + "currentPageIndex": 1, Review Comment: ```suggestion "currentPageIndex": 2, ``` ########## seatunnel-connectors-v2/connector-http/connector-http-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/http/config/HttpConfig.java: ########## @@ -29,6 +29,38 @@ public class HttpConfig { public static final boolean DEFAULT_ENABLE_MULTI_LINES = false; public static final Option<String> URL = Options.key("url").stringType().noDefaultValue().withDescription("Http request url"); + public static final Option<Long> TOTAL_PAGE_SIZE = + Options.key("total_page_size") + .longType() + .defaultValue(0L) + .withDescription("total page size"); + public static final Option<String> JSON_VERIFY_EXPRESSION = + Options.key("json_verify_expression") + .stringType() + .noDefaultValue() + .withDescription("json verify expression "); + public static final Option<String> JSON_VERIFY_VALUE = + Options.key("json_verify_value") + .stringType() + .noDefaultValue() + .withDescription("json verify value "); + public static final Option<Long> MAX_PAGE_SIZE = + Options.key("max_page_size") + .longType() + .defaultValue(10000L) + .withDescription("max page size "); + public static final Option<String> PAGE_FIELD = + Options.key("page_field") + .stringType() + .defaultValue("page") + .withDescription("page field"); Review Comment: ditto. I think you should copy description from doc to here. ########## seatunnel-e2e/seatunnel-connector-v2-e2e/connector-http-e2e/src/test/resources/http_page_increase_by_expression.conf: ########## @@ -0,0 +1,86 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +env { + execution.parallelism = 1 + job.mode = "BATCH" +} + +source { + Http { + result_table_name = "http" + url = "http://mockserver:1080/query/pagesByExpression" + method = "GET" + format = "json" + json_field = { + name = "$.data[*].name" + age = "$.data[*].age" + } + pageing={ + json_verify_expression="$.hasNext" + json_verify_value="false" + page_field=page + } + schema = { + fields { + name = string + age = int + } + } Review Comment: ditto ########## seatunnel-connectors-v2/connector-http/connector-http-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/http/config/HttpConfig.java: ########## @@ -29,6 +29,38 @@ public class HttpConfig { public static final boolean DEFAULT_ENABLE_MULTI_LINES = false; public static final Option<String> URL = Options.key("url").stringType().noDefaultValue().withDescription("Http request url"); + public static final Option<Long> TOTAL_PAGE_SIZE = + Options.key("total_page_size") + .longType() + .defaultValue(0L) + .withDescription("total page size"); + public static final Option<String> JSON_VERIFY_EXPRESSION = + Options.key("json_verify_expression") + .stringType() + .noDefaultValue() + .withDescription("json verify expression "); Review Comment: The description not clear, I don't know what's mean if I only read `json verify expression`. ########## docs/en/connector-v2/source/Http.md: ########## @@ -42,24 +42,31 @@ They can be downloaded via install-plugin.sh or from the Maven central repositor ## Source Options -| Name | Type | Required | Default | Description | -|-----------------------------|---------|----------|---------|--------------------------------------------------------------------------------------------------------------------------------------| -| url | String | Yes | - | Http request url. | -| schema | Config | No | - | Http and seatunnel data structure mapping | -| schema.fields | Config | No | - | The schema fields of upstream data | -| json_field | Config | No | - | This parameter helps you configure the schema,so this parameter must be used with schema. | -| content_json | String | No | - | This parameter can get some json data.If you only need the data in the 'book' section, configure `content_field = "$.store.book.*"`. | -| format | String | No | json | The format of upstream data, now only support `json` `text`, default `json`. | -| method | String | No | get | Http request method, only supports GET, POST method. | -| headers | Map | No | - | Http headers. | -| params | Map | No | - | Http params,the program will automatically add http header application/x-www-form-urlencoded. | -| body | String | No | - | Http body,the program will automatically add http header application/json,body is jsonbody. | -| poll_interval_millis | Int | No | - | Request http api interval(millis) in stream mode. | -| retry | Int | No | - | The max retry times if request http return to `IOException`. | -| retry_backoff_multiplier_ms | Int | No | 100 | The retry-backoff times(millis) multiplier if request http failed. | -| retry_backoff_max_ms | Int | No | 10000 | The maximum retry-backoff times(millis) if request http failed | -| enable_multi_lines | Boolean | No | false | | -| common-options | | No | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details | +| Name | Type | Required | Default | Description | +|--------------------------------|---------|----------|---------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| url | String | Yes | - | Http request url. | +| schema | Config | No | - | Http and seatunnel data structure mapping | +| schema.fields | Config | No | - | The schema fields of upstream data | +| json_field | Config | No | - | This parameter helps you configure the schema,so this parameter must be used with schema. | +| pageing | Config | No | - | This parameter is used for paging queries | +| pageing.page_field | String | No | - | This parameter is used to specify the page field name in the request parameter | +| *pageing.max_page_size* | Int | No | 10000 | Change the parameter to control the maximum number of pages (if using paging please ensure that this value is greater than the target number of pages otherwise it may cause early *data accuracy* problems) | Review Comment: Please remove `*` on `pageing.max_page_size` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
