This is an automated email from the ASF dual-hosted git repository.

ka94 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-devlake.git


The following commit(s) were added to refs/heads/main by this push:
     new 40ec1570b docs: thorough godoc for 
StatefulApiCollectorForFinalizableEntity (#4626)
40ec1570b is described below

commit 40ec1570b964b1655f0d1756419fbf1af6aad9b1
Author: Klesh Wong <[email protected]>
AuthorDate: Fri Mar 31 06:22:00 2023 +0800

    docs: thorough godoc for StatefulApiCollectorForFinalizableEntity (#4626)
    
    * docs: thorough godoc for StatefulApiCollectorForFinalizableEntity
    
    * docs: reword finalizable
    
    * docs: update according to review
    
    * docs: clarify BuildInputIterator usage
    
    ---------
    
    Co-authored-by: Keon Amini <[email protected]>
---
 .../pluginhelper/api/api_collector_with_state.go   | 52 +++++++++++++---------
 1 file changed, 32 insertions(+), 20 deletions(-)

diff --git a/backend/helpers/pluginhelper/api/api_collector_with_state.go 
b/backend/helpers/pluginhelper/api/api_collector_with_state.go
index f35fb0419..f62d6b365 100644
--- a/backend/helpers/pluginhelper/api/api_collector_with_state.go
+++ b/backend/helpers/pluginhelper/api/api_collector_with_state.go
@@ -122,18 +122,26 @@ func (m *ApiCollectorStateManager) Execute() errors.Error 
{
 }
 
 // NewStatefulApiCollectorForFinalizableEntity aims to add timeFilter/diffSync 
support for
-// APIs that do NOT support filtering data by updated date. However, it comes 
with the
+// APIs that do NOT support filtering data by the updated date. However, it 
comes with the
 // following constraints:
 //  1. The entity is a short-lived object or it is likely to be irrelevant
 //     a. ci/id pipelines are short-lived objects
 //     b. pull request might took a year to be closed or never, but it is 
likely irrelevant
-//  2. The entity must be Finalizable: when it is finalized, no modification 
forever
+//  2. The entity must be Finalizable, meaning no future modifications will 
happen to it once it
+//     enter some sort of `Closed`/`Finished` status.
 //  3. The API must fit one of the following traits:
-//     a. it supports filtering by Created Date, in this case, you may specify 
the `GetTotalPages`
-//     option to fetch data with Determined Strategy if possible.
+//     a. it supports filtering by Created Date, in this case, you must 
implement the filtering
+//     via the `UrlTemplate`, `Query` or `Header` hook based on the API 
specification.
 //     b. or sorting by Created Date in Descending order, in this case, you 
must use `Concurrency`
 //     or `GetNextPageCustomData` instead of `GetTotalPages` for Undetermined 
Strategy since we have
 //     to stop the process in the middle.
+//
+// Assuming the API fits the bill, the strategies can be categoried into:
+//   - Determined Strategy: if the API supports filtering by the Created Date, 
use the `GetTotalPages` hook
+//   - Undetermind Strategy: if the API supports sorting by the Created Date 
in Descending order and
+//     fetching by Page Number, use the `Concurrent` hook
+//   - Sequential Strategy: if the API supports sorting by the Created Date in 
Descending order but
+//     the next page can only be fetched by the Cursor/Token from the previous 
page, use the `GetNextPageCustomData` hook
 func NewStatefulApiCollectorForFinalizableEntity(args 
FinalizableApiCollectorArgs) (plugin.SubTask, errors.Error) {
        // create a manager which could execute multiple collector but acts as 
a single subtask to callers
        manager, err := NewStatefulApiCollector(RawDataSubTaskArgs{
@@ -258,27 +266,31 @@ type FinalizableApiCollectorArgs struct {
        CollectUnfinishedDetails FinalizableApiCollectorDetailArgs
 }
 
+// FinalizableApiCollectorCommonArgs is the common arguments for both list and 
detail collectors
+// Note that all request-related arguments would be called or utilized before 
any response-related arguments
 type FinalizableApiCollectorCommonArgs struct {
-       UrlTemplate     string `comment:"GoTemplate for API url"`
-       Query           func(reqData *RequestData, createdAfter *time.Time) 
(url.Values, errors.Error)
-       Header          func(reqData *RequestData, createdAfter *time.Time) 
(http.Header, errors.Error)
-       MinTickInterval *time.Duration
-       ResponseParser  func(res *http.Response) ([]json.RawMessage, 
errors.Error)
-       AfterResponse   common.ApiClientAfterResponse
-       RequestBody     func(reqData *RequestData) map[string]interface{}
-       Method          string
+       UrlTemplate     string                                                  
                        // required, url path template for the request, e.g. 
repos/{{ .Params.Name }}/pulls or incident/{{ .Input.Number }} (if using 
iterators)
+       Method          string                                                  
                        // optional, request method, e.g. GET(default), POST, 
PUT, DELETE
+       Query           func(reqData *RequestData, createdAfter *time.Time) 
(url.Values, errors.Error)  // optional, build query params for the request
+       Header          func(reqData *RequestData, createdAfter *time.Time) 
(http.Header, errors.Error) // optional, build header for the request
+       RequestBody     func(reqData *RequestData) map[string]interface{}       
                        // optional, build request body for the request if the 
Method set to POST or PUT
+       MinTickInterval *time.Duration                                          
                        // optional, minimum interval between two requests, 
some endpoints might have a more conservative rate limit than others within the 
same instance, you can mitigate this by setting a higher MinTickInterval to 
override the connection level rate limit.
+       AfterResponse   common.ApiClientAfterResponse                           
                        // optional, hook to run after each response, would be 
called before the ResponseParser
+       ResponseParser  func(res *http.Response) ([]json.RawMessage, 
errors.Error)                      // required, parse the response body and 
return a list of entities
 }
+
+// FinalizableApiCollectorListArgs is the arguments for the list collector
 type FinalizableApiCollectorListArgs struct {
-       // optional, leave it be `nil` if API supports filtering by created 
date (Don't forget to set the Query)
-       GetCreated func(item json.RawMessage) (time.Time, errors.Error)
        FinalizableApiCollectorCommonArgs
-       Concurrency           int
-       PageSize              int
-       GetNextPageCustomData func(prevReqData *RequestData, prevPageResponse 
*http.Response) (interface{}, errors.Error)
-       // need to consider the data missing problem: what if new data gets 
created during collection?
-       GetTotalPages func(res *http.Response, args *ApiCollectorArgs) (int, 
errors.Error)
+       GetCreated            func(item json.RawMessage) (time.Time, 
errors.Error)                                        // optional, to extract 
create date from a raw json of a single record, leave it be `nil` if API 
supports filtering by updated date (Don't forget to set the Query)
+       PageSize              int                                               
                                          // required, number of records per 
page
+       Concurrency           int                                               
                                          // required for Undetermined 
Strategy, number of concurrent requests
+       GetNextPageCustomData func(prevReqData *RequestData, prevPageResponse 
*http.Response) (interface{}, errors.Error) // required for Sequential 
Strategy, to extract the next page cursor from the given response
+       GetTotalPages         func(res *http.Response, args *ApiCollectorArgs) 
(int, errors.Error)                        // required for Determined Strategy, 
to extract the total number of pages from the given response
 }
+
+// FinalizableApiCollectorDetailArgs is the arguments for the detail collector
 type FinalizableApiCollectorDetailArgs struct {
        FinalizableApiCollectorCommonArgs
-       BuildInputIterator func() (Iterator, errors.Error)
+       BuildInputIterator func() (Iterator, errors.Error) // required, create 
an iterator that iterates through all unfinalized records in the database. 
These records will be fed as the "Input" (or {{ .Input.* }} in URLTemplate) 
argument back into FinalizableApiCollectorCommonArgs which makes the API calls 
to re-collect their newest states.
 }

Reply via email to