Hi,
I have noticed a few slight mismatches between the typedefs shown in the docs
and those in the header files. On the current master branch:
- CustomScanState is missing custom_ps, pscan_len and slotOps fields
  in docs.
- `methods` field of CustomPath, CustomScan and CustomScanState is
  missing `struct` in type.
- BrinOpcInfo.oi_regular_nulls is missing.
- pgNotify.next is missing. But the comment above it says apps should
  not use it, so I guess it can be left as is.

The attached diff file shows the other mismatches I could find. Some of them
are comments that could be updated; the remaining differences are caused by indentation variations and false positives. The script I used for typedef extraction is also attached.

Regards,
Artem Fadeev.
https://postgrespro.com
diff --git a/tmp/typedefs_output.code.txt b/tmp/typedefs_output.docs.txt
index 25bee9e7846..e4e6c89fc90 100644
--- a/tmp/typedefs_output.code.txt
+++ b/tmp/typedefs_output.docs.txt
@@ -8,15 +8,11 @@ typedef struct OutputPluginCallbacks
  LogicalDecodeMessageCB message_cb;
  LogicalDecodeFilterByOriginCB filter_by_origin_cb;
  LogicalDecodeShutdownCB shutdown_cb;
-
- /* streaming of changes at prepare time */
  LogicalDecodeFilterPrepareCB filter_prepare_cb;
  LogicalDecodeBeginPrepareCB begin_prepare_cb;
  LogicalDecodePrepareCB prepare_cb;
  LogicalDecodeCommitPreparedCB commit_prepared_cb;
  LogicalDecodeRollbackPreparedCB rollback_prepared_cb;
-
- /* streaming of changes */
  LogicalDecodeStreamStartCB stream_start_cb;
  LogicalDecodeStreamStopCB stream_stop_cb;
  LogicalDecodeStreamAbortCB stream_abort_cb;
@@ -43,11 +39,13 @@ typedef struct ArchiveModuleCallbacks
 ===
 typedef struct
 {
- float8 x,
- y;
+ double x, y;
 } Point;
 ===
-typedef struct varlena text;
+typedef struct {
+ int32 length;
+ char data[FLEXIBLE_ARRAY_MEMBER];
+} text;
 ===
 typedef struct FuncCallContext
 {
@@ -62,16 +60,16 @@ typedef struct FuncCallContext
  /*
  * OPTIONAL maximum number of calls
  *
- * max_calls is here for convenience only and setting it is optional. If
- * not set, you must provide alternative means to know when the function
- * is done.
+ * max_calls is here for convenience only and setting it is optional.
+ * If not set, you must provide alternative means to know when the
+ * function is done.
  */
  uint64 max_calls;
 
  /*
  * OPTIONAL pointer to miscellaneous user-provided context information
  *
- * user_fctx is for use as a pointer to your own struct to retain
+ * user_fctx is for use as a pointer to your own data to retain
  * arbitrary context information between calls of your function.
  */
  void *user_fctx;
@@ -79,9 +77,10 @@ typedef struct FuncCallContext
  /*
  * OPTIONAL pointer to struct containing attribute type input metadata
  *
- * attinmeta is for use when returning tuples (i.e. composite data types)
- * and is not used when returning base data types. It is only needed if
- * you intend to use BuildTupleFromCStrings() to create the return tuple.
+ * attinmeta is for use when returning tuples (i.e., composite data types)
+ * and is not used when returning base data types. It is only needed
+ * if you intend to use BuildTupleFromCStrings() to create the return
+ * tuple.
  */
  AttInMetadata *attinmeta;
 
@@ -90,15 +89,15 @@ typedef struct FuncCallContext
  *
  * multi_call_memory_ctx is set by SRF_FIRSTCALL_INIT() for you, and used
  * by SRF_RETURN_DONE() for cleanup. It is the most appropriate memory
- * context for any memory that is to be reused across multiple calls of
- * the SRF.
+ * context for any memory that is to be reused across multiple calls
+ * of the SRF.
  */
  MemoryContext multi_call_memory_ctx;
 
  /*
  * OPTIONAL pointer to struct containing tuple description
  *
- * tuple_desc is for use when returning tuples (i.e. composite data types)
+ * tuple_desc is for use when returning tuples (i.e., composite data types)
  * and is only needed if you are going to build the tuples with
  * heap_form_tuple() rather than with BuildTupleFromCStrings(). Note that
  * the TupleDesc pointer stored here should usually have been run through
@@ -122,34 +121,33 @@ typedef struct SPITupleTable
  SubTransactionId subid; /* subxact in which tuptable was created */
 } SPITupleTable;
 ===
-typedef struct _PQconninfoOption
+typedef struct
 {
  char *keyword; /* The keyword of the option */
  char *envvar; /* Fallback environment variable name */
  char *compiled; /* Fallback compiled in default value */
  char *val; /* Option's current value, or NULL */
  char *label; /* Label for field in connect dialog */
- char *dispchar; /* Indicates how to display this field in a
- * connect dialog. Values are: "" Display
- * entered value as is "*" Password field -
- * hide value "D" Debug option - don't show
- * by default */
+ char *dispchar; /* Indicates how to display this field
+ in a connect dialog. Values are:
+ "" Display entered value as is
+ "*" Password field - hide value
+ "D" Debug option - don't show by default */
  int dispsize; /* Field size in characters for dialog */
 } PQconninfoOption;
 ===
-typedef struct _PQprintOpt
+typedef struct
 {
  pqbool header; /* print output field headings and row count */
  pqbool align; /* fill align the fields */
  pqbool standard; /* old brain dead format */
- pqbool html3; /* output html tables */
+ pqbool html3; /* output HTML tables */
  pqbool expanded; /* expand tables */
  pqbool pager; /* use pager for output if needed */
  char *fieldSep; /* field separator */
- char *tableOpt; /* insert to HTML <table ...> */
- char *caption; /* HTML <caption> */
- char **fieldName; /* null terminated array of replacement field
- * names */
+ char *tableOpt; /* attributes for HTML table element */
+ char *caption; /* HTML table caption */
+ char **fieldName; /* null-terminated array of replacement field names */
 } PQprintOpt;
 ===
 typedef struct
@@ -158,33 +156,31 @@ typedef struct
  int isint;
  union
  {
- int *ptr; /* can't use void (dec compiler barfs) */
+ int *ptr;
  int integer;
  } u;
 } PQArgBlock;
 ===
 typedef struct pgNotify
 {
- char *relname; /* notification condition name */
+ char *relname; /* notification channel name */
  int be_pid; /* process ID of notifying server process */
- char *extra; /* notification parameter */
- /* Fields below here are private to libpq; apps should not use 'em */
- struct pgNotify *next; /* list link */
+ char *extra; /* notification payload string */
 } PGnotify;
 ===
 typedef enum
 {
- PQERRORS_TERSE, /* single-line error messages */
- PQERRORS_DEFAULT, /* recommended style */
- PQERRORS_VERBOSE, /* all the facts, ma'am */
- PQERRORS_SQLSTATE /* only error severity and SQLSTATE code */
+ PQERRORS_TERSE,
+ PQERRORS_DEFAULT,
+ PQERRORS_VERBOSE,
+ PQERRORS_SQLSTATE
 } PGVerbosity;
 ===
 typedef enum
 {
- PQSHOW_CONTEXT_NEVER, /* never show CONTEXT field */
- PQSHOW_CONTEXT_ERRORS, /* show CONTEXT for errors only (default) */
- PQSHOW_CONTEXT_ALWAYS /* always show CONTEXT field */
+ PQSHOW_CONTEXT_NEVER,
+ PQSHOW_CONTEXT_ERRORS,
+ PQSHOW_CONTEXT_ALWAYS
 } PGContextVisibility;
 ===
 typedef struct
@@ -219,7 +215,11 @@ typedef struct
  PGresult *result;
 } PGEventResultDestroy;
 ===
-'mydata;' is not present in code
+typedef struct
+{
+ int n;
+ char *str;
+} mydata;
 ===
 typedef struct _PGpromptOAuthDevice
 {
@@ -233,68 +233,39 @@ typedef struct _PGpromptOAuthDevice
 typedef struct PGoauthBearerRequest
 {
  /* Hook inputs (constant across all calls) */
- const char *openid_configuration; /* OIDC discovery URI */
+ const char *openid_configuration; /* OIDC discovery URL */
  const char *scope; /* required scope(s), or NULL */
 
  /* Hook outputs */
 
- /*---------
- * Callback implementing a custom asynchronous OAuth flow.
- *
- * The callback may return
- * - PGRES_POLLING_READING/WRITING, to indicate that a socket descriptor
- * has been stored in *altsock and libpq should wait until it is
- * readable or writable before calling back;
- * - PGRES_POLLING_OK, to indicate that the flow is complete and
- * request->token has been set; or
- * - PGRES_POLLING_FAILED, to indicate that token retrieval has failed.
- *
- * This callback is optional. If the token can be obtained without
- * blocking during the original call to the PQAUTHDATA_OAUTH_BEARER_TOKEN
- * hook, it may be returned directly, but one of request->async or
- * request->token must be set by the hook.
- */
+ /* Callback implementing a custom asynchronous OAuth flow. */
  PostgresPollingStatusType (*async) (PGconn *conn,
  struct PGoauthBearerRequest *request,
- SOCKTYPE * altsock);
+ SOCKTYPE *altsock);
 
- /*
- * Callback to clean up custom allocations. A hook implementation may use
- * this to free request->token and any resources in request->user.
- *
- * This is technically optional, but highly recommended, because there is
- * no other indication as to when it is safe to free the token.
- */
+ /* Callback to clean up custom allocations. */
  void (*cleanup) (PGconn *conn, struct PGoauthBearerRequest *request);
 
- /*
- * The hook should set this to the Bearer token contents for the
- * connection, once the flow is completed. The token contents must remain
- * available to libpq until the hook's cleanup callback is called.
- */
- char *token;
-
- /*
- * Hook-defined data. libpq will not modify this pointer across calls to
- * the async callback, so it can be used to keep track of
- * application-specific state. Resources allocated here should be freed by
- * the cleanup callback.
- */
- void *user;
+ char *token; /* acquired Bearer token */
+ void *user; /* hook-defined allocated data */
 } PGoauthBearerRequest;
 ===
-'comp_t;' is not present in code
+typedef struct
+{
+ int intval;
+ varchar textval[33];
+} comp_t;
 ===
-typedef struct sqlda_struct sqlda_t;
+typedef struct sqlda_compat sqlda_t;
 ===
-typedef struct sqlvar_struct sqlvar_t;
+typedef struct sqlvar_compat sqlvar_t;
 ===
 typedef struct EventTriggerData
 {
  NodeTag type;
  const char *event; /* event name */
  Node *parsetree; /* parse tree */
- CommandTag tag;
+ CommandTag tag; /* command tag */
 } EventTriggerData;
 ===
 typedef struct RmgrData
@@ -310,52 +281,38 @@ typedef struct RmgrData
  struct XLogRecordBuffer *buf);
 } RmgrData;
 ===
-'Complex;' is not present in code
+typedef struct Complex {
+ double x;
+ double y;
+} Complex;
 ===
 typedef struct CustomPath
 {
  Path path;
- uint32 flags; /* mask of CUSTOMPATH_* flags, see
- * nodes/extensible.h */
- List *custom_paths; /* list of child Path nodes, if any */
+ uint32 flags;
+ List *custom_paths;
  List *custom_restrictinfo;
  List *custom_private;
- const struct CustomPathMethods *methods;
+ const CustomPathMethods *methods;
 } CustomPath;
 ===
 typedef struct CustomScan
 {
  Scan scan;
- /* mask of CUSTOMPATH_* flags, see nodes/extensible.h */
  uint32 flags;
- /* list of Plan nodes, if any */
  List *custom_plans;
- /* expressions that custom code may evaluate */
  List *custom_exprs;
- /* private data for custom code */
  List *custom_private;
- /* optional tlist describing scan tuple */
  List *custom_scan_tlist;
- /* RTIs generated by this scan */
  Bitmapset *custom_relids;
-
- /*
- * NOTE: The method field of CustomScan is required to be a pointer to a
- * static table of callback functions. So we don't copy the table itself,
- * just reference the original one.
- */
- const struct CustomScanMethods *methods;
+ const CustomScanMethods *methods;
 } CustomScan;
 ===
 typedef struct CustomScanState
 {
  ScanState ss;
- uint32 flags; /* mask of CUSTOMPATH_* flags, see
- * nodes/extensible.h */
- List *custom_ps; /* list of child PlanState nodes, if any */
- Size pscan_len; /* size of parallel coordination information */
- const struct CustomExecMethods *methods;
- const struct TupleTableSlotOps *slotOps;
+ uint32 flags;
+ const CustomExecMethods *methods;
 } CustomScanState;
 ===
 typedef struct TriggerData
@@ -375,8 +332,7 @@ typedef struct TriggerData
 ===
 typedef struct Trigger
 {
- Oid tgoid; /* OID of trigger (pg_trigger row) */
- /* Remaining fields are copied from pg_trigger, see pg_trigger.h */
+ Oid tgoid;
  char *tgname;
  Oid tgfoid;
  int16 tgtype;
@@ -408,21 +364,30 @@ typedef struct BackgroundWorker
  char bgw_function_name[BGW_MAXLEN];
  Datum bgw_main_arg;
  char bgw_extra[BGW_EXTRALEN];
- pid_t bgw_notify_pid; /* SIGUSR1 this backend on start/stop */
+ pid_t bgw_notify_pid;
 } BackgroundWorker;
 ===
-'MyEnumType;' is not present in code
+typedef enum MyEnumType
+{
+ MY_ENUM_ON,
+ MY_ENUM_OFF,
+ MY_ENUM_AUTO
+} MyEnumType;
 ===
-'MyOptionsStruct;' is not present in code
+typedef struct
+{
+ int32 vl_len_; /* varlena header (do not touch directly!) */
+ int int_param; /* integer parameter */
+ double real_param; /* real parameter */
+ MyEnumType enum_param; /* enum parameter */
+ int str_param; /* string parameter */
+} MyOptionsStruct;
 ===
 typedef struct BrinOpcInfo
 {
  /* Number of columns stored in an index column of this opclass */
  uint16 oi_nstored;
 
- /* Regular processing of NULLs in BrinValues? */
- bool oi_regular_nulls;
-
  /* Opaque pointer for the opclass' private use */
  void *oi_opaque;
 
@@ -441,21 +406,7 @@ typedef struct OAuthValidatorCallbacks
 ===
 typedef struct ValidatorModuleResult
 {
- /*
- * Should be set to true if the token carries sufficient permissions for
- * the bearer to connect.
- */
  bool authorized;
-
- /*
- * If the token authenticates the user, this should be set to a palloc'd
- * string containing the SYSTEM_USER to use for HBA mapping. Consider
- * setting this even if result->authorized is false so that DBAs may use
- * the logs to match end users to token failures.
- *
- * This is required if the module is not configured for ident mapping
- * delegation. See the validator module documentation for details.
- */
  char *authn_id;
 } ValidatorModuleResult;
 ===
@@ -508,19 +459,14 @@ typedef struct IndexAmRoutine
  bool amcaninclude;
  /* does AM use maintenance_work_mem? */
  bool amusemaintenanceworkmem;
- /* does AM store tuple information only at block granularity? */
+ /* does AM summarize tuples, with at least all tuples in the block
+ * summarized in one summary */
  bool amsummarizing;
- /* OR of parallel vacuum flags. See vacuum.h for flags. */
+ /* OR of parallel vacuum flags */
  uint8 amparallelvacuumoptions;
  /* type of data stored in index, or InvalidOid if variable */
  Oid amkeytype;
 
- /*
- * If you add new properties to either the above or the below lists, then
- * they should also (usually) be exposed via the property API (see
- * IndexAMProperty at the top of the file, and utils/adt/amutils.c).
- */
-
  /* interface functions */
  ambuild_function ambuild;
  ambuildempty_function ambuildempty;
@@ -565,7 +511,7 @@ typedef struct spgConfigOut
  Oid labelType; /* Data type of inner-tuple node labels */
  Oid leafType; /* Data type of leaf-tuple values */
  bool canReturnData; /* Opclass can reconstruct original data */
- bool longValuesOK; /* Opclass can cope with values > 1 page */
+ bool longValuesOK; /* Opclass can cope with values &gt; 1 page */
 } spgConfigOut;
 ===
 typedef struct spgChooseIn
@@ -586,7 +532,7 @@ typedef enum spgChooseResultType
 {
  spgMatchNode = 1, /* descend into existing node */
  spgAddNode, /* add a node to the inner tuple */
- spgSplitTuple, /* split inner tuple (change its prefix) */
+ spgSplitTuple /* split inner tuple (change its prefix) */
 } spgChooseResultType;
 ===
 typedef struct spgChooseOut
@@ -611,8 +557,8 @@ typedef struct spgChooseOut
  bool prefixHasPrefix; /* tuple should have a prefix? */
  Datum prefixPrefixDatum; /* if so, its value */
  int prefixNNodes; /* number of nodes */
- Datum *prefixNodeLabels; /* their labels (or NULL for no
- * labels) */
+ Datum *prefixNodeLabels; /* their labels (or NULL for
+ * no labels) */
  int childNodeN; /* which node gets child tuple */
 
  /* Info to form new lower-level inner tuple with all old nodes */
#!/usr/bin/env python3
import pathlib
import re
import sys

# Default path prefix for the two output files; ".docs.txt" and ".code.txt"
# are appended to it when no prefix is given on the command line.
OUTPUT_PREFIX = "./typedefs_output"
SEPARATOR = "==="  # separator line used in output files


def read_statement(text, start):
    """Extract the text of a C statement starting at the given index.

    Scans forward from ``start`` to the first ";" that is outside any {}
    nesting and returns the slice of ``text`` from ``start`` up to and
    including that ";".

    Text inside /* ... */ comments is skipped while scanning (but kept in the
    returned slice), so a brace or semicolon mentioned in a comment neither
    unbalances the nesting depth nor terminates the statement early.

    Args:
        text: full contents of the file being scanned.
        start: index in ``text`` where the statement begins.

    Returns:
        The statement text, including the trailing ";".

    Raises:
        IndexError: if ``text`` ends before the statement (or a comment
            inside it) is terminated.
    """
    end = len(text)
    curr = start
    depth = 0  # depth of {} nesting
    while curr < end:
        if text.startswith("/*", curr):
            # Jump over the whole comment; its contents are not C syntax.
            close = text.find("*/", curr + 2)
            if close == -1:
                break  # unterminated comment: report as a parse failure
            curr = close + 2
            continue

        char = text[curr]
        if char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
        elif char == ";" and depth == 0:
            return text[start : curr + 1]  # include the trailing ";"
        curr += 1

    # Keep the exception type callers already handle for unparsable input.
    raise IndexError(f"unterminated statement starting at index {start}")


def standardize(text):
    """Normalize horizontal whitespace so declarations compare cleanly.

    Every run of spaces and/or tabs is collapsed into a single space;
    newlines and all other characters are left untouched.
    """
    return re.sub(r"[ \t]+", " ", text)


def process_files(path_list, statement_regexp, name_regexp):
    """Collect declarations from files into a {declared_name: declaration} dict.

    For every match of ``statement_regexp`` (a compiled pattern) in each file
    of ``path_list``, the full statement is read, whitespace-normalized, and
    stored under the name extracted from it with ``name_regexp``. A later
    declaration with the same name replaces an earlier one.

    Statements that cannot be read are reported on stderr and skipped; a
    declaration whose name cannot be extracted is reported and the error is
    re-raised.
    """
    declarations = {}
    for source_path in path_list:
        contents = source_path.read_text()

        for hit in statement_regexp.finditer(contents):
            offset = hit.start(0)
            try:
                statement = read_statement(contents, offset)
            except IndexError:
                # Code with unmatched '{' in comments may break the statement
                # reader. Report such cases and move on to the next match.
                print(f"Failed parsing {source_path} at index {offset}.", file=sys.stderr)
                print("First 100 symbols:", file=sys.stderr)
                print(contents[offset : offset + 100], file=sys.stderr)
                print(file=sys.stderr)
                continue

            statement = standardize(statement)
            try:
                declared_name = re.search(name_regexp, statement)[0]
            except Exception as err:
                print(statement)
                print(
                    f"Regexp {name_regexp!r} could not find declaration name in {statement!r}",
                    file=sys.stderr,
                )
                raise err
            declarations[declared_name] = statement
    return declarations


def main():
    """Command-line entry point: dump typedefs from docs and headers for diffing.

    Expects two or three arguments (statement regexp, name regexp, optional
    output prefix); with any other count it prints the usage text and exits
    with status 1.

    Collects matching statements from every *.sgml and *.h file under the
    working directory. For each name found in the docs it writes the docs
    declaration to <output_prefix>.docs.txt and, at the same position in
    <output_prefix>.code.txt, either the header declaration or a
    "... is not present in code" marker, so the two files can be diffed.
    """
    if len(sys.argv) not in (3, 4):
        print(
            r"""
Usage:
    ./typedefs.py <stmt_regexp> <name_regexp> [<output_prefix>]

    # For finding function type typedefs
    ./typedefs.py '^typedef[^\{;]*\(' '\([^\)]*\)'

    # For finding struct/enum typedefs
    ./typedefs.py '^typedef\s+(struct|enum)' '[^\s]*;$'

    # Then compare
    diff ./typedefs_output*

Description:
    Scans working directory for statements matching <stmt_regexp> in code and
    code excerpts in docs. <name_regexp> is used to extract name from declaration.
    Writes all found statements from docs into <output_prefix>.docs.txt and
    corresponding declarations from code into <output_prefix>.code.txt
    (<output_prefix> is "./typedefs_output" by default)
        """.strip()
        )
        # sys.exit rather than the site-provided exit(), which is meant for
        # the interactive shell and may be unavailable (e.g. python -S).
        sys.exit(1)

    statement_regexp = re.compile(sys.argv[1], re.MULTILINE)
    name_regexp = sys.argv[2]
    output_prefix = sys.argv[3] if len(sys.argv) == 4 else OUTPUT_PREFIX

    docs_typedefs = process_files(
        pathlib.Path(".").glob("**/*.sgml"), statement_regexp, name_regexp
    )
    code_typedefs = process_files(
        pathlib.Path(".").glob("**/*.h"), statement_regexp, name_regexp
    )

    # Build the two outputs in lockstep so entry N of one file corresponds to
    # entry N of the other.
    docs_output = []
    code_output = []
    for name, decl in docs_typedefs.items():
        docs_output.append(decl)
        code_output.append(code_typedefs.get(name, f"{name!r} is not present in code"))

    pathlib.Path(f"{output_prefix}.docs.txt").write_text(
        f"\n{SEPARATOR}\n".join(docs_output)
    )
    pathlib.Path(f"{output_prefix}.code.txt").write_text(
        f"\n{SEPARATOR}\n".join(code_output)
    )


# Script entry point: run main() only when executed directly, not on import.
if __name__ == "__main__":
    main()

Reply via email to