Inspired by David's patch [0], find attached a patch fixing words duplicated across line boundaries.
I should probably just call the algorithm proprietary, but if you really wanted to know, I've suffered again through sed's black/slashes. time find . -name '*.c' -o -name '*.h' |xargs sed -srn '/\/\*/!d; :l; /\*\//!{N; b l}; s/\n[[:space:]]*\*/\n/g; /(\<[[:alpha:]]{1,})\>\n[[:space:]]*\<\1\>/!d; s//>>&<</; p' Alternately: time for f in `find . -name '*.c' -o -name '*.h'`; do x=`<"$f" sed -rn '/\/\*/!d; :l; /\*\//!{N; b l}; s/\n[[:space:]]*\*/\n/g; /(\<[[:alpha:]]{1,})\>\n[[:space:]]*\<\1\>/!d; s//>>&<</; p'`; [ -n "$x" ] && echo "$f:" && echo "$x"; done |less [0] https://www.postgresql.org/message-id/flat/CAKJS1f8du35u5DprpykWvgNEScxapbWYJdHq%2Bz06Wj3Y2KFPbw%40mail.gmail.com PS. Not unrelated: http://3.bp.blogspot.com/-qgW9kcbSh-Q/T5olkOrTWVI/AAAAAAAAAB0/BQhmO5AW_QQ/s1600/4de3efb5846e117e579edc91d6dceb9c.jpg
diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c index 030d0f4418..d5a568106c 100644 --- a/src/backend/access/gin/ginbtree.c +++ b/src/backend/access/gin/ginbtree.c @@ -211,7 +211,7 @@ freeGinBtreeStack(GinBtreeStack *stack) /* * Try to find parent for current stack position. Returns correct parent and * child's offset in stack->parent. The root page is never released, to - * to prevent conflict with vacuum process. + * prevent conflict with vacuum process. */ static void ginFindParents(GinBtree btree, GinBtreeStack *stack) diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 4aff6cf7f2..3f778093cb 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -1737,7 +1737,7 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len) * possible that GXACTs that were valid at checkpoint start will no longer * exist if we wait a little bit. With typical checkpoint settings this * will be about 3 minutes for an online checkpoint, so as a result we - * we expect that there will be no GXACTs that need to be copied to disk. + * expect that there will be no GXACTs that need to be copied to disk. * * If a GXACT remains valid across multiple checkpoints, it will already * be on disk so we don't bother to repeat that write. diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c index 5c6de4989c..4a039b1190 100644 --- a/src/backend/access/transam/xlogarchive.c +++ b/src/backend/access/transam/xlogarchive.c @@ -422,7 +422,7 @@ ExecuteRecoveryCommand(const char *command, const char *commandName, bool failOn /* * A file was restored from the archive under a temporary filename (path), * and now we want to keep it. Rename it under the permanent filename in - * in pg_wal (xlogfname), replacing any existing file with the same name. + * pg_wal (xlogfname), replacing any existing file with the same name. 
*/ void KeepFileRestoredFromArchive(const char *path, const char *xlogfname) diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 3e148f03d0..1262594058 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -2898,7 +2898,7 @@ analyze_mcv_list(int *mcv_counts, * significantly more common than the estimated selectivity they would * have if they weren't in the list. All non-MCV values are assumed to be * equally common, after taking into account the frequencies of all the - * the values in the MCV list and the number of nulls (c.f. eqsel()). + * values in the MCV list and the number of nulls (c.f. eqsel()). * * Here sumcount tracks the total count of all but the last (least common) * value in the MCV list, allowing us to determine the effect of excluding diff --git a/src/backend/commands/seclabel.c b/src/backend/commands/seclabel.c index 5ee46905d8..1ac7756f2a 100644 --- a/src/backend/commands/seclabel.c +++ b/src/backend/commands/seclabel.c @@ -321,7 +321,7 @@ SetSharedSecurityLabel(const ObjectAddress *object, /* * SetSecurityLabel attempts to set the security label for the specified * provider on the specified object to the given value. NULL means that any - * any existing label should be deleted. + * existing label should be deleted. */ void SetSecurityLabel(const ObjectAddress *object, diff --git a/src/backend/libpq/be-secure-openssl.c b/src/backend/libpq/be-secure-openssl.c index 1b659a5870..65f2ba85fe 100644 --- a/src/backend/libpq/be-secure-openssl.c +++ b/src/backend/libpq/be-secure-openssl.c @@ -956,7 +956,7 @@ info_cb(const SSL *ssl, int type, int args) * precomputed. * * Since few sites will bother to create a parameter file, we also - * also provide a fallback to the parameters provided by the + * provide a fallback to the parameters provided by the * OpenSSL project. 
* * These values can be static (once loaded or computed) since the diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c index 0dd55ac1ba..d241d9b3f9 100644 --- a/src/backend/partitioning/partprune.c +++ b/src/backend/partitioning/partprune.c @@ -2483,7 +2483,7 @@ get_matching_range_bounds(PartitionPruneContext *context, /* * If the query does not constrain all key columns, we'll need to scan the - * the default partition, if any. + * default partition, if any. */ if (nvalues < partnatts) result->scan_default = partition_bound_has_default(boundinfo); diff --git a/src/backend/storage/ipc/barrier.c b/src/backend/storage/ipc/barrier.c index 00ab57c0f6..bcfe87b854 100644 --- a/src/backend/storage/ipc/barrier.c +++ b/src/backend/storage/ipc/barrier.c @@ -115,7 +115,7 @@ BarrierInit(Barrier *barrier, int participants) * * While waiting, pg_stat_activity shows a wait_event_class and wait_event * controlled by the wait_event_info passed in, which should be a value from - * from one of the WaitEventXXX enums defined in pgstat.h. + * one of the WaitEventXXX enums defined in pgstat.h. * * Return true in one arbitrarily chosen participant. Return false in all * others. The return code can be used to elect one participant to execute a diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c index 017cc1a7b1..4f3d1f8872 100644 --- a/src/backend/utils/adt/datetime.c +++ b/src/backend/utils/adt/datetime.c @@ -1144,7 +1144,7 @@ DecodeDateTime(char **field, int *ftype, int nf, * Is this a YMD or HMS specification, or a year number? * YMD and HMS are required to be six digits or more, so * if it is 5 digits, it is a year. If it is six or more - * more digits, we assume it is YMD or HMS unless no date + * digits, we assume it is YMD or HMS unless no date * and no time values have been specified. This forces 6+ * digit years to be at the end of the string, or to use * the ISO date specification.