I mentioned earlier:
> Am 20.06.2025 um 12:01 schrieb Peter Dyballa <[email protected]>:
>
> In diff.c:882 we have in main():
>
> exit_status = compare_files (&noparent, de_unknowns, argv[optind],
> argv[optind + 1]);.
>
> This function is defined in diff.c, starting at lines #1376 (comments) or
> #1387 (code). It has close to its end on line #1633/1634:
>
> if (status == EXIT_SUCCESS)
> status = compare_prepped_files (parent, &cmp, O_RDONLY | oflags);
>
> compare_prepped_files() is new, compared to diffutils 3.10, and is also
> defined in diff.c, lines #1158 (comments) or #1162 (code). It has at its end
> on line #1372:
>
> return diff_2_files (cmp);
>
> This function is defined in analyze.c, starting at lines #451 (comments) or
> #452 (code). It has inside a switch statement
>
> 621 switch (output_style)
> 622 {
> 623 case OUTPUT_CONTEXT:
> 624 print_context_script (script, false);
> 625 break;
> 626
> 627 case OUTPUT_UNIFIED:
> 628 print_context_script (script, true);
> 629 break;
>
> So it's the print_context_script() branch of function calls that contains the
> faulty code. It's defined in context.c, starting at lines #109 (comments) or
> #111 (code). It makes a comparison between context and unified diff:
>
> 123 if (unidiff)
> 124 print_script (script, find_hunk, pr_unidiff_hunk);
> 125 else
> 126 print_script (script, find_hunk, pr_context_hunk);
The last argument of print_script() is the function that is later called through
the "printfun" pointer on line #974 here in util.c:
941 /* Divide SCRIPT into pieces by calling HUNKFUN and
942 print each piece with PRINTFUN.
943 Both functions take one arg, an edit script.
944
945 HUNKFUN is called with the tail of the script
946 and returns the last link that belongs together with the start
947 of the tail.
948
949 PRINTFUN takes a subscript which belongs together (with a null
950 link at the end) and prints it. */
951
952 void
953 print_script (struct change *script,
954 struct change * (*hunkfun) (struct change *),
955 void (*printfun) (struct change *))
956 {
957 struct change *next = script;
958
959 while (next)
960 {
961 /* Find a set of changes that belong together. */
962 struct change *this = next;
963 struct change *end = (*hunkfun) (next);
964
965 /* Disconnect them from the rest of the changes,
966 making them a hunk, and remember the rest for next iteration.
*/
967 next = end->link;
968 end->link = nullptr;
969 #ifdef DEBUG
970 debug_script (this);
971 #endif
972
973 /* Print this hunk. */
>>974 (*printfun) (this);
975
976 /* Reconnect the script so it will all be freed properly. */
977 end->link = next;
978 }
979 }
So it's either pr_unidiff_hunk() or pr_context_hunk() that is executed here.
pr_unidiff_hunk() has more than 100 lines. The interesting ones are:
363 putc ('\n', out);
364
365 struct change *next = hunk;
366 lin i = first0;
367 lin j = first1;
368
369 while (i <= last0 || j <= last1)
370 {
371
372 /* If the line isn't a difference, output the context from file
0. */
373
374 if (!next || i < next->line0)
375 {
376 char const *const *line = &curr.file[0].linbuf[i++];
377 if (! (suppress_blank_empty && **line == '\n'))
378 putc (initial_tab ? '\t' : ' ', out);
>>379 print_1_line (nullptr, line);
>><<===
380 j++;
381 }
382 else
With line #363 the header of the diff output has finally been printed, and now
the actual script follows. Output of the script starts on line #379, where
print_1_line() is called with a null pointer as its line_flag argument.
print_1_line() is defined in util.c and starts a cascade of function calls:
981 /* Print the text of a single line LINE,
982 flagging it with the characters in LINE_FLAG (which say whether
983 the line is inserted, deleted, changed, etc.). LINE_FLAG must not
984 end in a blank, unless it is a single blank. */
985
986 void
987 print_1_line (char const *line_flag, char const *const *line)
988 {
>>989 print_1_line_nl (line_flag, line, false);
>><<===
990 }
992 /* Print the text of a single line LINE,
993 flagging it with the characters in LINE_FLAG (which say whether
994 the line is inserted, deleted, changed, etc.). LINE_FLAG must not
995 end in a blank, unless it is a single blank. If SKIP_NL is set, then
996 the final '\n' is not printed. */
997
998 void
999 print_1_line_nl (char const *line_flag, char const *const *line, bool
skip_nl)
1000 {
1001 char const *base = line[0], *limit = line[1]; /* Help the compiler.
*/
1002 FILE *out = outfile; /* Help the compiler some more. */
1003 char const *flag_format = nullptr;
1004
1005 /* If -T was specified, use a Tab between the line-flag and the text.
1006 Otherwise use a Space (as Unix diff does).
1007 Print neither space nor tab if line-flags are empty.
1008 But omit trailing blanks if requested. */
1009
1010 if (line_flag && *line_flag)
1011 {
1012 char const *flag_format_1 = flag_format = initial_tab ? "%s\t" :
"%s ";
1013 char const *line_flag_1 = line_flag;
1014
1015 if (suppress_blank_empty && **line == '\n')
1016 {
1017 flag_format_1 = "%s";
1018
1019 /* This hack to omit trailing blanks takes advantage of the
1020 fact that the only way that LINE_FLAG can end in a blank
1021 is when LINE_FLAG consists of a single blank. */
1022 line_flag_1 += *line_flag_1 == ' ';
1023 }
1024
1025 fprintf (out, flag_format_1, line_flag_1);
1026 }
1027
»1028 output_1_line (base, limit - (skip_nl && limit[-1] == '\n'),
flag_format, line_flag); <<===
1029
1030 if ((!line_flag || line_flag[0]) && limit[-1] != '\n')
1031 {
1032 set_color_context (RESET_CONTEXT);
1033 fprintf (out, "\n\\ %s\n", _("No newline at end of file"));
1034 }
1035 }
The null pointer, passed to print_1_line(), is passed unchanged to
print_1_line_nl() and then to output_1_line():
1037 /* Output a line from BASE up to LIMIT.
1038 With -t, expand white space characters to spaces, and if FLAG_FORMAT
1039 is nonzero, output it with argument LINE_FLAG after every
1040 internal carriage return, so that tab stops continue to line up. */
1041
1042 void
1043 output_1_line (char const *base, char const *limit, char const
*flag_format,
1044 char const *line_flag)
1045 {
1046 enum { MAX_CHUNK = 1024 };
1047 if (!expand_tabs)
1048 {
1049 idx_t left = limit - base;
1050 while (left)
1051 {
1052 idx_t to_write = MIN (left, MAX_CHUNK);
»1053 idx_t written = fwrite (base, sizeof (char), to_write,
outfile); <<===
1054 process_signals ();
1055 if (written < to_write)
1056 return;
1057 base += written;
1058 left -= written;
1059 }
1060 }
1061 else
So, this is happening on PPC Mac OS X 10.4.11, Tiger. On x86_64 macOS High
Sierra, Version 10.13.6, the call chain is exactly the same, but something else
seems to be passed as 'line'…
GDB prints on Tiger in output_1_line():
print_1_line (line_flag=0x0, line=0x40139c) at util.c:989
print_1_line_nl (line_flag=0x0, line=0x40139c, skip_nl=false) at
util.c:1001
(gdb) p line[0]
$1 = 0x0
(gdb) p line[1]
$2 = 0x18015ef "distname", ' ' <repeats 12 times>,
"${name}-${version}\nextract.suffix .tgz\n\ncompiler.cxx_standard \\\n", '
' <repeats 20 times>, "2011\n\nset py_ver 3.12\nset py_ver_nodot
[string map {. {}} ${py_ver}]\n\ntest.run "...
On High Sierra it's:
print_1_line (line_flag=0x0, line=0x100607118) at util.c:989
print_1_line_nl (line_flag=0x0, line=0x100607118, skip_nl=false) at
util.c:1001
(gdb) p line[0]
$1 = 0x101800430 "compiler.cxx_standard \\\n", ' ' <repeats 20 times>,
"2011\n\nset py_ver 3.12\nset py_ver_nodot [string map {. {}}
${py_ver}]\n\ntest.run", ' ' <repeats 12 times>, "yes\n#test.env", ' ' <repeats
12 times>, "ARGS=-V\n# Use the same python"...
(gdb) p line[1]
$2 = 0x101800448 ' ' <repeats 20 times>, "2011\n\nset py_ver
3.12\nset py_ver_nodot [string map {. {}} ${py_ver}]\n\ntest.run", ' '
<repeats 12 times>, "yes\n#test.env", ' ' <repeats 12 times>, "ARGS=-V\n# Use
the same python as fonttools.\nconfigure"...
So something is filling 'line' incorrectly on Tiger…
Could there be a problem with 32-bit vs. 64-bit values or addresses? The
text string "distname" is on line #28, "compiler.cxx_standard \\\n" comes on
line #31, three lines before the single difference between the two files. A
working "diff -u" outputs this script:
compiler.cxx_standard \
2011
-set py_ver 3.12
+set py_ver 3.13
set py_ver_nodot [string map {. {}} ${py_ver}]
test.run yes
So the next job is to find the code that fills the large curr struct with the
(partial/commented?) contents of the first file, containing the text string "3.12"…
--
Greetings
Pete
Our enemies are innovative and resourceful, and so are we. They never stop
thinking about new ways to harm our country and our people, and neither do we.
– George W. Bush