Module Name:    src
Committed By:   rillig
Date:           Sat Sep  5 11:13:07 UTC 2020

Added Files:
        src/distrib/sets: fmt-list

Log Message:
add distrib/sets/fmt-list to format the file lists consistently

This program is much more complicated than sort-list in the same
directory.  It takes care of aligning the fields of the lines so that
lines from the same directory are aligned to each other.  This reduces
horizontal jumps for the category and flags fields.


To generate a diff of this commit:
cvs rdiff -u -r0 -r1.1 src/distrib/sets/fmt-list

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Added files:

Index: src/distrib/sets/fmt-list
diff -u /dev/null src/distrib/sets/fmt-list:1.1
--- /dev/null	Sat Sep  5 11:13:07 2020
+++ src/distrib/sets/fmt-list	Sat Sep  5 11:13:07 2020
@@ -0,0 +1,424 @@
+#! /usr/bin/lua
+-- $NetBSD: fmt-list,v 1.1 2020/09/05 11:13:07 rillig Exp $
+
+--[[
+
+Align the lines of a file list so that all lines from the same directory
+have the other fields at the same indentation.
+
+Sort the lines and remove duplicate lines.
+
+usage: ./fmt-list */*/{mi,ad.*,md.*}
+
+]]
+
+local function test(func)
+  func()
+end
+
+local function assert_equals(got, expected)
+  if got ~= expected then
+    assert(false, string.format("got %q, expected %q", got, expected))
+  end
+end
+
+
+-- Calculate the width of the given string on the screen, assuming that
+-- the tab width is 8 and that the string starts at a tabstop.
+local function tabwidth(str)
+  local width = 0
+  for i = 1, #str do
+    if string.sub(str, i, i) == "\t" then
+      width = width // 8 * 8 + 8
+    else
+      width = width + 1
+    end
+  end
+  return width
+end
+
+test(function()
+  assert_equals(tabwidth(""), 0)
+  assert_equals(tabwidth("1234"), 4)
+  assert_equals(tabwidth("\t"), 8)
+  assert_equals(tabwidth("1234567\t"), 8)
+  assert_equals(tabwidth("\t1234\t"), 16)
+  assert_equals(tabwidth("\t1234\t1"), 17)
+end)
+
+
+-- Calculate the tab characters that are necessary to set the width
+-- of the string to the desired width.
+local function tabs(str, width)
+  local strwidth = tabwidth(str)
+  local tabs = string.rep("\t", (width - strwidth + 7) // 8)
+  if tabs == "" then
+    error(string.format("%q\t%d\t%d", str, strwidth, width))
+  end
+  assert(tabs ~= "")
+  return tabs
+end
+
+test(function()
+  assert_equals(tabs("", 8), "\t")
+  assert_equals(tabs("1234567", 8), "\t")
+  assert_equals(tabs("", 64), "\t\t\t\t\t\t\t\t")
+end)
+
+
+-- Group the items by a key and then execute the action on each of the
+-- groups.
+local function foreach_group(items, get_key, action)
+  local key
+  local group = {}
+  for _, item in ipairs(items) do
+    local item_key = assert(get_key(item))
+    if item_key ~= key then
+      if #group > 0 then action(group, key) end
+      key = item_key
+      group = {}
+    end
+    table.insert(group, item)
+  end
+  if #group > 0 then action(group, key) end
+end
+
+test(function()
+  local items = {
+    {"prime", 2},
+    {"prime", 3},
+    {"not prime", 4},
+    {"prime", 5},
+    {"prime", 7}
+  }
+  local result = ""
+  foreach_group(
+    items,
+    function(item) return item[1] end,
+    function(group, key)
+      result = result .. string.format("%d %s\n", #group, key)
+    end)
+  assert_equals(result, "2 prime\n1 not prime\n2 prime\n")
+end)
+
+
+-- Parse a line from a file list and split it into its meaningful parts.
+local function parse_entry(line)
+
+  local category_align, prefix, fullname, flags_align, category, flags =
+    line:match("^((%-?)(%.%S*)%s+)((%S+)%s+)(%S+)$")
+  if fullname == nil then
+    category_align, prefix, fullname, category =
+      line:match("^((%-?)(%.%S*)%s+)(%S+)$")
+  end
+  if fullname == nil then
+    prefix, fullname = line:match("^(%-)(%.%S*)$")
+  end
+  if fullname == nil then
+    return
+  end
+
+  local dirname, basename = fullname:match("^(.+)/([^/]+)$")
+  if dirname == nil then
+    dirname, basename = "", fullname
+  end
+
+  local category_col, flags_col
+  if category_align ~= nil then
+    category_col = tabwidth(category_align)
+  end
+  if flags_align ~= nil then
+    flags_col = tabwidth(flags_align)
+  end
+
+  return {
+    prefix = prefix,
+    fullname = fullname,
+    dirname = dirname,
+    basename = basename,
+    category_col = category_col,
+    category = category,
+    flags_col = flags_col,
+    flags = flags
+  }
+end
+
+test(function()
+  local entry = parse_entry("./dirname/filename\t\t\tcategory\tflags")
+  assert_equals(entry.prefix, "")
+  assert_equals(entry.fullname, "./dirname/filename")
+  assert_equals(entry.dirname, "./dirname")
+  assert_equals(entry.basename, "filename")
+  assert_equals(entry.category_col, 40)
+  assert_equals(entry.category, "category")
+  assert_equals(entry.flags_col, 16)
+  assert_equals(entry.flags, "flags")
+end)
+
+
+-- Return the smaller of the given values, ignoring nil.
+local function min(curr, value)
+  if curr == nil or (value ~= nil and value < curr) then
+    return value
+  end
+  return curr
+end
+
+test(function()
+  assert_equals(min(nil, nil), nil)
+  assert_equals(min(0, nil), 0)
+  assert_equals(min(nil, 0), 0)
+  assert_equals(min(0, 0), 0)
+  assert_equals(min(1, -1), -1)
+  assert_equals(min(-1, 1), -1)
+end)
+
+
+-- Return the larger of the given values, ignoring nil.
+local function max(curr, value)
+  if curr == nil or (value ~= nil and value > curr) then
+    return value
+  end
+  return curr
+end
+
+test(function()
+  assert_equals(max(nil, nil), nil)
+  assert_equals(max(0, nil), 0)
+  assert_equals(max(nil, 0), 0)
+  assert_equals(max(0, 0), 0)
+  assert_equals(max(1, -1), 1)
+  assert_equals(max(-1, 1), 1)
+end)
+
+
+-- Calculate the column on which the field should be aligned.
+local function column(entries, get_width_before, colname)
+
+  local function nexttab(col)
+    return col // 8 * 8 + 8
+  end
+
+  local currmin, currmax, required
+
+  for _, entry in ipairs(entries) do
+    local width = get_width_before(entry)
+    if width ~= nil then
+      required = max(required, width)
+
+      local col = entry[colname]
+      currmin = min(currmin, col)
+      currmax = max(currmax, col)
+    end
+  end
+
+  if currmin == currmax then
+    return currmin, "aligned"
+  end
+  return nexttab(required), "unaligned"
+end
+
+test(function()
+
+  local function width_before_category(entry)
+    return tabwidth(entry.prefix .. entry.fullname)
+  end
+
+  local function width_before_flags(entry)
+    return tabwidth(entry.category)
+  end
+
+  -- The entries are nicely aligned, therefore there is no need to change
+  -- anything.
+  local entries = {
+    parse_entry("./file1\tcategory"),
+    parse_entry("./file2\tcategory")
+  }
+  assert_equals(entries[2].category_col, 8)
+  assert_equals(width_before_category(entries[2]), 7)
+  assert_equals(column(entries, width_before_category, "category_col"), 8)
+
+  -- The entries are currently not aligned, therefore they are aligned
+  -- to the minimum required column.
+  entries = {
+    parse_entry("./file1\tcategory"),
+    parse_entry("./directory/file2\tcategory"),
+  }
+  assert_equals(entries[2].category_col, 24)
+  assert_equals(column(entries, width_before_category, "category_col"), 24)
+
+  -- The entries are already aligned, therefore the current alignment is
+  -- preserved, even though it is more than the minimum required alignment
+  -- of 8.  There are probably reasons for the large indentation.
+  entries = {
+    parse_entry("./file1\t\t\tcategory"),
+    parse_entry("./file2\t\t\tcategory")
+  }
+  assert_equals(column(entries, width_before_category, "category_col"), 24)
+
+  -- The flags are already aligned, 4 tabs to the right of the category.
+  -- There is no reason to change anything here.
+  entries = {
+    parse_entry("./file1\tcategory\t\t\tflags"),
+    parse_entry("./file2\tcategory"),
+    parse_entry("./file3\tcat\t\t\t\tflags")
+  }
+  assert_equals(column(entries, width_before_flags, "flags_col"), 32)
+
+end)
+
+
+-- Amend the entries by the tabs used for alignment.
+local function add_tabs(entries)
+
+  local function width_before_category(entry)
+    return tabwidth(entry.prefix .. entry.fullname)
+  end
+  local function width_before_flags(entry)
+    if entry.flags ~= nil then
+      return tabwidth(entry.category)
+    end
+  end
+
+  local category_col, category_aligned =
+    column(entries, width_before_category, "category_col")
+  local flags_col = column(entries, width_before_flags, "flags_col")
+
+  -- To avoid horizontal jumps for the column, the minimum column is set
+  -- to 56.  This way, the third column is usually set to 72, which is
+  -- still visible on an 80-column screen.
+  if category_aligned == "unaligned" then
+    category_col = max(category_col, 56)
+  end
+
+  for _, entry in ipairs(entries) do
+    local prefix = entry.prefix
+    local fullname = entry.fullname
+    local category = entry.category
+    local flags = entry.flags
+
+    if category ~= nil then
+      entry.category_tabs = tabs(prefix .. fullname, category_col)
+      if flags ~= nil then
+        entry.flags_tabs = tabs(category, flags_col)
+      end
+    end
+  end
+end
+
+test(function()
+  local entries = {
+    parse_entry("./file1\t\t\t\tcategory\t\tflags"),
+    parse_entry("./file2\t\t\t\tcategory\t\tflags"),
+    parse_entry("./file3\t\t\tcategory\t\tflags")
+  }
+  add_tabs(entries)
+  assert_equals(entries[1].category_tabs, "\t\t\t\t\t\t\t")
+  assert_equals(entries[2].category_tabs, "\t\t\t\t\t\t\t")
+  assert_equals(entries[3].category_tabs, "\t\t\t\t\t\t\t")
+  assert_equals(entries[1].flags_tabs, "\t\t")
+  assert_equals(entries[2].flags_tabs, "\t\t")
+  assert_equals(entries[3].flags_tabs, "\t\t")
+end)
+
+
+-- Normalize the alignment of the fields of the entries.
+local function normalize(entries)
+
+  local function less(a, b)
+    if a.fullname ~= b.fullname then
+      return a.fullname < b.fullname
+    end
+    if a.category ~= nil and b.category ~= nil and a.category ~= b.category then
+      return a.category < b.category
+    end
+    return a.flags ~= nil and b.flags ~= nil and a.flags < b.flags
+  end
+  table.sort(entries, less)
+
+  local function by_dirname(entry)
+    return entry.dirname
+  end
+  foreach_group(entries, by_dirname, add_tabs)
+
+end
+
+
+-- Read a file list completely into memory.
+local function read_list(fname)
+  local head = {}
+  local entries = {}
+  local errors = {}
+
+  local f = assert(io.open(fname, "r"))
+  local lineno = 0
+  for line in f:lines() do
+    lineno = lineno + 1
+
+    local entry = parse_entry(line)
+    if entry ~= nil then
+      table.insert(entries, entry)
+    elseif line:match("^#") then
+      table.insert(head, line)
+    else
+      local msg = string.format(
+        "%s:%d: unknown line format %q", fname, lineno, line)
+      table.insert(errors, msg)
+    end
+  end
+
+  f:close()
+
+  return head, entries, errors
+end
+
+
+-- Write the normalized list file back to disk.
+--
+-- Duplicate lines are skipped.  This allows to append arbitrary lines to
+-- the end of the file and have them cleaned up automatically.
+local function write_list(fname, head, entries)
+  local f = assert(io.open(fname, "w"))
+
+  for _, line in ipairs(head) do
+    f:write(line, "\n")
+  end
+
+  local prev_line = ""
+  for _, entry in ipairs(entries) do
+    local line = entry.prefix .. entry.fullname
+    if entry.category ~= nil then
+      line = line .. entry.category_tabs .. entry.category
+    end
+    if entry.flags ~= nil then
+      line = line .. entry.flags_tabs .. entry.flags
+    end
+
+    if line ~= prev_line then
+      prev_line = line
+      f:write(line, "\n")
+    end
+  end
+
+  f:close()
+end
+
+
+-- Load a file list, normalize it and write it back to disk.
+local function format_list(fname)
+  local head, entries, errors = read_list(fname)
+  if #errors > 0 then
+    for _, err in ipairs(errors) do
+      print(err)
+    end
+    return
+  end
+
+  normalize(entries)
+  write_list(fname, head, entries)
+end
+
+
+for _, fname in ipairs(arg) do
+  format_list(fname)
+end

Reply via email to