On 8/9/2023 12:10 PM, denis.ma...@unibe.ch wrote:
Keith, you can also check hyphenations using a script:
-- check-hyphens.lua
--[[
analyze hyphenations based on a ConTeXt log file
enable hyphenation tracking in the ConTeXt file with
\enabletrackers[hyphenation.applied]
then run this script with
lua check-hyphens.lua input_file whitelist.ending
give the input_file without its extension: ".log" is appended automatically
give the whitelist with its file extension
the whitelist is optional
]]
-- local lines = string.splitlines(io.loaddata("oeps.tex")or "") or { }
-- local pprint = require('pprint')
-- Entry point: read the log, extract the hyphenated words, drop the
-- whitelisted ones and save what is left.
-- @param input_file      log file name without its ".log" extension
-- @param whitelist_file  optional whitelist file name (with extension)
-- Raises an error when input_file is missing.
function main (input_file, whitelist_file)
  if input_file == nil then
    -- fail with a readable message instead of a nil-concatenation error
    error("usage: lua check-hyphens.lua input_file [whitelist.ending]", 0)
  end
  local lines = lines_from(input_file .. ".log")
  -- the whitelist is optional: without one nothing is filtered out
  local whitelist = {}
  if whitelist_file ~= nil then
    whitelist = lines_from(whitelist_file)
  end
  --pprint (lines)
  --pprint (whitelist)
  -- Keep every call on the line of its function name: a line that starts
  -- with "(" right after an expression is rejected by Lua 5.1 as
  -- "ambiguous syntax (function call x new statement)".
  local hyphenations = cleanLines(getHyphenationLines(lines))
  local filteredWordlist = filterHyphenationsWordlist(hyphenations, whitelist)
  -- pprint(filteredWordlist)
  saveResultsToFile(filteredWordlist, 'check-hyphens.log')
end
-- Report whether a file can be opened for reading.
-- Approach from http://lua-users.org/wiki/FileInputOutput
-- @param file  path to probe
-- @return true if the file could be opened, false otherwise
function file_exists(file)
  local handle = io.open(file, "rb")
  if handle == nil then
    return false
  end
  -- the handle was only needed as a probe
  handle:close()
  return true
end
-- Read a file into a list of its lines.
-- @param file  path of the file to read
-- @return a table with one entry per line; an empty table if the file
--         cannot be opened
function lines_from(file)
  -- Open the file once and read through that handle. Probing for existence
  -- first and then reopening with io.lines raises an error if the file
  -- becomes unavailable between the two steps.
  local handle = io.open(file, "r")
  if not handle then
    return {}
  end
  local lines = {}
  for line in handle:lines() do
    lines[#lines + 1] = line
  end
  handle:close()
  return lines
end
-- String testing
-- Tell whether str begins with the text in start.
-- @param str    string to inspect
-- @param start  prefix to look for
-- @return true if the first #start characters of str equal start
function starts_with(str, start)
  local prefix_length = #start
  local head = str:sub(1, prefix_length)
  return head == start
end
-- Select the relevant lines: those that start with "hyphenated" but are
-- neither the "start hyphenated words" nor the "stop hyphenated words"
-- marker line.
-- @param lines  list of lines
-- @return a new list with the selected lines, in their original order
function getHyphenationLines(lines)
  local lines_with_hyphenations = {}
  -- ipairs walks the list in index order; pairs gives no order guarantee
  for _, line in ipairs(lines) do
    if starts_with(line, "hyphenated")
      -- plain-text searches (4th argument true): the markers are literals
      and not string.find(line, "start hyphenated words", 1, true)
      and not string.find(line, "stop hyphenated words", 1, true)
    then
      lines_with_hyphenations[#lines_with_hyphenations + 1] = line
    end
  end
  return lines_with_hyphenations
end
-- String cleaning
-- wrapper functions
-- Apply cleanLine to every entry of a list.
-- @param xs  list of lines
-- @return a new list holding cleanLine(x) for each x, in the same order
function cleanLines (xs)
  local cleanedLines = {}
  -- ipairs keeps the input order; pairs gives no order guarantee
  for _, line in ipairs(xs) do
    cleanedLines[#cleanedLines + 1] = cleanLine(line)
  end
  return cleanedLines
end
-- Clean a single line: extract the word with getWord, then pass it
-- through removeTrailingPunctuation.
function cleanLine (x)
  local word = getWord(x)
  local cleaned = removeTrailingPunctuation(word)
  return cleaned
end
-- 1. Extract the word from a line
-- Returns everything from character 26 to the end of the line.
-- NOTE(review): the original note says reading starts "at colon";
-- presumably column 26 is where the word begins in the log lines - confirm.
function getWord(x)
  -- we actually read from character 26 onwards
  local first_column = 26
  return string.sub(x, first_column)
end
-- 2. Remove trailing punctuation
-- Strips one trailing comma from a word.
-- @param x  the word
-- @return x without its final comma; x unchanged if it does not end in one
function removeTrailingPunctuation (x)
  -- Look at the last character only: a comma elsewhere in the word must
  -- not cause an unrelated final character to be cut off.
  if x:sub(-1) == ',' then
    return x:sub(1, -2)
  end
  return x
end
-- Test whether a word occurs in a list.
-- @param x     value to look for
-- @param list  list to search
-- @return true if some entry equals x, nil otherwise
function inList (x, list)
  local found = nil
  for _, candidate in ipairs(list) do
    if candidate == x then
      found = true
      break
    end
  end
  return found
end
-- Filter hyphenated words based on second list (whitelist)
-- @param xs    list of words
-- @param list  whitelist: words to leave out (nil is treated as empty)
-- @return a new list with the words of xs that are not in the whitelist,
--         in their original order
function filterHyphenationsWordlist (xs, list)
  -- Turn the whitelist into a set once, so that each word costs a single
  -- table lookup instead of a scan over the whole whitelist.
  local whitelisted = {}
  for _, word in ipairs(list or {}) do
    whitelisted[word] = true
  end
  local result = {}
  for _, word in ipairs(xs) do
    if not whitelisted[word] then
      result[#result + 1] = word
    end
  end
  return result
end
-- Write the results to a file, one entry per line.
-- @param results      list of words to write
-- @param output_file  path of the file to write; falls back to
--                     "check_hyphens.log" when nil
-- Raises an error if the file cannot be opened for writing.
function saveResultsToFile(results, output_file)
  -- honour the caller's file name instead of a hard-coded one
  local path = output_file or "check_hyphens.log"
  local handle = assert(io.open(path, "w"))
  -- Write through the handle itself: redirecting the global default output
  -- with io.output and closing it afterwards would break later io.write calls.
  for _, word in ipairs(results) do
    handle:write(word, '\n')
  end
  handle:close()
end
-- Run: the first two command-line arguments are passed on as the input
-- file name and the whitelist file name
main(arg[1], arg[2])
Ok, a little Lua lesson, if you don't mind.
---- xxx.tex ----
\enabletrackers[hyphenation.applied]
\starttext
\input tufte
\stoptext
---- xxx.tmp ----
re-fine
---- xxx.lua ----
-- Print the hyphenated words recorded in a log file, skipping whitelisted ones.
-- logname   : path of the log file; nothing happens when it is missing or empty
-- whitename : optional path of a whitelist file with one word per line
-- Each reported entry is printed as the number found before the colon,
-- followed by the word.
-- NOTE(review): io.loaddata, string.splitlines and table.tohash are not
-- standard Lua; presumably they come from the ConTeXt (mtxrun) Lua libraries.
local function check(logname,whitename)
  if not logname then
    return
  end
  local data = io.loaddata(logname) or ""
  if data == "" then
    return
  end
  -- the pattern has to stay on one line: a quoted string cannot span lines
  local blob = string.match(data,"start hyphenated words(.-)stop hyphenated words")
  if not blob then
    return
  end
  -- set of whitelisted words; empty when no whitelist could be loaded
  local whitetext = whitename and io.loaddata(whitename) or ""
  local white = table.tohash(string.splitlines(whitetext))
  for n, s in string.gmatch(blob,"(%d+) *: (%S+)") do
    -- whitelisted words are fine, report only the others
    if not white[s] then
      print(n,s)
    end
  end
end
-- Run the check on the first two entries of environment.files
-- (log file and optional whitelist).
-- NOTE(review): environment.files is presumably filled by mtxrun with the
-- file names given on the command line - confirm.
check(environment.files[1],environment.files[2])
-- Manual tests, kept for reference:
-- print("TEST 1")
-- check("xxx.log")
-- print("TEST 2")
-- check("xxx.log","xxx.tmp")
-------------------
>mtxrun --script xxx xxx.log
1 dis-tinguish
1 harmo-nize
1 re-fine
>mtxrun --script xxx xxx.log xxx.tmp
1 dis-tinguish
1 harmo-nize
That said, I wonder if we should add the filename, just in case one
includes 20 files; a whitelist could also be an option to the tracker.
Now the good news is that the tracker is actually already a bit more
clever. After a run you will see
xxx-hyphenation-new.lua
that has the hyphenated words (not the numbers)
and you can make a whitelist
xxx-hyphenation-old.lua
in which case you only get the new ones.
Hans
-----------------------------------------------------------------
Hans Hagen | PRAGMA ADE
Ridderstraat 27 | 8061 GH Hasselt | The Netherlands
tel: 038 477 53 69 | www.pragma-ade.nl | www.pragma-pod.nl
-----------------------------------------------------------------
___________________________________________________________________________________
If your question is of interest to others as well, please add an entry to the
Wiki!
maillist : ntg-context@ntg.nl / https://www.ntg.nl/mailman/listinfo/ntg-context
webpage : https://www.pragma-ade.nl / http://context.aanhet.net
archive : https://bitbucket.org/phg/context-mirror/commits/
wiki : https://contextgarden.net
___________________________________________________________________________________