Hi Hans,
On Sun, 2025-04-13 at 22:56 +0200, Hans Hagen wrote:
> \directlua {
> local n = \fontid\font
> local f = font.getfont(n)
> local c = f.characters[65]
> if c then
> local u = c.tounicode
> c.tounicode = "0042"
> font.addcharacters(n, { characters = { [65] = c } })
> f = font.getfont(n)
> c = f.characters[65]
> print(u,c.tounicode)
> end
> }
>
> \def\luaglyphtounicode#1#2%
> {\directlua {
> local n = \fontid\font
> local c = font.getfont(n).characters[\number#1]
> if c then
> c.tounicode = "#2"
> font.addcharacters(n, { characters = { [\number#1] = c } })
> end
> }}
>
> % This needs to be done before the font is used!
>
> \luaglyphtounicode{66}{0043}
>
> AB % BC
>
> \bye
This code doesn't seem to work for me; when running "pdftotext" I get
"AB", not "BC". I get the same results with both the LuaTeX in TL25 and
with a self-built version from the latest sources at
gitlab.lisn.upsaclay.fr/texlive/luatex.
Actually, I can't get "tounicode" to work with TFM fonts at all.
Compiling this file with Plain LuaTeX
\catcode`\%=12
\directlua{
pdf.setgentounicode(1)
local function define_font(name, filename)
  --[[ Define a test font from FILENAME in which each of the 256 slots
       carries the same tounicode value, then store the new font id under
       the control sequence NAME (the document selects it with setfontid).
       Only block comments are used in this function: the code sits inside
       directlua, where the lines are joined, so a line comment would
       swallow everything that follows it. ]]
  local basename, extension = filename:match("^(.+)%.(.*)$")

  --[[ kpathsea file type used to locate the file, keyed by extension. ]]
  local filetype = ({
    otf = "opentype fonts",
    pfb = "type1 fonts",
    tfm = "tfm",
    ttf = "truetype fonts",
  })[extension]

  --[[ Value of the format field, keyed by extension; none for tfm. ]]
  local fonttype = ({
    otf = "opentype",
    pfb = "type1",
    tfm = nil,
    ttf = "truetype",
  })[extension]

  --[[ A tfm font is named after its file, every other format after NAME. ]]
  local font_name
  if extension == "tfm" then
    font_name = basename
  else
    font_name = name
  end

  --[[ Only the pfb case sets encodingbytes explicitly. ]]
  local encodingbytes
  if extension == "pfb" then
    encodingbytes = 2
  end

  local path = kpse.find_file(filename, filetype)

  --[[ Keep the character table local: as a global it would leak out of
       the function and be shared between calls. ]]
  local chars = {}
  for i = 0x0, 0xFF do
    chars[i] = {
      index = i,
      width = tex.sp("1em"),
      height = tex.sp("1em"),
      depth = 0,
      tounicode = { utf8.codepoint("it works!", 1, -1) },
    }
  end

  local id = font.define {
    name = font_name,
    filename = path,
    type = "real",
    format = fonttype,
    characters = chars,
    tounicode = 1,
    encodingbytes = encodingbytes,
  }
  token.set_char(name, id)
end
define_font("otf", "texgyrechorus-mediumitalic.otf")
define_font("ttf", "NotoSans-Black.ttf")
define_font("tfm", "ClearSans-Medium-tlf-ot1.tfm")
define_font("typeone", "pcrro8a.pfb")
}
\nopagenumbers
Initial: {a \par}
OpenType: {\setfontid\otf a \par}
TrueType: {\setfontid\ttf a \par}
TeX Font Metrics: {\setfontid\tfm a \par}
Type 1: {\setfontid\typeone a \par}
\bye
gives the following output with "pdftotext":
Initial: a
OpenType: it works!
TrueType: it works!
TeX Font Metrics: a
Type 1: it works!
Or if you prefer a ConTeXt example, this file
% engine=luatex
\start
\definedfont[file:ClearSans-Medium-tlf-ot1.tfm]
\startluacode
-- Switch on gentounicode in the pdf backend before touching the font.
pdf.setgentounicode(1)

-- Fetch the table of the current font and adjust its top-level fields.
local fontdata = font.fonts[font.current()]
fontdata.tounicode = 1
fontdata.name = "ClearSans-Medium-tlf-ot1"

-- Give every character the same mapping: the code points of the marker text.
local marker = "it works!"
for _, glyph in pairs(fontdata.characters) do
    glyph.tounicode = { utf8.codepoint(marker, 1, -1) }
end

-- Define the modified table as a new font and store its id globally under
-- the control sequence "tfm".
token.set_char("tfm", font.define(fontdata), "global")
\stopluacode
\stop
\setuppagenumbering[state=stop]
\starttext
x{\setfontid\tfm a \par}
\stoptext
gives the following output with "pdftotext":
xa
Thanks,
-- Max