On Sun, Sep 14, 2025, at 3:21 PM, Robert Elz wrote:
> Date: Sun, 14 Sep 2025 13:23:06 -0400
> From: =?UTF-8?Q?Lawrence_Vel=C3=A1zquez?= <v...@larryv.me>
> Message-ID: <bbe1cd4d-2e4d-41a1-be56-f49ef925c...@app.fastmail.com>
>
> | yash takes this very seriously.
>
> And is very much the outlier - I couldn't find any other shell which
> reports other than "1 args", just yash. Not mksh, dash, ksh93, bosh,
> even not zsh (with or without --emulate sh). Can you?
Of the shells I have at hand, running under a UTF-8 locale on macOS,
only yash delimits on anything other than space and tab. (I tested
the twenty-five Unicode whitespace characters [*]. Eighteen of
them are <blank>s in my locale, and yash delimits on all of them.)
Under an ISO-8859-1 locale, only bash and yash delimit on anything
other than space and tab. (Specifically, they delimit on 0xA0. If
there were other <blank> characters, presumably those would be used
as well.)
[*]
https://en.wikipedia.org/w/index.php?title=Whitespace_character&oldid=1309009909#Unicode
--
vq
# Print, one per line, the octal escape of every single byte (\001-\377)
# that the running shell's parser treats as a word delimiter.  Each byte is
# placed between "a" and "z" in a `set` command line; the byte is reported
# when that command leaves exactly two positional parameters ("a" and "z").
#
# Portable POSIX sh on purpose: the script is meant to be run under many
# different shells to compare their behavior.

# Clear the positional parameters of the main shell.
set --

# Every probe runs inside a private, empty scratch directory.  Several of
# the candidate bytes are shell operators or glob characters:
#   '>'  would create (or truncate!) a file named "z" in the current directory
#   '<'  would read a file named "z" if one happened to exist
#   '*', '?', '['  would expand against whatever files are present, so two
#        matching file names would be misreported as "2 args"
# Working in an empty directory (plus `set -f` below) keeps the probe free of
# side effects and independent of the caller's directory contents.
scratch_dir=$(mktemp -d "${TMPDIR:-/tmp}/delim.XXXXXX") || {
  printf '%s\n' 'cannot create scratch directory' >&2
  exit 1
}

for ch_octal in '\001' '\002' '\003' '\004' '\005' '\006' '\007' \
  '\010' '\011' '\012' '\013' '\014' '\015' '\016' '\017' \
  '\020' '\021' '\022' '\023' '\024' '\025' '\026' '\027' \
  '\030' '\031' '\032' '\033' '\034' '\035' '\036' '\037' \
  '\040' '\041' '\042' '\043' '\044' '\045' '\046' '\047' \
  '\050' '\051' '\052' '\053' '\054' '\055' '\056' '\057' \
  '\060' '\061' '\062' '\063' '\064' '\065' '\066' '\067' \
  '\070' '\071' '\072' '\073' '\074' '\075' '\076' '\077' \
  '\100' '\101' '\102' '\103' '\104' '\105' '\106' '\107' \
  '\110' '\111' '\112' '\113' '\114' '\115' '\116' '\117' \
  '\120' '\121' '\122' '\123' '\124' '\125' '\126' '\127' \
  '\130' '\131' '\132' '\133' '\134' '\135' '\136' '\137' \
  '\140' '\141' '\142' '\143' '\144' '\145' '\146' '\147' \
  '\150' '\151' '\152' '\153' '\154' '\155' '\156' '\157' \
  '\160' '\161' '\162' '\163' '\164' '\165' '\166' '\167' \
  '\170' '\171' '\172' '\173' '\174' '\175' '\176' '\177' \
  '\200' '\201' '\202' '\203' '\204' '\205' '\206' '\207' \
  '\210' '\211' '\212' '\213' '\214' '\215' '\216' '\217' \
  '\220' '\221' '\222' '\223' '\224' '\225' '\226' '\227' \
  '\230' '\231' '\232' '\233' '\234' '\235' '\236' '\237' \
  '\240' '\241' '\242' '\243' '\244' '\245' '\246' '\247' \
  '\250' '\251' '\252' '\253' '\254' '\255' '\256' '\257' \
  '\260' '\261' '\262' '\263' '\264' '\265' '\266' '\267' \
  '\270' '\271' '\272' '\273' '\274' '\275' '\276' '\277' \
  '\300' '\301' '\302' '\303' '\304' '\305' '\306' '\307' \
  '\310' '\311' '\312' '\313' '\314' '\315' '\316' '\317' \
  '\320' '\321' '\322' '\323' '\324' '\325' '\326' '\327' \
  '\330' '\331' '\332' '\333' '\334' '\335' '\336' '\337' \
  '\340' '\341' '\342' '\343' '\344' '\345' '\346' '\347' \
  '\350' '\351' '\352' '\353' '\354' '\355' '\356' '\357' \
  '\360' '\361' '\362' '\363' '\364' '\365' '\366' '\367' \
  '\370' '\371' '\372' '\373' '\374' '\375' '\376' '\377'
do
  # The escape is deliberately used as the printf *format*: octal escapes in
  # the format operand are the portable way to produce an arbitrary byte.
  # Note that command substitution strips trailing newlines, so for '\012'
  # $ch is empty and the probe below degenerates to `set az`.
  ch=$(printf "$ch_octal")

  # The probe runs in a subshell so that parse errors, the changed directory,
  # `set -f` and the altered positional parameters never reach the main shell.
  # eval is required: the byte has to pass through the shell's tokenizer.
  # Errors (unterminated quotes, "z: not found", ...) are expected and muted.
  if (
    cd "$scratch_dir" || exit
    set -f
    eval "set a${ch}z" 2>/dev/null && test "$#" -eq 2
  ); then
    printf '%s\n' "$ch_octal"
  fi
done

# Remove the scratch directory together with anything the probes created.
rm -rf -- "$scratch_dir"
# Print, one per line, the octal escape sequence of every Unicode whitespace
# character that the running shell's parser treats as a word delimiter.
# The candidates are the twenty-five characters listed at
# https://en.wikipedia.org/w/index.php?title=Whitespace_character&oldid=1309009909#Unicode
# written as the octal escapes of their UTF-8 byte sequences.  Each one is
# placed between "a" and "z" in a `set` command line and is reported when
# that command leaves exactly two positional parameters ("a" and "z").
#
# Portable POSIX sh on purpose: the script is meant to be run under many
# different shells to compare their behavior.

# Clear the positional parameters of the main shell.
set --

for ch_octal in \
  '\011' \
  '\012' \
  '\013' \
  '\014' \
  '\015' \
  '\040' \
  '\302\205' \
  '\302\240' \
  '\341\232\200' \
  '\342\200\200' \
  '\342\200\201' \
  '\342\200\202' \
  '\342\200\203' \
  '\342\200\204' \
  '\342\200\205' \
  '\342\200\206' \
  '\342\200\207' \
  '\342\200\210' \
  '\342\200\211' \
  '\342\200\212' \
  '\342\200\250' \
  '\342\200\251' \
  '\342\200\257' \
  '\342\201\237' \
  '\343\200\200'
do
  # The escape is deliberately used as the printf *format*: octal escapes in
  # the format operand are the portable way to produce arbitrary bytes.
  # Note that command substitution strips trailing newlines, so for '\012'
  # $ch is empty and the probe below degenerates to `set az`.
  ch=$(printf "$ch_octal")

  # The probe runs in a subshell so that the altered positional parameters
  # (and any parse error) never reach the main shell.  eval is required: the
  # character has to pass through the shell's tokenizer.  Errors are muted.
  if (eval "set a${ch}z" 2>/dev/null && test "$#" -eq 2); then
    printf '%s\n' "$ch_octal"
  fi
done