> How can I check what script a character belongs to?

$ perl -Mutf8 -MUnicode::UCD=charinfo -E'say charinfo(ord "为")->{script}'
Han

Sanity checks:

$ perl -Mutf8 -E'say "为" =~ /\p{Han}/'
1
$ uniprops -a1 为 | ack Script
Script=Han
Script=Hani

> check if it is the same as the
> previous one - i.e. back to C mode of programming.

Let the regex engine help you advance the character counter.

$ cat langs
ΕλληνικάEnglish한국어日本語Русскийไทย
----
$ cat langs.pl
use 5.010;
use strictures;
use Unicode::UCD qw(charinfo);

# Return the Unicode script name (e.g. "Han", "Latin") of the first
# character of the string passed as the only argument.
sub script {
    return charinfo(ord substr($_[0], 0, 1))->{script};
}

# Split $_ into maximal runs of characters sharing one script and print
# each run on its own line.  \G pins every match to the current pos(), so
# the regex engine itself keeps track of where the next run starts.
while (/\G(\X)/gc) {
    # Save the capture before any other match can overwrite $1.
    my $grapheme = $1;
    my $script   = script $grapheme;

    # The match above already consumed one grapheme; rewind to its start so
    # the run is taken from the right place.  (Adding the run length to the
    # un-rewound pos() would enter every later run one character late and
    # skip a run completely now and then.)
    pos($_) = pos($_) - length $grapheme;

    # "Script=" is spelled out because charinfo reports the Script property,
    # while a bare \p{Name} means Script_Extensions on newer perls.
    # Bail out instead of looping forever if the run cannot be matched.
    last unless /\G(\p{Script=$script}+)/gc;
    say $1;
}
----
$ perl -C -ln langs.pl < langs
Ελληνικά
English
한국어
日本語
Русский
ไทย
signature.asc
Description: PGP signature