#!/usr/bin/env perl
use
open
qw< :utf8 :std >
;
"decode"
,
"encode"
,
);
sub
main;
sub
check_options;
sub
cp2name($);
sub
hex_arg;
sub
m2u;
sub
show_encodings;
sub
show_mappings;
sub
showpairs;
sub
string_arg;
sub
tty_width;
sub
u2m;
sub
visichar($);
our
$Enc_Name
;
our
%Opt
;
our
$VERSION
=
"1.2 (2011-07-18)"
;
$| = 1;
$0 = basename($0);
main();
exit
;
die
"NOT REACHED"
;
sub
main {
check_options();
pod2usage(
"no arguments"
)
if
@ARGV
== 0;
pod2usage(
"No encoding specified"
)
unless
$Enc_Name
;
show_mappings(
@ARGV
);
exit
(0);
}
sub
check_options {
Getopt::Long::Configure
qw[ bundling auto_version pass_through ]
;
GetOptions(\
%Opt
,
qw[
help|h|?
man|m
debug|d
encoding|e=s
all|a
list|l
]
) || pod2usage(
"unknown option"
);
pod2usage(0)
if
$Opt
{help};
pod2usage(
-exitstatus
=> 0,
-verbose
=> 2)
if
$Opt
{man};
if
(
$Opt
{list}) {
show_encodings();
exit
0;
}
my
$enc
;
if
(
$Opt
{encoding}) {
$enc
=
$Opt
{encoding};
}
else
{
if
(
@ARGV
&&
$ARGV
[0] =~ s/^-(?=[^-])//) {
$enc
=
shift
@ARGV
;
}
else
{
$enc
=
"MacRoman"
;
}
}
if
(
my
$enc_obj
= Encode::find_encoding(
$enc
)) {
my
$name
=
$enc_obj
->name ||
$enc
;
$Enc_Name
=
$name
;
}
else
{
pod2usage(
"Couldn't find encoding for $enc"
);
}
if
(
$Opt
{
"all"
} ||
@ARGV
== 0) {
@ARGV
=
map
{
sprintf
"0x%02X"
,
$_
} 0 .. 255;
}
}
sub
show_encodings {
my
@names
= Encode->encodings(
":all"
);
my
$collator
= new Unicode::Collate::
preprocess
=>
sub
($) {
my
$str
=
shift
();
$str
=~ s/(\d+)/
sprintf
(
"%012d"
,$1)/ge;
return
$str
;
},
upper_before_lower
=> 1,
;
my
$fmtstr
=
join
(
", "
,
$collator
->
sort
(
@names
));
my
$width
= tty_width() - 2;
my
$code
=
"format STDOUT =\n"
;
$code
.=
"^"
. (
"<"
x
$width
) .
" ~~\n"
;
$code
.=
"\$fmtstr\n"
;
$code
.=
".\n"
;
$code
.=
"1;\n"
;
print
"CODE is:\n$code\n"
if
$Opt
{debug};
eval
(
$code
) ||
die
"eval failed: $@ on $code"
;
local
$: =
" \n"
;
write
;
}
sub
tty_width {
my
(
$rows
,
$cols
) = (24, 80);
if
(
$ENV
{COLUMNS} &&
$ENV
{COLUMNS} =~ /^(?!0)(\d+)$/) {
$cols
= $1;
}
else
{
my
(
$trows
,
$tcols
) =
split
" "
, `stty size < /dev/tty 2>/dev/null`;
$cols
=
$tcols
unless
$?;
}
return
$cols
;
}
BEGIN {
my
$enc
=
":utf8"
;
binmode
(STDOUT,
$enc
) ||
die
"can't binmode STDOUT to $enc: $!"
;
binmode
(STDERR,
$enc
) ||
die
"can't binmode STDERR to $enc: $!"
;
}
END {
close
(STDOUT) ||
die
"can't close STDOUT: $!"
;
}
{
my
(
$LQ
,
$RQ
); INIT {
$LQ
=
"\N{FULLWIDTH LESS-THAN SIGN}"
;
$RQ
=
"\N{FULLWIDTH GREATER-THAN SIGN}"
;
}
sub
quote {
my
@retlist
=
map
{
$LQ
.
" "
.
$_
.
" "
.
$RQ
}
@_
;
return
wantarray
?
@retlist
:
join
(
" "
=>
@retlist
);
}
}
sub
cp2name($) {
my
$cp
=
shift
();
if
(! looks_like_number(
$cp
)) {
die
"bad number: "
. quote(
$cp
);
}
return
$cp
== 0
?
"NULL"
: charnames::viacode(
$cp
)
||
sprintf
(
"U+%04X"
,
$cp
);
}
my
(
$INVALID_CHR
,
$INVALID_CP
); INIT {
$INVALID_CHR
=
"\N{REPLACEMENT CHARACTER}"
;
$INVALID_CP
=
ord
$INVALID_CHR
;
}
sub
showpairs {
my
(
$sep
,
$had
,
$want
) =
@_
;
if
(
$had
==
$INVALID_CP
) {
print
"$Enc_Name "
,
$INVALID_CHR
x 2;
$sep
=
" \N{LEFTWARDS DOUBLE ARROW WITH STROKE}"
;
}
else
{
printf
"%-12s %02X "
,
$Enc_Name
,
$had
;
$sep
=
"\N{LEFT RIGHT DOUBLE ARROW}"
if
$had
==
$want
;
}
print
" $sep "
;
printf
" U+%04X "
,
$want
;
print
quote(visichar(
chr
(
$want
)));
printf
" \\N{%s}"
, cp2name(
$want
);
if
(
chr
(
$want
) =~ /\p{Private_Use}/) {
my
$blockname
= charblock(
$want
) =~ s/ /_/rg;
print
" \\p{Block=$blockname}"
;
}
print
"\n"
;
}
sub
m2u {
my
(
$mac
,
$uni
) =
@_
;
showpairs(
"\N{RIGHTWARDS DOUBLE ARROW}"
,
$mac
,
$uni
);
}
sub
u2m {
my
(
$uni
,
$mac
) =
@_
;
showpairs(
"\N{LEFTWARDS DOUBLE ARROW}"
,
$uni
,
$mac
);
}
sub
hex_arg {
my
$arg
=
shift
();
my
$had_chr
=
hex
$arg
;
if
(
$had_chr
> 0xFF) {
warn
"$0: hex arg "
. quote(
$arg
) .
" too high: $Enc_Name codepoints < 0x100\n"
;
return
;
}
my
$want_chr
=
ord
(decode(
$Enc_Name
,
chr
(
$had_chr
)));
m2u(
$had_chr
=>
$want_chr
);
}
sub
string_arg {
my
$arg
=
shift
();
my
$uni_arg
= decode(
"utf-8"
,
$arg
);
my
$mac_arg
= encode(
$Enc_Name
,
$uni_arg
);
for
(
my
$i
= 0;
$i
<
length
(
$uni_arg
);
$i
++) {
my
$mac_chr
=
substr
(
$mac_arg
,
$i
, 1);
my
$uni_chr
=
substr
(
$uni_arg
,
$i
, 1);
if
(
$mac_chr
eq
"?"
&&
$uni_chr
ne
"?"
) {
$mac_chr
=
$INVALID_CHR
;
}
u2m(
ord
(
$mac_chr
),
ord
(
$uni_chr
));
}
}
sub
show_mappings {
for
(
@_
) {
if
(/ ^ 0x [a-f0-9]{2,} $ /xi) {
hex_arg(
$_
);
}
elsif
(/ ^ U \+ ([a-f0-9]{1,} ) $ /xi) {
string_arg(encode(
"UTF-8"
,
chr
hex
$1));
}
else
{
string_arg(
$_
);
}
}
}
sub
visichar($) {
my
$orig_char
=
shift
();
state
$glyph_map
= {
"\N{NULL}"
=>
"\N{SYMBOL FOR NULL}"
,
"\N{START OF HEADING}"
=>
"\N{SYMBOL FOR START OF HEADING}"
,
"\N{START OF TEXT}"
=>
"\N{SYMBOL FOR START OF TEXT}"
,
"\N{END OF TEXT}"
=>
"\N{SYMBOL FOR END OF TEXT}"
,
"\N{END OF TRANSMISSION}"
=>
"\N{SYMBOL FOR END OF TRANSMISSION}"
,
"\N{ENQUIRY}"
=>
"\N{SYMBOL FOR ENQUIRY}"
,
"\N{ACKNOWLEDGE}"
=>
"\N{SYMBOL FOR ACKNOWLEDGE}"
,
"\N{ALERT}"
=>
"\N{SYMBOL FOR BELL}"
,
"\N{BACKSPACE}"
=>
"\N{SYMBOL FOR BACKSPACE}"
,
"\N{CHARACTER TABULATION}"
=>
"\N{SYMBOL FOR HORIZONTAL TABULATION}"
,
"\N{LINE FEED}"
=>
"\N{SYMBOL FOR LINE FEED}"
,
"\N{LINE TABULATION}"
=>
"\N{SYMBOL FOR VERTICAL TABULATION}"
,
"\N{FORM FEED}"
=>
"\N{SYMBOL FOR FORM FEED}"
,
"\N{CARRIAGE RETURN}"
=>
"\N{SYMBOL FOR CARRIAGE RETURN}"
,
"\N{SHIFT OUT}"
=>
"\N{SYMBOL FOR SHIFT OUT}"
,
"\N{SHIFT IN}"
=>
"\N{SYMBOL FOR SHIFT IN}"
,
"\N{DATA LINK ESCAPE}"
=>
"\N{SYMBOL FOR DATA LINK ESCAPE}"
,
"\N{DEVICE CONTROL ONE}"
=>
"\N{SYMBOL FOR DEVICE CONTROL ONE}"
,
"\N{DEVICE CONTROL TWO}"
=>
"\N{SYMBOL FOR DEVICE CONTROL TWO}"
,
"\N{DEVICE CONTROL THREE}"
=>
"\N{SYMBOL FOR DEVICE CONTROL THREE}"
,
"\N{DEVICE CONTROL FOUR}"
=>
"\N{SYMBOL FOR DEVICE CONTROL FOUR}"
,
"\N{NEGATIVE ACKNOWLEDGE}"
=>
"\N{SYMBOL FOR NEGATIVE ACKNOWLEDGE}"
,
"\N{SYNCHRONOUS IDLE}"
=>
"\N{SYMBOL FOR SYNCHRONOUS IDLE}"
,
"\N{END OF TRANSMISSION BLOCK}"
=>
"\N{SYMBOL FOR END OF TRANSMISSION BLOCK}"
,
"\N{CANCEL}"
=>
"\N{SYMBOL FOR CANCEL}"
,
"\N{END OF MEDIUM}"
=>
"\N{SYMBOL FOR END OF MEDIUM}"
,
"\N{SUBSTITUTE}"
=>
"\N{SYMBOL FOR SUBSTITUTE}"
,
"\N{ESCAPE}"
=>
"\N{SYMBOL FOR ESCAPE}"
,
"\N{INFORMATION SEPARATOR FOUR}"
=>
"\N{SYMBOL FOR FILE SEPARATOR}"
,
"\N{INFORMATION SEPARATOR THREE}"
=>
"\N{SYMBOL FOR GROUP SEPARATOR}"
,
"\N{INFORMATION SEPARATOR TWO}"
=>
"\N{SYMBOL FOR RECORD SEPARATOR}"
,
"\N{INFORMATION SEPARATOR ONE}"
=>
"\N{SYMBOL FOR UNIT SEPARATOR}"
,
"\N{SPACE}"
=>
"\N{SYMBOL FOR SPACE}"
,
"\N{DELETE}"
=>
"\N{SYMBOL FOR DELETE}"
,
"\N{NEXT LINE}"
=>
"\N{SYMBOL FOR NEWLINE}"
,
};
my
$display_char
;
if
(
$display_char
=
$glyph_map
->{
$orig_char
}) {
}
elsif
(
$orig_char
=~ /[\pC\p{White_Space}]/) {
if
(
$orig_char
=~ /\p{Private_Use}/) {
$display_char
=
$orig_char
;
}
elsif
(
$orig_char
=~ /\p{Unknown}/) {
$display_char
=
"\N{SYMBOL FOR SUBSTITUTE FORM TWO}"
,
}
else
{
$display_char
=
"-"
;
}
}
else
{
$display_char
=
$orig_char
;
}
return
$display_char
;
}