#!perl
use
5.006;
BEGIN {
unless
(
"A"
eq
pack
(
'U'
, 0x41)) {
die
"Unicode::Collate cannot stringify a Unicode code point\n"
;
}
}
sub
_getHexArray {
map
hex
,
$_
[0] =~ /([0-9a-fA-F]+)/g }
our
$PACKAGE
=
'Unicode::Collate, mkheader'
;
our
$prefix
=
"UCA_"
;
our
%SimpleEntries
;
our
@Rest
;
{
my
(
$f
,
$fh
);
foreach
my
$d
(File::Spec->curdir()) {
$f
= File::Spec->catfile(
$d
,
"Collate"
,
"allkeys.txt"
);
last
if
open
(
$fh
,
$f
);
$f
=
undef
;
}
croak
"$PACKAGE: Collate/allkeys.txt is not found"
if
!
defined
$f
;
while
(
my
$line
= <
$fh
>) {
next
if
$line
=~ /^\s*
if
(
$line
=~ /^\s*\@/) {
push
@Rest
,
$line
;
next
;
}
next
if
$line
!~ /^\s*[0-9A-Fa-f]/;
$line
=~ s/[
my
(
$e
,
$k
) =
split
/;/,
$line
;
croak
"Wrong Entry: <charList> must be separated by ';' "
.
"from <collElement>"
if
!
$k
;
my
@uv
= _getHexArray(
$e
);
next
if
!
@uv
;
if
(
@uv
!= 1) {
push
@Rest
,
$line
;
next
;
}
my
$is_L3_ignorable
= TRUE;
my
@key
;
foreach
my
$arr
(
$k
=~ /\[([^\[\]]+)\]/g) {
my
$var
=
$arr
=~ /\*/;
my
@wt
= _getHexArray(
$arr
);
push
@key
,
pack
(VCE_TEMPLATE,
$var
,
@wt
);
$is_L3_ignorable
= FALSE
if
$wt
[0] ||
$wt
[1] ||
$wt
[2];
}
my
$mapping
=
$is_L3_ignorable
? [] : \
@key
;
my
$num
=
@$mapping
;
my
$str
=
chr
(
$num
).
join
(
''
,
@$mapping
);
$SimpleEntries
{
$uv
[0]} = stringify(
$str
);
}
}
sub
stringify {
my
$str
=
shift
;
return
sprintf
'"%s"'
,
join
''
,
map
sprintf
(
"\\x%02x"
,
ord
$_
),
split
//,
$str
;
}
my
$init
=
''
;
{
my
$type
=
"char*"
;
my
$head
=
$prefix
.
"rest"
;
$init
.=
"static $type $head [] = {\n"
;
for
my
$line
(
@Rest
) {
$line
=~ s/\s*\z//;
next
if
$line
eq
''
;
$init
.=
"/*$line*/\n"
if
$line
=~ /^[A-Za-z0-9_.:;@\ \[\]]+\z/;
$init
.=
"($type)"
.stringify(
$line
).
",\n"
;
}
$init
.=
"NULL\n"
;
$init
.=
"};\n\n"
;
}
my
@tripletable
= (
{
file
=>
"ucatbl"
,
name
=>
"simple"
,
type
=>
"char*"
,
hash
=> \
%SimpleEntries
,
null
=>
"NULL"
,
init
=>
$init
,
},
);
foreach
my
$tbl
(
@tripletable
) {
my
$file
=
"$tbl->{file}.h"
;
my
$head
=
"${prefix}$tbl->{name}"
;
my
$type
=
$tbl
->{type};
my
$hash
=
$tbl
->{hash};
my
$null
=
$tbl
->{null};
my
$init
=
$tbl
->{init};
open
FH,
">$file"
or croak
"$PACKAGE: $file can't be made"
;
binmode
FH;
select
FH;
my
%val
;
print
FH <<
'EOF'
;
/*
* This file is auto-generated by mkheader.
* Any changes here will be lost!
*/
EOF
print
$init
if
defined
$init
;
foreach
my
$uv
(
keys
%$hash
) {
croak
sprintf
(
"a Unicode code point 0x%04X over 0x10FFFF."
,
$uv
)
unless
$uv
<= 0x10FFFF;
my
@c
=
unpack
'CCCC'
,
pack
'N'
,
$uv
;
$val
{
$c
[1] }{
$c
[2] }{
$c
[3] } =
$hash
->{
$uv
};
}
foreach
my
$p
(
sort
{
$a
<=>
$b
}
keys
%val
) {
next
if
!
$val
{
$p
};
for
(
my
$r
= 0;
$r
< 256;
$r
++) {
next
if
!
$val
{
$p
}{
$r
};
printf
"static $type ${head}_%02x_%02x [256] = {\n"
,
$p
,
$r
;
for
(
my
$c
= 0;
$c
< 256;
$c
++) {
print
"\t"
,
defined
$val
{
$p
}{
$r
}{
$c
}
?
"($type)"
.
$val
{
$p
}{
$r
}{
$c
}
:
$null
;
print
','
if
$c
!= 255;
print
"\n"
if
$c
% 8 == 7;
}
print
"};\n\n"
;
}
}
foreach
my
$p
(
sort
{
$a
<=>
$b
}
keys
%val
) {
next
if
!
$val
{
$p
};
printf
"static $type* ${head}_%02x [256] = {\n"
,
$p
;
for
(
my
$r
= 0;
$r
< 256;
$r
++) {
print
$val
{
$p
}{
$r
}
?
sprintf
(
"${head}_%02x_%02x"
,
$p
,
$r
)
:
"NULL"
;
print
','
if
$r
!= 255;
print
"\n"
if
$val
{
$p
}{
$r
} || (
$r
+1) % 8 == 0;
}
print
"};\n\n"
;
}
print
"static $type** $head [] = {\n"
;
for
(
my
$p
= 0;
$p
<= 0x10;
$p
++) {
print
$val
{
$p
} ?
sprintf
(
"${head}_%02x"
,
$p
) :
"NULL"
;
print
','
if
$p
!= 0x10;
print
"\n"
;
}
print
"};\n\n"
;
close
FH;
}
1;