#!perl
use
5.006;
BEGIN {
unless
(
"A"
eq
pack
(
'U'
, 0x41)) {
die
"Unicode::Normalize cannot stringify a Unicode code point\n"
;
}
}
our
$PACKAGE
=
'Unicode::Normalize, mkheader'
;
our
$prefix
=
"UNF_"
;
our
$structname
=
"${prefix}complist"
;
sub
pack_U {
return
pack
(
'U*'
,
@_
);
}
our
%Comp1st
;
our
%CompList
;
my
$File_CompExcl
= File::Spec->catfile(
File::Spec->curdir(),
'Normalize'
,
'CompExcl.pl'
);
our
%Combin
;
our
%Canon
;
our
%Compat
;
our
%Compos
;
our
%Exclus
;
our
%Single
;
our
%NonStD
;
our
%Comp2nd
;
our
$Combin
=
do
"unicore/CombiningClass.pl"
||
do
"unicode/CombiningClass.pl"
|| croak
"$PACKAGE: CombiningClass.pl not found"
;
our
$Decomp
=
do
"unicore/Decomposition.pl"
||
do
"unicode/Decomposition.pl"
|| croak
"$PACKAGE: Decomposition.pl not found"
;
our
$CompEx
=
do
$File_CompExcl
|| croak
"$PACKAGE: CompExcl.pl not found"
;
sub
decomposeHangul {
my
$sindex
=
$_
[0] - SBase;
my
$lindex
=
int
(
$sindex
/ NCount);
my
$vindex
=
int
((
$sindex
% NCount) / TCount);
my
$tindex
=
$sindex
% TCount;
my
@ret
= (
LBase +
$lindex
,
VBase +
$vindex
,
$tindex
? (TBase +
$tindex
) : (),
);
return
wantarray
?
@ret
: pack_U(
@ret
);
}
sub
_getHexArray {
map
hex
,
$_
[0] =~ /\G *([0-9A-Fa-f]+)/g }
while
(
$Combin
=~ /(.+)/g) {
my
@tab
=
split
/\t/, $1;
my
$ini
=
hex
$tab
[0];
if
(
$tab
[1] eq
''
) {
$Combin
{
$ini
} =
$tab
[2];
}
else
{
$Combin
{
$_
} =
$tab
[2]
foreach
$ini
..
hex
(
$tab
[1]);
}
}
while
(
$Decomp
=~ /(.+)/g) {
my
@tab
=
split
/\t/, $1;
my
$compat
=
$tab
[2] =~ s/<[^>]+>//;
my
$dec
= [ _getHexArray(
$tab
[2]) ];
my
$ini
=
hex
(
$tab
[0]);
my
$end
=
$tab
[1] eq
''
?
$ini
:
hex
(
$tab
[1]);
foreach
my
$u
(
$ini
..
$end
) {
$Compat
{
$u
} =
$dec
;
$Canon
{
$u
} =
$dec
if
!
$compat
;
}
}
while
(
$CompEx
=~ /(.+)/g) {
my
$s
= $1;
next
if
$s
=~ /^
$s
=~ s/
foreach
my
$u
(_getHexArray(
$s
)) {
next
if
!
$Canon
{
$u
};
next
if
$u
== 0xFB1D && !
$Canon
{0x1D15E};
$Exclus
{
$u
} = 1;
}
}
foreach
my
$u
(
keys
%Canon
) {
my
$dec
=
$Canon
{
$u
};
if
(
@$dec
== 2) {
if
(
$Combin
{
$dec
->[0] }) {
$NonStD
{
$u
} = 1;
}
else
{
$Compos
{
$dec
->[0] }{
$dec
->[1] } =
$u
;
$Comp2nd
{
$dec
->[1] } = 1
if
!
$Exclus
{
$u
};
}
}
elsif
(
@$dec
== 1) {
$Single
{
$u
} = 1;
}
else
{
my
$h
=
sprintf
'%04X'
,
$u
;
croak(
"Weird Canonical Decomposition of U+$h"
);
}
}
foreach
my
$j
(0x1161..0x1175, 0x11A8..0x11C2) {
$Comp2nd
{
$j
} = 1;
}
sub
getCanonList {
my
@src
=
@_
;
my
@dec
=
map
{
(SBase <=
$_
&&
$_
<= SFinal) ? decomposeHangul(
$_
)
:
$Canon
{
$_
} ? @{
$Canon
{
$_
} } :
$_
}
@src
;
return
join
(
" "
,
@src
) eq
join
(
" "
,
@dec
) ?
@dec
: getCanonList(
@dec
);
}
sub
getCompatList {
my
@src
=
@_
;
my
@dec
=
map
{
(SBase <=
$_
&&
$_
<= SFinal) ? decomposeHangul(
$_
)
:
$Compat
{
$_
} ? @{
$Compat
{
$_
} } :
$_
}
@src
;
return
join
(
" "
,
@src
) eq
join
(
" "
,
@dec
) ?
@dec
: getCompatList(
@dec
);
}
foreach
my
$key
(
keys
%Canon
) {
$Canon
{
$key
} = [ getCanonList(
$key
) ];
}
foreach
my
$key
(
keys
%Compat
) {
$Compat
{
$key
} = [ getCompatList(
$key
) ];
}
foreach
my
$comp1st
(
keys
%Compos
) {
my
$listname
=
sprintf
(
"${structname}_%06x"
,
$comp1st
);
$Comp1st
{
$comp1st
} =
$listname
;
my
$rh1st
=
$Compos
{
$comp1st
};
foreach
my
$comp2nd
(
keys
%$rh1st
) {
my
$uc
=
$rh1st
->{
$comp2nd
};
$CompList
{
$listname
}{
$comp2nd
} =
$uc
;
}
}
sub
split_into_char {
my
$uni
=
shift
;
my
$len
=
length
(
$uni
);
my
@ary
;
for
(
my
$i
= 0;
$i
<
$len
; ++
$i
) {
push
@ary
,
ord
(
substr
(
$uni
,
$i
,1));
}
return
@ary
;
}
sub
_U_stringify {
sprintf
'"%s"'
,
join
''
,
map
sprintf
(
"\\x%02x"
,
$_
), split_into_char(pack_U(
@_
));
}
foreach
my
$hash
(\
%Canon
, \
%Compat
) {
foreach
my
$key
(
keys
%$hash
) {
$hash
->{
$key
} = _U_stringify( @{
$hash
->{
$key
} } );
}
}
my
@boolfunc
= (
{
name
=>
"Exclusion"
,
type
=>
"bool"
,
hash
=> \
%Exclus
,
},
{
name
=>
"Singleton"
,
type
=>
"bool"
,
hash
=> \
%Single
,
},
{
name
=>
"NonStDecomp"
,
type
=>
"bool"
,
hash
=> \
%NonStD
,
},
{
name
=>
"Comp2nd"
,
type
=>
"bool"
,
hash
=> \
%Comp2nd
,
},
);
my
$file
=
"unfexc.h"
;
open
FH,
">$file"
or croak
"$PACKAGE: $file can't be made"
;
binmode
FH;
select
FH;
print
<<
'EOF'
;
/*
* This file is auto-generated by mkheader.
* Any changes here will be lost!
*/
EOF
foreach
my
$tbl
(
@boolfunc
) {
my
@temp
=
sort
{
$a
<=>
$b
}
keys
%{
$tbl
->{hash}};
my
$type
=
$tbl
->{type};
my
$name
=
$tbl
->{name};
print
"$type is$name (UV uv)\n{\nreturn\n\t"
;
while
(
@temp
) {
my
$cur
=
shift
@temp
;
if
(
@temp
&&
$cur
+ 1 ==
$temp
[0]) {
print
"($cur <= uv && uv <= "
;
while
(
@temp
&&
$cur
+ 1 ==
$temp
[0]) {
$cur
=
shift
@temp
;
}
print
"$cur)"
;
print
"\n\t|| "
if
@temp
;
}
else
{
print
"uv == $cur"
;
print
"\n\t|| "
if
@temp
;
}
}
print
"\n\t? TRUE : FALSE;\n}\n\n"
;
}
close
FH;
my
$compinit
=
"typedef struct { UV nextchar; UV composite; } $structname;\n\n"
;
foreach
my
$i
(
sort
keys
%CompList
) {
$compinit
.=
"$structname $i [] = {\n"
;
$compinit
.=
join
",\n"
,
map
sprintf
(
"\t{ %d, %d }"
,
$_
,
$CompList
{
$i
}{
$_
}),
sort
{
$a
<=>
$b
}
keys
%{
$CompList
{
$i
} };
$compinit
.=
",\n{0,0}\n};\n\n"
;
}
my
@tripletable
= (
{
file
=>
"unfcmb"
,
name
=>
"combin"
,
type
=>
"STDCHAR"
,
hash
=> \
%Combin
,
null
=> 0,
},
{
file
=>
"unfcan"
,
name
=>
"canon"
,
type
=>
"char*"
,
hash
=> \
%Canon
,
null
=>
"NULL"
,
},
{
file
=>
"unfcpt"
,
name
=>
"compat"
,
type
=>
"char*"
,
hash
=> \
%Compat
,
null
=>
"NULL"
,
},
{
file
=>
"unfcmp"
,
name
=>
"compos"
,
type
=>
"$structname *"
,
hash
=> \
%Comp1st
,
null
=>
"NULL"
,
init
=>
$compinit
,
},
);
foreach
my
$tbl
(
@tripletable
) {
my
$file
=
"$tbl->{file}.h"
;
my
$head
=
"${prefix}$tbl->{name}"
;
my
$type
=
$tbl
->{type};
my
$hash
=
$tbl
->{hash};
my
$null
=
$tbl
->{null};
my
$init
=
$tbl
->{init};
open
FH,
">$file"
or croak
"$PACKAGE: $file can't be made"
;
binmode
FH;
select
FH;
my
%val
;
print
FH <<
'EOF'
;
/*
* This file is auto-generated by mkheader.
* Any changes here will be lost!
*/
EOF
print
$init
if
defined
$init
;
foreach
my
$uv
(
keys
%$hash
) {
croak
sprintf
(
"a Unicode code point 0x%04X over 0x10FFFF."
,
$uv
)
unless
$uv
<= 0x10FFFF;
my
@c
=
unpack
'CCCC'
,
pack
'N'
,
$uv
;
$val
{
$c
[1] }{
$c
[2] }{
$c
[3] } =
$hash
->{
$uv
};
}
foreach
my
$p
(
sort
{
$a
<=>
$b
}
keys
%val
) {
next
if
!
$val
{
$p
};
for
(
my
$r
= 0;
$r
< 256;
$r
++) {
next
if
!
$val
{
$p
}{
$r
};
printf
"static $type ${head}_%02x_%02x [256] = {\n"
,
$p
,
$r
;
for
(
my
$c
= 0;
$c
< 256;
$c
++) {
print
"\t"
,
defined
$val
{
$p
}{
$r
}{
$c
}
?
"($type)"
.
$val
{
$p
}{
$r
}{
$c
}
:
$null
;
print
','
if
$c
!= 255;
print
"\n"
if
$c
% 8 == 7;
}
print
"};\n\n"
;
}
}
foreach
my
$p
(
sort
{
$a
<=>
$b
}
keys
%val
) {
next
if
!
$val
{
$p
};
printf
"static $type* ${head}_%02x [256] = {\n"
,
$p
;
for
(
my
$r
= 0;
$r
< 256;
$r
++) {
print
$val
{
$p
}{
$r
}
?
sprintf
(
"${head}_%02x_%02x"
,
$p
,
$r
)
:
"NULL"
;
print
','
if
$r
!= 255;
print
"\n"
if
$val
{
$p
}{
$r
} || (
$r
+1) % 8 == 0;
}
print
"};\n\n"
;
}
print
"static $type** $head [] = {\n"
;
for
(
my
$p
= 0;
$p
<= 0x10;
$p
++) {
print
$val
{
$p
} ?
sprintf
(
"${head}_%02x"
,
$p
) :
"NULL"
;
print
','
if
$p
!= 0x10;
print
"\n"
;
}
print
"};\n\n"
;
close
FH;
}
1;