#!perl
use
5.006;
BEGIN {
unless
(
'A'
eq
pack
(
'U'
, 0x41)) {
die
"Unicode::Normalize cannot stringify a Unicode code point\n"
;
}
unless
(0x41 ==
unpack
(
'U'
,
'A'
)) {
die
"Unicode::Normalize cannot get Unicode code point\n"
;
}
}
our
$PACKAGE
=
'Unicode::Normalize, mkheader'
;
our
$prefix
=
"UNF_"
;
our
$structname
=
"${prefix}complist"
;
*pack_U
= ($] ge 5.020)
?
sub
{
return
pack
(
'W*'
,
@_
).
pack
(
'U*'
); }
:
sub
{
return
pack
(
'U*'
,
@_
); };
our
%Comp1st
;
our
%CompList
;
our
%Combin
;
our
%Canon
;
our
%Compat
;
our
%Compos
;
our
%Exclus
;
our
%Single
;
our
%NonStD
;
our
%Comp2nd
;
our
$Combin
=
do
"unicore/CombiningClass.pl"
||
do
"unicode/CombiningClass.pl"
|| croak
"$PACKAGE: CombiningClass.pl not found"
;
our
$Decomp
=
do
"unicore/Decomposition.pl"
||
do
"unicode/Decomposition.pl"
|| croak
"$PACKAGE: Decomposition.pl not found"
;
our
@CompEx
=
qw(
0958 0959 095A 095B 095C 095D 095E 095F 09DC 09DD 09DF 0A33 0A36
0A59 0A5A 0A5B 0A5E 0B5C 0B5D 0F43 0F4D 0F52 0F57 0F5C 0F69 0F76
0F78 0F93 0F9D 0FA2 0FA7 0FAC 0FB9 FB1D FB1F FB2A FB2B FB2C FB2D
FB2E FB2F FB30 FB31 FB32 FB33 FB34 FB35 FB36 FB38 FB39 FB3A FB3B
FB3C FB3E FB40 FB41 FB43 FB44 FB46 FB47 FB48 FB49 FB4A FB4B FB4C
FB4D FB4E 2ADC 1D15E 1D15F 1D160 1D161 1D162 1D163 1D164 1D1BB
1D1BC 1D1BD 1D1BE 1D1BF 1D1C0
)
;
sub
decomposeHangul {
my
$sindex
=
$_
[0] - SBase;
my
$lindex
=
int
(
$sindex
/ NCount);
my
$vindex
=
int
((
$sindex
% NCount) / TCount);
my
$tindex
=
$sindex
% TCount;
my
@ret
= (
LBase +
$lindex
,
VBase +
$vindex
,
$tindex
? (TBase +
$tindex
) : (),
);
return
wantarray
?
@ret
: pack_U(
@ret
);
}
sub
_getHexArray {
map
hex
,
$_
[0] =~ /\G *([0-9A-Fa-f]+)/g }
while
(
$Combin
=~ /(.+)/g) {
my
@tab
=
split
/\t/, $1;
my
$ini
=
hex
$tab
[0];
if
(
$tab
[1] eq
''
) {
$Combin
{
$ini
} =
$tab
[2];
}
else
{
$Combin
{
$_
} =
$tab
[2]
foreach
$ini
..
hex
(
$tab
[1]);
}
}
while
(
$Decomp
=~ /(.+)/g) {
my
@tab
=
split
/\t/, $1;
my
$compat
=
$tab
[2] =~ s/<[^>]+>//;
my
$dec
= [ _getHexArray(
$tab
[2]) ];
my
$ini
=
hex
(
$tab
[0]);
my
$end
=
$tab
[1] eq
''
?
$ini
:
hex
(
$tab
[1]);
foreach
my
$u
(
$ini
..
$end
) {
$Compat
{
$u
} =
$dec
;
$Canon
{
$u
} =
$dec
if
!
$compat
;
}
}
for
my
$s
(
@CompEx
) {
my
$u
=
hex
$s
;
next
if
!
$Canon
{
$u
};
next
if
$u
== 0xFB1D && !
$Canon
{0x1D15E};
$Exclus
{
$u
} = 1;
}
foreach
my
$u
(
keys
%Canon
) {
my
$dec
=
$Canon
{
$u
};
if
(
@$dec
== 2) {
if
(
$Combin
{
$dec
->[0] }) {
$NonStD
{
$u
} = 1;
}
else
{
$Compos
{
$dec
->[0] }{
$dec
->[1] } =
$u
;
$Comp2nd
{
$dec
->[1] } = 1
if
!
$Exclus
{
$u
};
}
}
elsif
(
@$dec
== 1) {
$Single
{
$u
} = 1;
}
else
{
my
$h
=
sprintf
'%04X'
,
$u
;
croak(
"Weird Canonical Decomposition of U+$h"
);
}
}
foreach
my
$j
(0x1161..0x1175, 0x11A8..0x11C2) {
$Comp2nd
{
$j
} = 1;
}
sub
getCanonList {
my
@src
=
@_
;
my
@dec
=
map
{
(SBase <=
$_
&&
$_
<= SFinal) ? decomposeHangul(
$_
)
:
$Canon
{
$_
} ? @{
$Canon
{
$_
} } :
$_
}
@src
;
return
join
(
" "
,
@src
) eq
join
(
" "
,
@dec
) ?
@dec
: getCanonList(
@dec
);
}
sub
getCompatList {
my
@src
=
@_
;
my
@dec
=
map
{
(SBase <=
$_
&&
$_
<= SFinal) ? decomposeHangul(
$_
)
:
$Compat
{
$_
} ? @{
$Compat
{
$_
} } :
$_
}
@src
;
return
join
(
" "
,
@src
) eq
join
(
" "
,
@dec
) ?
@dec
: getCompatList(
@dec
);
}
foreach
my
$key
(
keys
%Canon
) {
$Canon
{
$key
} = [ getCanonList(
$key
) ];
}
foreach
my
$key
(
keys
%Compat
) {
$Compat
{
$key
} = [ getCompatList(
$key
) ];
}
foreach
my
$comp1st
(
keys
%Compos
) {
my
$listname
=
sprintf
(
"${structname}_%06x"
,
$comp1st
);
$Comp1st
{
$comp1st
} =
$listname
;
my
$rh1st
=
$Compos
{
$comp1st
};
foreach
my
$comp2nd
(
keys
%$rh1st
) {
my
$uc
=
$rh1st
->{
$comp2nd
};
$CompList
{
$listname
}{
$comp2nd
} =
$uc
;
}
}
sub
split_into_char {
my
$uni
=
shift
;
my
$len
=
length
(
$uni
);
my
@ary
;
for
(
my
$i
= 0;
$i
<
$len
; ++
$i
) {
push
@ary
,
ord
(
substr
(
$uni
,
$i
,1));
}
return
@ary
;
}
sub
_U_stringify {
sprintf
'"%s"'
,
join
''
,
map
sprintf
(
"\\x%02x"
,
$_
), split_into_char(pack_U(
@_
));
}
foreach
my
$hash
(\
%Canon
, \
%Compat
) {
foreach
my
$key
(
keys
%$hash
) {
$hash
->{
$key
} = _U_stringify( @{
$hash
->{
$key
} } );
}
}
my
@boolfunc
= (
{
name
=>
"Exclusion"
,
type
=>
"bool"
,
hash
=> \
%Exclus
,
},
{
name
=>
"Singleton"
,
type
=>
"bool"
,
hash
=> \
%Single
,
},
{
name
=>
"NonStDecomp"
,
type
=>
"bool"
,
hash
=> \
%NonStD
,
},
{
name
=>
"Comp2nd"
,
type
=>
"bool"
,
hash
=> \
%Comp2nd
,
},
);
my
$orig_fh
= SelectSaver->new;
{
my
$file
=
"unfexc.h"
;
open
FH,
">$file"
or croak
"$PACKAGE: $file can't be made"
;
binmode
FH;
select
FH;
print
<<
'EOF'
;
/*
* This file is auto-generated by mkheader.
* Any changes here will be lost!
*/
EOF
foreach
my
$tbl
(
@boolfunc
) {
my
@temp
=
sort
{
$a
<=>
$b
}
keys
%{
$tbl
->{hash}};
my
$type
=
$tbl
->{type};
my
$name
=
$tbl
->{name};
print
"$type is$name (UV uv)\n{\nreturn\n\t"
;
while
(
@temp
) {
my
$cur
=
shift
@temp
;
if
(
@temp
&&
$cur
+ 1 ==
$temp
[0]) {
print
"($cur <= uv && uv <= "
;
while
(
@temp
&&
$cur
+ 1 ==
$temp
[0]) {
$cur
=
shift
@temp
;
}
print
"$cur)"
;
print
"\n\t|| "
if
@temp
;
}
else
{
print
"uv == $cur"
;
print
"\n\t|| "
if
@temp
;
}
}
print
"\n\t? TRUE : FALSE;\n}\n\n"
;
}
close
FH;
my
$compinit
=
"typedef struct { UV nextchar; UV composite; } $structname;\n\n"
;
foreach
my
$i
(
sort
keys
%CompList
) {
$compinit
.=
"$structname $i [] = {\n"
;
$compinit
.=
join
",\n"
,
map
sprintf
(
"\t{ %d, %d }"
,
$_
,
$CompList
{
$i
}{
$_
}),
sort
{
$a
<=>
$b
}
keys
%{
$CompList
{
$i
} };
$compinit
.=
",\n{0,0}\n};\n\n"
;
}
my
@tripletable
= (
{
file
=>
"unfcmb"
,
name
=>
"combin"
,
type
=>
"STDCHAR"
,
hash
=> \
%Combin
,
null
=> 0,
},
{
file
=>
"unfcan"
,
name
=>
"canon"
,
type
=>
"char*"
,
hash
=> \
%Canon
,
null
=>
"NULL"
,
},
{
file
=>
"unfcpt"
,
name
=>
"compat"
,
type
=>
"char*"
,
hash
=> \
%Compat
,
null
=>
"NULL"
,
},
{
file
=>
"unfcmp"
,
name
=>
"compos"
,
type
=>
"$structname *"
,
hash
=> \
%Comp1st
,
null
=>
"NULL"
,
init
=>
$compinit
,
},
);
foreach
my
$tbl
(
@tripletable
) {
my
$file
=
"$tbl->{file}.h"
;
my
$head
=
"${prefix}$tbl->{name}"
;
my
$type
=
$tbl
->{type};
my
$hash
=
$tbl
->{hash};
my
$null
=
$tbl
->{null};
my
$init
=
$tbl
->{init};
open
FH,
">$file"
or croak
"$PACKAGE: $file can't be made"
;
binmode
FH;
select
FH;
my
%val
;
print
FH <<
'EOF'
;
/*
* This file is auto-generated by mkheader.
* Any changes here will be lost!
*/
EOF
print
$init
if
defined
$init
;
foreach
my
$uv
(
keys
%$hash
) {
croak
sprintf
(
"a Unicode code point 0x%04X over 0x10FFFF."
,
$uv
)
unless
$uv
<= 0x10FFFF;
my
@c
=
unpack
'CCCC'
,
pack
'N'
,
$uv
;
$val
{
$c
[1] }{
$c
[2] }{
$c
[3] } =
$hash
->{
$uv
};
}
foreach
my
$p
(
sort
{
$a
<=>
$b
}
keys
%val
) {
next
if
!
$val
{
$p
};
for
(
my
$r
= 0;
$r
< 256;
$r
++) {
next
if
!
$val
{
$p
}{
$r
};
printf
"static $type ${head}_%02x_%02x [256] = {\n"
,
$p
,
$r
;
for
(
my
$c
= 0;
$c
< 256;
$c
++) {
print
"\t"
,
defined
$val
{
$p
}{
$r
}{
$c
}
?
"($type)"
.
$val
{
$p
}{
$r
}{
$c
}
:
$null
;
print
','
if
$c
!= 255;
print
"\n"
if
$c
% 8 == 7;
}
print
"};\n\n"
;
}
}
foreach
my
$p
(
sort
{
$a
<=>
$b
}
keys
%val
) {
next
if
!
$val
{
$p
};
printf
"static $type* ${head}_%02x [256] = {\n"
,
$p
;
for
(
my
$r
= 0;
$r
< 256;
$r
++) {
print
$val
{
$p
}{
$r
}
?
sprintf
(
"${head}_%02x_%02x"
,
$p
,
$r
)
:
"NULL"
;
print
','
if
$r
!= 255;
print
"\n"
if
$val
{
$p
}{
$r
} || (
$r
+1) % 8 == 0;
}
print
"};\n\n"
;
}
print
"static $type** $head [] = {\n"
;
for
(
my
$p
= 0;
$p
<= 0x10;
$p
++) {
print
$val
{
$p
} ?
sprintf
(
"${head}_%02x"
,
$p
) :
"NULL"
;
print
','
if
$p
!= 0x10;
print
"\n"
;
}
print
"};\n\n"
;
close
FH;
}
}
1;