#!perl
use
5.006;
my
$Use4th
;
my
$DEFAULT_LOCALE_VERSION
=
'1.31'
;
BEGIN {
unless
(
"A"
eq
pack
(
'U'
, 0x41)) {
die
"Unicode::Collate cannot stringify a Unicode code point\n"
;
}
unless
(0x41 ==
unpack
(
'U'
,
'A'
)) {
die
"Unicode::Collate cannot get a Unicode code point\n"
;
}
}
sub
_getHexArray {
map
hex
,
$_
[0] =~ /([0-9A-Fa-f]+)/g }
sub
trim {
$_
[0] =~ s/^\ +//;
$_
[0] =~ s/\ +\z// }
sub
ce {
my
$var
=
shift
;
my
$vc
=
$var
?
'*'
:
'.'
;
my
$hx
=
join
'.'
,
map
{
sprintf
'%04X'
,
$_
}
@_
;
return
"[$vc$hx]"
;
}
our
$PACKAGE
=
'Unicode::Collate, locale'
;
our
$ENT_FMT
=
"%-9s ; %s # %s\n"
;
our
$RE_CE
=
'(?:\[[0-9A-Fa-f\.\*]+\])'
;
our
$CUR_DIR
= File::Spec->curdir();
our
$OvCJK
=
'overrideCJK'
;
our
$OvHang
=
'overrideHangul'
;
sub
decHangul {
my
$code
=
shift
;
my
$si
=
$code
- SBase;
my
$li
=
int
(
$si
/ NCount);
my
$vi
=
int
((
$si
% NCount) / TCount);
my
$ti
=
$si
% TCount;
return
(
LBase +
$li
,
VBase +
$vi
,
$ti
? (TBase +
$ti
) : (),
);
}
sub
simple_cjk_deriv {
my
$u
=
hex
shift
;
my
$base
= (CJK_UidFr <=
$u
&&
$u
<= CJK_UidTo) ? 0xFB40 : 0xFB80;
my
$aaaa
=
$base
+ (
$u
>> 15);
my
$bbbb
= (
$u
& 0x7FFF) | 0x8000;
my
@u
=
$Use4th
? (
$u
) : ();
return
ce(0,
$aaaa
, Min2Wt, Min3Wt,
@u
).ce(0,
$bbbb
, 0, 0,
@u
);
}
my
%Keys
;
my
%Code
;
my
%Name
;
my
%Equiv
;
my
$vDUCET
;
my
%JamoWt1
;
my
$JamoWt2
= 0;
my
@CompId
;
my
@OtherEquiv
=
split
/\n=/,
<<'OTHEREQUIV';
=00C2= (A-circ)
1EA7;<00E2><0300>
1EA6;<00C2><0300>
1EA5;<00E2><0301>
1EA4;<00C2><0301>
1EAB;<00E2><0303>
1EAA;<00C2><0303>
1EA9;<00E2><0309>
1EA8;<00C2><0309>
1EAD;<00E2><0323>
1EAC;<00C2><0323>
=00C4= (A-uml)
01DF;<00E4><0304>
01DE;<00C4><0304>
=00C5= (A-ring)
01FB;<00E5><0301>
01FA;<00C5><0301>
=00C6= (AE-lig)
1D2D;<00C6>+++12
01FD;<00E6><0301>
01FC;<00C6><0301>
01E3;<00E6><0304>
01E2;<00C6><0304>
=00CA= (E-circ)
1EC1;<00EA><0300>
1EC0;<00CA><0300>
1EBF;<00EA><0301>
1EBE;<00CA><0301>
1EC5;<00EA><0303>
1EC4;<00CA><0303>
1EC3;<00EA><0309>
1EC2;<00CA><0309>
1EC7;<00EA><0323>
1EC6;<00CA><0323>
=00D4= (O-circ)
1ED3;<00F4><0300>
1ED2;<00D4><0300>
1ED1;<00F4><0301>
1ED0;<00D4><0301>
1ED7;<00F4><0303>
1ED6;<00D4><0303>
1ED5;<00F4><0309>
1ED4;<00D4><0309>
1ED9;<00F4><0323>
1ED8;<00D4><0323>
=00D5= (O-tilde)
1E4D;<00F5><0301>
1E4C;<00D5><0301>
022D;<00F5><0304>
022C;<00D5><0304>
1E4F;<00F5><0308>
1E4E;<00D5><0308>
1EE1;<00F5><031B>
1EE0;<00D5><031B>
=00D6= (O-uml)
022B;<00F6><0304>
022A;<00D6><0304>
=00D8= (O-slash)
01FF;<00F8><0301>
01FE;<00D8><0301>
=00DC= (U-uml)
01DC;<00FC><0300>
01DB;<00DC><0300>
01D8;<00FC><0301>
01D7;<00DC><0301>
01D6;<00FC><0304>
01D5;<00DC><0304>
01DA;<00FC><030C>
01D9;<00DC><030C>
=0102= (A-breve)
1EB1;<0103><0300>
1EB0;<0102><0300>
1EAF;<0103><0301>
1EAE;<0102><0301>
1EB5;<0103><0303>
1EB4;<0102><0303>
1EB3;<0103><0309>
1EB2;<0102><0309>
1EB7;<0103><0323>
1EB6;<0102><0323>
=0186= (open-O)
1D53;<0186>+++?
=0190= (open-E)
2107;<0190>+++?
1D4B;<0190>+++?
=01A0= (O-horn)
1EDD;<01A1><0300>
1EDC;<01A0><0300>
1EDB;<01A1><0301>
1EDA;<01A0><0301>
1EE1;<01A1><0303>
1EE0;<01A0><0303>
1EDF;<01A1><0309>
1EDE;<01A0><0309>
1EE3;<01A1><0323>
1EE2;<01A0><0323>
=01AF= (U-horn)
1EEB;<01B0><0300>
1EEA;<01AF><0300>
1EE9;<01B0><0301>
1EE8;<01AF><0301>
1EEF;<01B0><0303>
1EEE;<01AF><0303>
1EED;<01B0><0309>
1EEC;<01AF><0309>
1EF1;<01B0><0323>
1EF0;<01AF><0323>
=0300= (grave)
00E0;a<0300>
00C0;A<0300>
00E8;e<0300>
00C8;E<0300>
00EC;i<0300>
00CC;I<0300>
00F2;o<0300>
00D2;O<0300>
00F9;u<0300>
00D9;U<0300>
1EF3;y<0300>
1EF2;Y<0300>
=0301= (acute)
00E1;a<0301>
00C1;A<0301>
00E9;e<0301>
00C9;E<0301>
00ED;i<0301>
00CD;I<0301>
00F3;o<0301>
00D3;O<0301>
00FA;u<0301>
00DA;U<0301>
00FD;y<0301>
00DD;Y<0301>
=0302= (circum)
00E2;a<0302>
00C2;A<0302>
00EA;e<0302>
00CA;E<0302>
00EE;i<0302>
00CE;I<0302>
00F4;o<0302>
00D4;O<0302>
00FB;u<0302>
00DB;U<0302>
0177;y<0302>
0176;Y<0302>
=0303= (tilde)
00E3;a<0303>
00C3;A<0303>
1EBD;e<0303>
1EBC;E<0303>
0129;i<0303>
0128;I<0303>
00F5;o<0303>
00D5;O<0303>
0169;u<0303>
0168;U<0303>
1EF9;y<0303>
1EF8;Y<0303>
=0308= (diaeresis)
00E4;a<0308>
00C4;A<0308>
00EB;e<0308>
00CB;E<0308>
00EF;i<0308>
00CF;I<0308>
00F6;o<0308>
00D6;O<0308>
00FC;u<0308>
00DC;U<0308>
00FF;y<0308>
0178;Y<0308>
=0309= (hook-above)
1EA3;a<0309>
1EA2;A<0309>
1EBB;e<0309>
1EBA;E<0309>
1EC9;i<0309>
1EC8;I<0309>
1ECF;o<0309>
1ECE;O<0309>
1EE7;u<0309>
1EE6;U<0309>
1EF7;y<0309>
1EF6;Y<0309>
=0323= (dot-below)
1EA1;a<0323>
1EA0;A<0323>
1EB9;e<0323>
1EB8;E<0323>
1ECB;i<0323>
1ECA;I<0323>
1ECD;o<0323>
1ECC;O<0323>
1EE5;u<0323>
1EE4;U<0323>
1EF5;y<0323>
1EF4;Y<0323>
=0406= (Cyrillic-Byelorussian-Ukrainian-I)
0457;<0456><0308>
A676;<0456>+++2 <0308>+++2
0407;<0406><0308>
=041E= (Cyrillic-O)
04E7;<043E><0308>
04E6;<041E><0308>
=0423= (Cyrillic-U)
045E;<0443><0306>
040E;<0423><0306>
04F1;<0443><0308>
04F0;<0423><0308>
04F3;<0443><030B>
04F2;<0423><030B>
04EF;<0443><0304>
04EE;<0423><0304>
=0933= (Devanagari-LLA)
0934;<0933><093C>
=0A3C= (Gurmukhi-Nukta)
0A33;<0A32><0A3C>
0A36;<0A38><0A3C>
0A59;<0A16><0A3C>
0A5A;<0A17><0A3C>
0A5B;<0A1C><0A3C>
0A5E;<0A2B><0A3C>
=1EB8= (E-dot-below)
1EC7;<1EB9><0302>
1EC6;<1EB8><0302>
=1ECC= (O-dot-below)
1ED9;<1ECD><0302>
1ED8;<1ECC><0302>
1EE3;<1ECD><031B>
1EE2;<1ECC><031B>
=1EE4= (U-dot-below)
1EF1;<1EE5><031B>
1EF0;<1EE4><031B>
OTHEREQUIV
my
%OtherEquiv
;
for
my
$o
(
@OtherEquiv
) {
my
@ln
=
split
/\n/,
$o
;
my
$uv
=
shift
@ln
;
$uv
=~ s/ *\([a-zA-Z-]+\) *//;
$uv
=~
tr
/=//d;
croak
"$PACKAGE: $uv is invalid in OTHEREQUIV"
if
$uv
!~ /^[0-9A-F]+\z/;
$OtherEquiv
{
$uv
} = \
@ln
;
}
{
my
(
$f
,
$fh
);
foreach
my
$d
(
$CUR_DIR
) {
$f
= File::Spec->catfile(
$d
,
"Collate"
,
"allkeys.txt"
);
last
if
open
(
$fh
,
$f
);
$f
=
undef
;
}
croak
"$PACKAGE: Collate/allkeys.txt is not found"
if
!
defined
$f
;
while
(
my
$line
= <
$fh
>) {
chomp
$line
;
next
if
$line
=~ /^\s*
$vDUCET
= $1
if
$line
=~ /^\
@version
\s*(\S*)/;
next
if
$line
!~ /^\s*[0-9A-Fa-f]/;
my
$name
= (
$line
=~ s/[
my
(
$e
,
$k
) =
split
/;/,
$line
;
trim(
$e
);
trim(
$k
);
$name
=~ s/; QQ[A-Z]+//;
$name
=~ s/^ ?\[[0-9A-F]+\] ?//;
if
(
$k
=~ /\[\.0000\.0000\.0000(\.?0*)\]/) {
$Use4th
= 1
if
$1;
$Name
{
$e
} =
$name
;
next
;
}
croak
"Wrong Entry: <charList> must be separated by ';' "
.
"from <collElement>"
if
!
$k
;
push
@{
$Equiv
{
$k
} },
$e
if
exists
$Code
{
$k
};
$Keys
{
$e
} =
$k
;
$Code
{
$k
} =
$e
if
!
exists
$Code
{
$k
};
$Name
{
$e
} =
$name
;
if
(
$e
=~ /^11[0-9A-F]{2}\z/) {
my
@ec
= _getHexArray(
$e
);
my
@kc
= _getHexArray(
$k
);
if
(
@ec
== 1) {
$JamoWt1
{
$ec
[0]} =
$kc
[0];
$JamoWt2
=
$kc
[1]
if
$JamoWt2
<
$kc
[1];
}
}
if
(
$k
=~ /^\[\.F[0-9A-F]+\.[0-9A-F]+\.0002[\.\]]/) {
my
@p
=
map
hex
(
$_
),
$k
=~ /\[\.([0-9A-F]+)\./g;
if
(
@p
== 2) {
my
$ui
= (((
$p
[0] & 0x3F) << 15) | (
$p
[1] & 0x7FFF));
my
$h
=
sprintf
"%04X"
,
$ui
;
push
@CompId
, [
$e
,
$ui
]
if
$e
ne
$h
;
}
}
}
close
$fh
;
}
my
@Contractions
;
for
my
$k
(
sort
keys
%Equiv
) {
if
(
$Code
{
$k
} !~ / / &&
$Equiv
{
$k
}[0] =~ / /) {
(
my
$eqs
=
"<$Equiv{$k}[0]>"
) =~ s/ /></g;
my
$starter
=
$eqs
=~ /^<([0-9A-Fa-f]+)>/ ?
hex
($1) :
''
;
push
@Contractions
, [
$starter
,
"$Code{$k};$eqs"
];
}
}
{
my
(
$f
,
$fh
);
foreach
my
$d
(
$CUR_DIR
) {
$f
= File::Spec->catfile(
$d
,
"Collate"
,
"CJK"
,
"Korean.pm"
);
last
if
open
(
$fh
,
$f
);
$f
=
undef
;
}
croak
"$PACKAGE: Collate/CJK/Korean.pm is not found"
if
!
defined
$f
;
my
%KO_jamo
;
my
$KO_head
=
''
;
my
$KO_foot
=
''
;
while
(
my
$line
= <
$fh
>) {
chomp
$line
;
if
((
$line
=~ /^__DATA__/) .. (
$line
=~ /^__END__/)) {
if
(
$line
=~ /^[A-D]/) {
$line
=~ s/:.*//;
my
@u
= _getHexArray(
$line
);
croak
"unexpected $line"
if
@u
!= 1;
my
$uv
=
$u
[0];
croak
"unexpected $line"
unless
SBase <=
$uv
&&
$uv
<= SFinal;
my
@dec
= decHangul(
$uv
);
$KO_jamo
{
$_
} = 1
for
@dec
;
$line
.=
':'
.
join
'-'
,
map
sprintf
(
'%04X'
,
$_
),
@dec
;
}
}
$line
.=
"\n"
;
$KO_head
.=
$line
if
1 .. (
$line
=~ /^
my
%jamo2prim
/);
$KO_foot
.=
$line
if
(
$line
=~ /^\);
}
close
$fh
;
open
my
$pm
,
">Korean.pm"
or
die
'Korean.pm'
;
binmode
$pm
;
print
$pm
$KO_head
;
my
$count
= 0;
for
(
sort
keys
%KO_jamo
) {
print
$pm
' '
if
(
$count
% 4) == 0;
++
$count
;
print
$pm
' '
;
print
$pm
sprintf
q!'%04X', 0x%04X,!
,
$_
,
$JamoWt1
{
$_
};
print
$pm
"\n"
if
(
$count
% 4) == 0;
}
print
$pm
"\n"
if
$count
% 4 != 0;
$JamoWt2
=
sprintf
"0x%02X"
,
$JamoWt2
;
$KO_foot
=~ s/(\
$wt
= )[0-9x]+/$1
$JamoWt2
/;
$KO_foot
=~ s/^(\);
print
$pm
$KO_foot
;
close
$pm
;
}
my
$source
=
'data'
;
opendir
DIR,
$source
or croak
"no $source"
;
my
@txts
=
grep
/^[a-zA-Z]/,
readdir
DIR;
closedir
DIR;
my
$target
=
'Locale'
;
mkdir
$target
;
for
my
$txt
(
@txts
) {
my
%locale_keys
;
my
$txtfile
= File::Spec->catfile(
$source
,
$txt
);
my
$pl
=
$txt
;
$pl
=~ s/\.txt\z/.pl/ or croak
"$PACKAGE: $source/$txt is not .txt"
;
my
$plfile
= File::Spec->catfile(
$target
,
$pl
);
open
my
$fh
,
$txtfile
or croak
"$PACKAGE: $source/$txt is not found: $!"
;
open
my
$ph
,
">$plfile"
or croak
"$PACKAGE: $target/$pl can't be made: $!"
;
binmode
$ph
;
my
$ptxt
=
''
;
my
$entry
=
''
;
my
$locale_version
=
$DEFAULT_LOCALE_VERSION
;
while
(<
$fh
>) {
chomp
;
next
if
/^\s*\z/;
if
(s/^locale_version//) {
$locale_version
= $1
if
/(\S+)/;
next
;
}
if
(/^
use
/) {
print
$ph
"$_;\n"
;
s/^[^:]+:://;
s/\s*\z/.pm/;
my
$f
= File::Spec->catfile(
$CUR_DIR
,
split
/::/,
$_
);
$f
=
'Korean.pm'
if
/::Korean\.pm/;
require
"./$f"
;
next
;
}
if
(/^(alternate)\s+(\S+)/) {
my
$v
=
"variable"
;
$ptxt
.=
" $v => '$2',\n"
;
$ptxt
.=
" $1 => '$2',\n"
;
next
;
}
if
(/^backwards$/) {
$ptxt
.=
" backwards => 2,\n"
;
next
;
}
if
(s/^(
override
)(CJK|Hangul)[ \t]+(?:\\&|)/\\&/) {
my
$key
= $1.$2;
$ptxt
.=
" $key => $_,\n"
;
$locale_keys
{
$key
} =
eval
$_
;
next
;
}
if
(/^upper$/) {
$ptxt
.=
" upper_before_lower => 1,\n"
;
next
;
}
if
(s/^suppress//) {
s/\s*-\s*/../g;
my
@c
=
split
;
s/(?:0[Xx])?([0-9A-Fa-f]+)/0x$1/g
for
@c
;
my
$list
=
join
", "
,
@c
;
$ptxt
.=
" suppress => [$list],\n"
;
next
;
}
if
(/^([\s\-0-9A-Fa-fXx]+)\z/) {
s/\s*-\s*/../g;
my
@c
=
split
;
s/(?:0[Xx])?([0-9A-Fa-f]+)/0x$1/g
for
@c
;
my
$list
=
join
", "
,
@c
;
$ptxt
=~ s/\](,$)/$1/;
$ptxt
.=
"\t\t$list],\n"
;
next
;
}
if
(/^\s*(
$ptxt
.=
"$_\n"
if
$1 ne
'#'
;
next
;
}
$entry
.= parse_entry(
$_
, \
%locale_keys
);
}
if
(
$ptxt
=~ /
suppress
=> \[(.*)\]/s) {
my
@suplist
=
eval
$1;
my
%suppressed
;
@suppressed
{
@suplist
} = (1) x
@suplist
;
for
my
$contract
(
@Contractions
) {
my
$starter
=
$contract
->[0];
my
$addline
=
$contract
->[1];
next
if
!
$suppressed
{
$starter
};
$entry
.= parse_entry(
$addline
, \
%locale_keys
);
}
}
if
(
$locale_keys
{
$OvCJK
}) {
for
my
$c
(
@CompId
) {
my
$r
=
$locale_keys
{
$OvCJK
}->(
$c
->[1]);
next
if
!
defined
$r
;
my
$addline
=
sprintf
'%s;<%04X>'
,
$c
->[0],
$c
->[1];
$entry
.= parse_entry(
$addline
, \
%locale_keys
);
}
}
if
(
$entry
) {
my
$v
=
$vDUCET
?
" # for DUCET v$vDUCET"
:
''
;
$ptxt
.=
" entry => <<'ENTRY',$v\n"
;
$ptxt
.=
$entry
;
$ptxt
.=
"ENTRY\n"
;
}
my
$lv
=
" locale_version => $locale_version,\n"
;
print
$ph
"+{\n$lv$ptxt};\n"
;
close
$fh
;
close
$ph
;
}
sub
parse_entry {
my
$line
=
shift
;
my
$lockeys
=
shift
;
my
(
$e
,
$rule
) = split_e_rule(
$line
);
my
$name
= getname(
$e
);
my
$eq_rule
=
$rule
eq
'='
;
$rule
=
join
''
,
map
"<$_>"
,
split
' '
,
$e
if
$eq_rule
;
my
(
$newce
,
$simpdec
) = parse_rule(
$e
,
$rule
,
$lockeys
);
my
$newentry
=
''
;
if
(!
$lockeys
->{
$e
}) {
$newentry
.=
sprintf
$ENT_FMT
,
$e
,
$newce
,
$name
if
!
$eq_rule
;
$lockeys
->{
$e
} =
$newce
;
}
else
{
$newentry
.=
"# already tailored: $_\n"
;
}
if
(!
$simpdec
&&
$Keys
{
$e
}) {
my
$key
=
$Keys
{
$e
};
my
@ce
=
$key
=~ /
$RE_CE
/go;
if
(
@ce
> 1) {
my
$ok
= 1;
my
$ee
=
''
;
for
my
$c
(
@ce
) {
$ok
= 0,
last
if
!
$Code
{
$c
};
$ee
.=
' '
if
$ee
ne
''
;
$ee
.=
$Code
{
$c
};
}
if
(
$ok
&& !
$lockeys
->{
$ee
}) {
$newentry
.=
sprintf
$ENT_FMT
,
$ee
,
$newce
,
$name
;
$lockeys
->{
$ee
} =
$newce
;
}
if
(
$ee
=~ s/ 030([01])/ 034$1/ &&
$ok
&& !
$lockeys
->{
$ee
}) {
$newentry
.=
sprintf
$ENT_FMT
,
$ee
,
$newce
,
$name
;
$lockeys
->{
$ee
} =
$newce
;
}
}
if
(
$Equiv
{
$key
}) {
for
my
$eq
(@{
$Equiv
{
$key
} }) {
next
if
$key
=~ /^\[\.0000\.[^]]+\]\z/;
next
if
$lockeys
->{
$eq
};
next
if
$eq
eq
'3038'
;
$newentry
.=
sprintf
$ENT_FMT
,
$eq
,
$newce
,
$Name
{
$eq
};
$lockeys
->{
$eq
} =
$newce
;
}
}
}
if
(
$OtherEquiv
{
$e
}) {
for
my
$o
(@{
$OtherEquiv
{
$e
} }) {
my
(
$e
,
$rule
) = split_e_rule(
$o
);
my
$name
= getname(
$e
);
(
my
$newce
,
undef
) = parse_rule(
$e
,
$rule
,
$lockeys
);
next
if
$lockeys
->{
$e
};
$newentry
.=
sprintf
$ENT_FMT
,
$e
,
$newce
,
$name
;
$lockeys
->{
$e
} =
$newce
;
}
}
return
$newentry
;
}
sub
getunicode {
return
join
' '
,
map
{
sprintf
'%04X'
,
$_
}
unpack
'U*'
,
shift
;
}
sub
parse_element {
my
$e
=
shift
;
$e
=~ s/\{([A-Za-z
']+)\}/'
'.getunicode($1).'
'/ge;
$e
=~ s/ +/ /g;
trim(
$e
);
return
$e
;
}
sub
split_e_rule {
my
$line
=
shift
;
my
(
$e
,
$r
) =
split
/;/,
$line
;
return
(parse_element(
$e
),
$r
);
}
sub
getname {
my
$e
=
shift
;
return
$Name
{
$e
}
if
$Name
{
$e
};
my
@e
=
split
' '
,
$e
;
my
@name
=
map
{
$Name
{
$_
} ?
$Name
{
$_
} :
/^FD[DE][0-9A-F]\z/ ?
"noncharacter-$_"
:
'unknown'
}
@e
;
return
sprintf
'<%s>'
,
join
', '
,
@name
;
}
sub
parse_rule {
my
$e
=
shift
;
my
$e1
=
$e
=~ /^([0-9A-F]+)/ ? $1 :
''
;
my
$rule
=
shift
;
my
$lockeys
=
shift
;
my
$result
=
''
;
my
$simple_decomp
= 1;
for
(
my
$prerule
=
$rule
;
$rule
ne
''
;
$prerule
=
$rule
) {
$rule
=~ s/^ +//;
last
if
$rule
=~ /^
if
(
$rule
=~ s/^(
$RE_CE
)//o) {
my
$k
= $1;
my
$var
=
$k
=~ /^\[\*/ ? 1 : 0;
my
@c
= _getHexArray(
$k
);
@c
=
@c
[0..2]
if
!
$Use4th
;
$result
.= ce(
$var
,
@c
);
next
;
}
if
(
$rule
=~ s/^(<([0-9A-F ]+)>\+\+\+\?)//) {
my
$cr
= $1;
my
@c
=
split
' '
, $2;
my
$compat
=
$Keys
{
$e
};
my
$decomp
=
join
''
,
map
{
$Keys
{
$_
} ?
$Keys
{
$_
} : simple_cjk_deriv(
$_
)
}
@c
;
my
$regexp
=
$decomp
;
$regexp
=~ s/([\[\]\.\*])/\\$1/g;
$regexp
=~ s/\.00(?:0[1-9A-F]|1[0-9A-F])(?:\\\.[0-9A-F]+|)\\\]
/.(00(?:0[1-9A-F]|1[0-9A-F]))(?:\\.[0-9A-F]+|)\\\]/gx;
my
@tD
=
map
hex
(
$_
),
$decomp
=~ /^
$regexp
\z/;
my
@tC
=
map
hex
(
$_
),
$compat
=~ /^
$regexp
\z/;
croak
"wrong at $cr"
unless
@c
==
@tD
&&
@c
==
@tC
;
my
$r
=
join
' '
,
map
"<$c[$_]>+++"
.(
$tC
[
$_
] -
$tD
[
$_
]), 0..
@c
-1;
$rule
=
$r
.
$rule
;
next
;
}
my
$key
;
if
(
$rule
=~ s/^(<[0-9A-Za-z
'{ }]+>|[A-Za-z'
"])//) {
my
$e
= $1;
my
$c
=
$e
=~
tr
/<>//d ? parse_element(
$e
) : getunicode(
$e
);
croak
"<$c> is too short"
if
4 >
length
$c
;
$key
=
$lockeys
->{
$c
} ||
$Keys
{
$c
};
if
(!
defined
$key
) {
my
$u
=
hex
$c
;
my
@u
=
$Use4th
? (
$u
) : ();
my
@r
;
if
(SBase <=
$u
&&
$u
<= SFinal) {
@r
=
$lockeys
->{
$OvHang
}->(
$u
)
if
$lockeys
->{
$OvHang
};
}
else
{
@r
=
$lockeys
->{
$OvCJK
} ->(
$u
)
if
$lockeys
->{
$OvCJK
};
}
if
(
@r
) {
$key
=
join
''
,
map
{
ref
$_
? ce(0,
@$_
) : ce(0,
$_
, Min2Wt, Min3Wt,
@u
)
}
@r
;
}
}
}
my
@base
;
for
my
$k
(
$key
=~ /
$RE_CE
/go) {
my
$var
=
$k
=~ /^\[\*/ ? 1 : 0;
push
@base
, [
$var
, _getHexArray(
$k
)];
}
croak
"the rule seems wrong at $prerule"
if
!
@base
;
my
$replaced
= 0;
while
(
$rule
=~ s/^(([+-])\2*)(\d+)//) {
my
$idx
=
length
($1);
my
$num
= $2 eq
'-'
? -$3 : $3;
$base
[0][
$idx
] +=
$num
;
++
$replaced
;
}
$simple_decomp
= 0
if
$replaced
;
for
my
$c
(
@base
) {
$c
->[4] =
hex
$e1
if
$replaced
&&
$Use4th
;
$result
.= ce(
@$c
);
}
croak
"something wrong at $rule"
if
$prerule
eq
$rule
;
}
return
(
$result
,
$simple_decomp
);
}