#!perl -w
BEGIN {
$::IS_ASCII = (
ord
(
"A"
) == 65) ? 1 : 0;
$::IS_EBCDIC = (
ord
(
"A"
) == 193) ? 1 : 0;
chdir
't'
if
-d
't'
;
@INC
=
'../lib'
;
require
Config;
import
Config;
if
(
$Config
{
'extensions'
} !~ /\bStorable\b/) {
print
"1..0 # Skip: Storable was not built; Unicode::UCD uses Storable\n"
;
exit
0;
}
}
my
@warnings
;
local
$SIG
{__WARN__} =
sub
{
push
@warnings
,
@_
};
my
$expected_version
=
'15.0.0'
;
my
$current_version
= Unicode::UCD::UnicodeVersion;
my
$v_unicode_version
=
pack
"C*"
,
split
/\./,
$current_version
;
my
$unknown_script
= (
$v_unicode_version
lt v5.0.0)
?
'Common'
:
'Unknown'
;
my
$input_record_separator
= 7;
$/ =
$input_record_separator
;
my
$charinfo
;
is(charinfo(0x110000),
undef
,
"Verify charinfo() of non-unicode is undef"
);
if
(
$v_unicode_version
ge v3.2.0) {
is(
lc
charprop(0x110000,
'age'
),
lc
"Unassigned"
,
"Verify charprop(age) of non-unicode is Unassigned"
);
is(charprop(0x110000,
'in'
),
"Unassigned"
,
"Verify charprop(in), a bipartite Perl extension, works"
);
}
is(charprop(0x110000,
'Any'
),
undef
,
"Verify charprop of non-bipartite Perl extension returns undef"
);
my
$cp
= 0;
$charinfo
= charinfo(
$cp
);
is(
$charinfo
->{code},
"0000"
,
"Next tests are for charinfo and charprop; first NULL"
);
is(
$charinfo
->{name},
"<control>"
);
is(charprop(
$cp
,
"name"
),
""
);
if
(
$v_unicode_version
ge v6.1.0) {
is(charprop(
$cp
,
"name_alias"
),
"NULL: control,NUL: abbreviation"
);
}
is(
$charinfo
->{category},
"Cc"
);
is(charprop(
$cp
,
"category"
),
"Control"
);
is(
$charinfo
->{combining},
"0"
);
is(charprop(
$cp
,
"ccc"
),
"Not_Reordered"
);
is(
$charinfo
->{bidi},
"BN"
);
is(charprop(
$cp
,
"bc"
),
"Boundary_Neutral"
);
is(
$charinfo
->{decomposition},
""
);
is(charprop(
$cp
,
"dm"
),
"\0"
);
is(
$charinfo
->{decimal},
""
);
is(
$charinfo
->{digit},
""
);
is(
$charinfo
->{numeric},
""
);
is(charprop(
$cp
,
"nv"
),
"NaN"
);
is(
$charinfo
->{mirrored},
"N"
);
is(charprop(
$cp
,
"bidim"
),
"No"
);
is(
$charinfo
->{unicode10},
"NULL"
);
is(charprop(
$cp
,
"na1"
),
"NULL"
);
is(
$charinfo
->{comment},
""
);
is(charprop(
$cp
,
"isc"
),
""
);
is(
$charinfo
->{upper},
""
);
is(charprop(
$cp
,
"uc"
),
"\0"
);
is(
$charinfo
->{lower},
""
);
is(charprop(
$cp
,
"lc"
),
"\0"
);
is(
$charinfo
->{title},
""
);
is(charprop(
$cp
,
"tc"
),
"\0"
);
is(
$charinfo
->{block},
"Basic Latin"
);
is(charprop(
$cp
,
"block"
),
"Basic_Latin"
);
is(
$charinfo
->{script},
"Common"
)
if
$v_unicode_version
gt v3.0.1;
is(charprop(
$cp
,
"script"
),
"Common"
)
if
$v_unicode_version
gt v3.0.1;
$cp
= utf8::unicode_to_native(0x41);
my
$A_code
=
sprintf
(
"%04X"
,
ord
(
"A"
));
my
$a_code
=
sprintf
(
"%04X"
,
ord
(
"a"
));
$charinfo
= charinfo(
$cp
);
is(
$charinfo
->{code},
$A_code
,
"LATIN CAPITAL LETTER A"
);
is(
$charinfo
->{name},
"LATIN CAPITAL LETTER A"
);
is(charprop(
$cp
,
'name'
),
"LATIN CAPITAL LETTER A"
);
is(
$charinfo
->{category},
"Lu"
);
is(charprop(
$cp
,
'gc'
),
"Uppercase_Letter"
);
is(
$charinfo
->{combining},
"0"
);
is(charprop(
$cp
,
'ccc'
),
"Not_Reordered"
);
is(
$charinfo
->{bidi},
"L"
);
is(charprop(
$cp
,
'bc'
),
"Left_To_Right"
);
is(
$charinfo
->{decomposition},
""
);
is(charprop(
$cp
,
'dm'
),
"A"
);
is(
$charinfo
->{decimal},
""
);
is(
$charinfo
->{digit},
""
);
is(
$charinfo
->{numeric},
""
);
is(charprop(
$cp
,
'nv'
),
"NaN"
);
is(
$charinfo
->{mirrored},
"N"
);
is(charprop(
$cp
,
'bidim'
),
"No"
);
is(
$charinfo
->{unicode10},
""
);
is(charprop(
$cp
,
'na1'
),
""
);
is(
$charinfo
->{comment},
""
);
is(charprop(
$cp
,
'isc'
),
""
);
is(
$charinfo
->{upper},
""
);
is(charprop(
$cp
,
'uc'
),
"A"
);
is(
$charinfo
->{lower},
$a_code
);
is(charprop(
$cp
,
'lc'
),
"a"
);
is(
$charinfo
->{title},
""
);
is(charprop(
$cp
,
'tc'
),
"A"
);
is(
$charinfo
->{block},
"Basic Latin"
);
is(charprop(
$cp
,
'block'
),
"Basic_Latin"
);
is(
$charinfo
->{script},
"Latin"
)
if
$v_unicode_version
gt v3.0.1;
is(charprop(
$cp
,
'script'
),
"Latin"
)
if
$v_unicode_version
gt v3.0.1;
$cp
= 0x100;
$charinfo
= charinfo(
$cp
);
is(
$charinfo
->{code},
"0100"
,
"LATIN CAPITAL LETTER A WITH MACRON"
);
is(
$charinfo
->{name},
"LATIN CAPITAL LETTER A WITH MACRON"
);
is(charprop(
$cp
,
'name'
),
"LATIN CAPITAL LETTER A WITH MACRON"
);
is(
$charinfo
->{category},
"Lu"
);
is(charprop(
$cp
,
'gc'
),
"Uppercase_Letter"
);
is(
$charinfo
->{combining},
"0"
);
is(charprop(
$cp
,
'ccc'
),
"Not_Reordered"
);
is(
$charinfo
->{bidi},
"L"
);
is(charprop(
$cp
,
'bc'
),
"Left_To_Right"
);
is(
$charinfo
->{decomposition},
"$A_code 0304"
);
is(charprop(
$cp
,
'dm'
),
"A\x{0304}"
);
is(
$charinfo
->{decimal},
""
);
is(
$charinfo
->{digit},
""
);
is(
$charinfo
->{numeric},
""
);
is(charprop(
$cp
,
'nv'
),
"NaN"
);
is(
$charinfo
->{mirrored},
"N"
);
is(charprop(
$cp
,
'bidim'
),
"No"
);
is(
$charinfo
->{unicode10},
"LATIN CAPITAL LETTER A MACRON"
);
is(charprop(
$cp
,
'na1'
),
"LATIN CAPITAL LETTER A MACRON"
);
is(
$charinfo
->{comment},
""
);
is(charprop(
$cp
,
'isc'
),
""
);
is(
$charinfo
->{upper},
""
);
is(charprop(
$cp
,
'uc'
),
"\x{100}"
);
is(
$charinfo
->{lower},
"0101"
);
is(charprop(
$cp
,
'lc'
),
"\x{101}"
);
is(
$charinfo
->{title},
""
);
is(charprop(
$cp
,
'tc'
),
"\x{100}"
);
is(
$charinfo
->{block},
"Latin Extended-A"
);
is(charprop(
$cp
,
'block'
),
"Latin_Extended_A"
);
is(
$charinfo
->{script},
"Latin"
)
if
$v_unicode_version
gt v3.0.1;
is(charprop(
$cp
,
'script'
),
"Latin"
)
if
$v_unicode_version
gt v3.0.1;
$cp
= 0x590;
$charinfo
= charinfo(
$cp
);
is(
$charinfo
->{code},
undef
,
"0x0590 - unused Hebrew"
);
is(
$charinfo
->{name},
undef
);
is(charprop(
$cp
,
'name'
),
""
);
is(
$charinfo
->{category},
undef
);
is(charprop(
$cp
,
'gc'
),
"Unassigned"
);
is(
$charinfo
->{combining},
undef
);
is(charprop(
$cp
,
'ccc'
),
"Not_Reordered"
);
is(
$charinfo
->{bidi},
undef
);
if
(
$v_unicode_version
gt v3.2.0) {
is(charprop(
$cp
,
'bc'
),
"Right_To_Left"
);
}
is(
$charinfo
->{decomposition},
undef
);
is(charprop(
$cp
,
'dm'
),
"\x{590}"
);
is(
$charinfo
->{decimal},
undef
);
is(
$charinfo
->{digit},
undef
);
is(
$charinfo
->{numeric},
undef
);
is(charprop(
$cp
,
'nv'
),
"NaN"
);
is(
$charinfo
->{mirrored},
undef
);
is(charprop(
$cp
,
'bidim'
),
"No"
);
is(
$charinfo
->{unicode10},
undef
);
is(charprop(
$cp
,
'na1'
),
""
);
is(
$charinfo
->{comment},
undef
);
is(charprop(
$cp
,
'isc'
),
""
);
is(
$charinfo
->{upper},
undef
);
is(charprop(
$cp
,
'uc'
),
"\x{590}"
);
is(
$charinfo
->{lower},
undef
);
is(charprop(
$cp
,
'lc'
),
"\x{590}"
);
is(
$charinfo
->{title},
undef
);
is(charprop(
$cp
,
'tc'
),
"\x{590}"
);
is(
$charinfo
->{block},
undef
);
is(charprop(
$cp
,
'block'
),
"Hebrew"
);
is(
$charinfo
->{script},
undef
);
is(charprop(
$cp
,
'script'
),
$unknown_script
)
if
$v_unicode_version
gt
v3.0.1;
$cp
= 0x5d0;
$charinfo
= charinfo(
$cp
);
is(
$charinfo
->{code},
"05D0"
,
"05D0 - used Hebrew"
);
is(
$charinfo
->{name},
"HEBREW LETTER ALEF"
);
is(charprop(
$cp
,
'name'
),
"HEBREW LETTER ALEF"
);
is(
$charinfo
->{category},
"Lo"
);
is(charprop(
$cp
,
'gc'
),
"Other_Letter"
);
is(
$charinfo
->{combining},
"0"
);
is(charprop(
$cp
,
'ccc'
),
"Not_Reordered"
);
is(
$charinfo
->{bidi},
"R"
);
is(charprop(
$cp
,
'bc'
),
"Right_To_Left"
);
is(
$charinfo
->{decomposition},
""
);
is(charprop(
$cp
,
'dm'
),
"\x{5d0}"
);
is(
$charinfo
->{decimal},
""
);
is(
$charinfo
->{digit},
""
);
is(
$charinfo
->{numeric},
""
);
is(charprop(
$cp
,
'nv'
),
"NaN"
);
is(
$charinfo
->{mirrored},
"N"
);
is(charprop(
$cp
,
'bidim'
),
"No"
);
is(
$charinfo
->{unicode10},
""
);
is(charprop(
$cp
,
'na1'
),
""
);
is(
$charinfo
->{comment},
""
);
is(charprop(
$cp
,
'isc'
),
""
);
is(
$charinfo
->{upper},
""
);
is(charprop(
$cp
,
'uc'
),
"\x{5d0}"
);
is(
$charinfo
->{lower},
""
);
is(charprop(
$cp
,
'lc'
),
"\x{5d0}"
);
is(
$charinfo
->{title},
""
);
is(charprop(
$cp
,
'tc'
),
"\x{5d0}"
);
is(
$charinfo
->{block},
"Hebrew"
);
is(charprop(
$cp
,
'block'
),
"Hebrew"
);
is(
$charinfo
->{script},
"Hebrew"
)
if
$v_unicode_version
gt v3.0.1;
is(charprop(
$cp
,
'script'
),
"Hebrew"
)
if
$v_unicode_version
gt v3.0.1;
$cp
= 0xAC00;
$charinfo
= charinfo(
$cp
);
is(
$charinfo
->{code},
"AC00"
,
"HANGUL SYLLABLE U+AC00"
);
is(
$charinfo
->{name},
"HANGUL SYLLABLE GA"
);
is(charprop(
$cp
,
'name'
),
"HANGUL SYLLABLE GA"
);
is(
$charinfo
->{category},
"Lo"
);
is(charprop(
$cp
,
'gc'
),
"Other_Letter"
);
is(
$charinfo
->{combining},
"0"
);
is(charprop(
$cp
,
'ccc'
),
"Not_Reordered"
);
is(
$charinfo
->{bidi},
"L"
);
is(charprop(
$cp
,
'bc'
),
"Left_To_Right"
);
is(
$charinfo
->{decomposition},
"1100 1161"
);
is(charprop(
$cp
,
'dm'
),
"\x{1100}\x{1161}"
);
is(
$charinfo
->{decimal},
""
);
is(
$charinfo
->{digit},
""
);
is(
$charinfo
->{numeric},
""
);
is(charprop(
$cp
,
'nv'
),
"NaN"
);
is(
$charinfo
->{mirrored},
"N"
);
is(charprop(
$cp
,
'bidim'
),
"No"
);
is(
$charinfo
->{unicode10},
""
);
is(charprop(
$cp
,
'na1'
),
""
);
is(
$charinfo
->{comment},
""
);
is(charprop(
$cp
,
'isc'
),
""
);
is(
$charinfo
->{upper},
""
);
is(charprop(
$cp
,
'uc'
),
"\x{AC00}"
);
is(
$charinfo
->{lower},
""
);
is(charprop(
$cp
,
'lc'
),
"\x{AC00}"
);
is(
$charinfo
->{title},
""
);
is(charprop(
$cp
,
'tc'
),
"\x{AC00}"
);
is(
$charinfo
->{block},
"Hangul Syllables"
);
is(charprop(
$cp
,
'block'
),
"Hangul_Syllables"
);
is(
$charinfo
->{script},
"Hangul"
)
if
$v_unicode_version
gt v3.0.1;
is(charprop(
$cp
,
'script'
),
"Hangul"
)
if
$v_unicode_version
gt v3.0.1;
$cp
= 0xAE00;
$charinfo
= charinfo(
$cp
);
is(
$charinfo
->{code},
"AE00"
,
"HANGUL SYLLABLE U+AE00"
);
is(
$charinfo
->{name},
"HANGUL SYLLABLE GEUL"
);
is(charprop(
$cp
,
'name'
),
"HANGUL SYLLABLE GEUL"
);
is(
$charinfo
->{category},
"Lo"
);
is(charprop(
$cp
,
'gc'
),
"Other_Letter"
);
is(
$charinfo
->{combining},
"0"
);
is(charprop(
$cp
,
'ccc'
),
"Not_Reordered"
);
is(
$charinfo
->{bidi},
"L"
);
is(charprop(
$cp
,
'bc'
),
"Left_To_Right"
);
is(
$charinfo
->{decomposition},
"1100 1173 11AF"
);
is(charprop(
$cp
,
'dm'
),
"\x{1100}\x{1173}\x{11AF}"
);
is(
$charinfo
->{decimal},
""
);
is(
$charinfo
->{digit},
""
);
is(
$charinfo
->{numeric},
""
);
is(charprop(
$cp
,
'nv'
),
"NaN"
);
is(
$charinfo
->{mirrored},
"N"
);
is(charprop(
$cp
,
'bidim'
),
"No"
);
is(
$charinfo
->{unicode10},
""
);
is(charprop(
$cp
,
'na1'
),
""
);
is(
$charinfo
->{comment},
""
);
is(charprop(
$cp
,
'isc'
),
""
);
is(
$charinfo
->{upper},
""
);
is(charprop(
$cp
,
'uc'
),
"\x{AE00}"
);
is(
$charinfo
->{lower},
""
);
is(charprop(
$cp
,
'lc'
),
"\x{AE00}"
);
is(
$charinfo
->{title},
""
);
is(charprop(
$cp
,
'tc'
),
"\x{AE00}"
);
is(
$charinfo
->{block},
"Hangul Syllables"
);
is(charprop(
$cp
,
'block'
),
"Hangul_Syllables"
);
is(
$charinfo
->{script},
"Hangul"
)
if
$v_unicode_version
gt v3.0.1;
is(charprop(
$cp
,
'script'
),
"Hangul"
)
if
$v_unicode_version
gt v3.0.1;
if
(
$v_unicode_version
gt v3.0.1) {
$cp
= 0x1D400;
$charinfo
= charinfo(
$cp
);
is(
$charinfo
->{code},
"1D400"
,
"MATHEMATICAL BOLD CAPITAL A"
);
is(
$charinfo
->{name},
"MATHEMATICAL BOLD CAPITAL A"
);
is(charprop(
$cp
,
'name'
),
"MATHEMATICAL BOLD CAPITAL A"
);
is(
$charinfo
->{category},
"Lu"
);
is(charprop(
$cp
,
'gc'
),
"Uppercase_Letter"
);
is(
$charinfo
->{combining},
"0"
);
is(charprop(
$cp
,
'ccc'
),
"Not_Reordered"
);
is(
$charinfo
->{bidi},
"L"
);
is(charprop(
$cp
,
'bc'
),
"Left_To_Right"
);
is(
$charinfo
->{decomposition},
"<font> $A_code"
);
is(charprop(
$cp
,
'dm'
),
"A"
);
is(
$charinfo
->{decimal},
""
);
is(
$charinfo
->{digit},
""
);
is(
$charinfo
->{numeric},
""
);
is(charprop(
$cp
,
'nv'
),
"NaN"
);
is(
$charinfo
->{mirrored},
"N"
);
is(charprop(
$cp
,
'bidim'
),
"No"
);
is(
$charinfo
->{unicode10},
""
);
is(charprop(
$cp
,
'na1'
),
""
);
is(
$charinfo
->{comment},
""
);
is(charprop(
$cp
,
'isc'
),
""
);
is(
$charinfo
->{upper},
""
);
is(charprop(
$cp
,
'uc'
),
"\x{1D400}"
);
is(
$charinfo
->{lower},
""
);
is(charprop(
$cp
,
'lc'
),
"\x{1D400}"
);
is(
$charinfo
->{title},
""
);
is(charprop(
$cp
,
'tc'
),
"\x{1D400}"
);
is(
$charinfo
->{block},
"Mathematical Alphanumeric Symbols"
);
is(charprop(
$cp
,
'block'
),
"Mathematical_Alphanumeric_Symbols"
);
is(
$charinfo
->{script},
"Common"
);
is(charprop(
$cp
,
'script'
),
"Common"
);
}
if
(
$v_unicode_version
ge v4.1.0) {
$cp
= 0x9FBA;
$charinfo
= charinfo(0x9FBA);
is(
$charinfo
->{code},
"9FBA"
,
"U+9FBA"
);
is(
$charinfo
->{name},
"CJK UNIFIED IDEOGRAPH-9FBA"
);
is(charprop(
$cp
,
'name'
),
"CJK UNIFIED IDEOGRAPH-9FBA"
);
is(
$charinfo
->{category},
"Lo"
);
is(charprop(
$cp
,
'gc'
),
"Other_Letter"
);
is(
$charinfo
->{combining},
"0"
);
is(charprop(
$cp
,
'ccc'
),
"Not_Reordered"
);
is(
$charinfo
->{bidi},
"L"
);
is(charprop(
$cp
,
'bc'
),
"Left_To_Right"
);
is(
$charinfo
->{decomposition},
""
);
is(charprop(
$cp
,
'dm'
),
"\x{9FBA}"
);
is(
$charinfo
->{decimal},
""
);
is(
$charinfo
->{digit},
""
);
is(
$charinfo
->{numeric},
""
);
is(charprop(
$cp
,
'nv'
),
"NaN"
);
is(
$charinfo
->{mirrored},
"N"
);
is(charprop(
$cp
,
'bidim'
),
"No"
);
is(
$charinfo
->{unicode10},
""
);
is(charprop(
$cp
,
'na1'
),
""
);
is(
$charinfo
->{comment},
""
);
is(charprop(
$cp
,
'isc'
),
""
);
is(
$charinfo
->{upper},
""
);
is(charprop(
$cp
,
'uc'
),
"\x{9FBA}"
);
is(
$charinfo
->{lower},
""
);
is(charprop(
$cp
,
'lc'
),
"\x{9FBA}"
);
is(
$charinfo
->{title},
""
);
is(charprop(
$cp
,
'tc'
),
"\x{9FBA}"
);
is(
$charinfo
->{block},
"CJK Unified Ideographs"
);
is(charprop(
$cp
,
'block'
),
"CJK_Unified_Ideographs"
);
is(
$charinfo
->{script},
"Han"
);
is(charprop(
$cp
,
'script'
),
"Han"
);
}
is(charblock(0x590),
"Hebrew"
,
"0x0590 - Hebrew unused charblock"
);
is(charscript(0x590),
$unknown_script
,
"0x0590 - Hebrew unused charscript"
)
if
$v_unicode_version
gt v3.0.1;
is(charblock(0x1FFFF),
"No_Block"
,
"0x1FFFF - unused charblock"
);
{
my
@warnings
;
local
$SIG
{__WARN__} =
sub
{
push
@warnings
,
@_
};
is(charblock(
chr
(0x6237)),
undef
,
"Verify charblock of non-code point returns <undef>"
);
cmp_ok(
scalar
@warnings
,
'=='
, 1,
" ... and generates 1 warning"
);
like(
$warnings
[0],
qr/unknown code/
,
" ... with the right text"
);
}
my
$fraction_3_4_code
=
sprintf
(
"%04X"
, utf8::unicode_to_native(0xbe));
$cp
=
$fraction_3_4_code
;
$charinfo
= charinfo(
$fraction_3_4_code
);
is(
$charinfo
->{code},
$fraction_3_4_code
,
"VULGAR FRACTION THREE QUARTERS"
);
is(
$charinfo
->{name},
"VULGAR FRACTION THREE QUARTERS"
);
is(charprop(
$cp
,
'name'
),
"VULGAR FRACTION THREE QUARTERS"
);
is(
$charinfo
->{category},
"No"
);
is(charprop(
$cp
,
'gc'
),
"Other_Number"
);
is(
$charinfo
->{combining},
"0"
);
is(charprop(
$cp
,
'ccc'
),
"Not_Reordered"
);
is(
$charinfo
->{bidi},
"ON"
);
is(charprop(
$cp
,
'bc'
),
"Other_Neutral"
);
is(
$charinfo
->{decomposition},
"<fraction> "
.
sprintf
(
"%04X"
,
ord
"3"
)
.
" 2044 "
.
sprintf
(
"%04X"
,
ord
"4"
));
is(charprop(
$cp
,
'dm'
),
"3\x{2044}4"
);
is(
$charinfo
->{decimal},
""
);
is(
$charinfo
->{digit},
""
);
is(
$charinfo
->{numeric},
"3/4"
);
is(charprop(
$cp
,
'nv'
),
"0.75"
);
is(
$charinfo
->{mirrored},
"N"
);
is(charprop(
$cp
,
'bidim'
),
"No"
);
is(
$charinfo
->{unicode10},
"FRACTION THREE QUARTERS"
);
is(charprop(
$cp
,
'na1'
),
"FRACTION THREE QUARTERS"
);
is(
$charinfo
->{comment},
""
);
is(charprop(
$cp
,
'isc'
),
""
);
is(
$charinfo
->{upper},
""
);
is(charprop(
$cp
,
'uc'
),
chr
hex
$cp
);
is(
$charinfo
->{lower},
""
);
is(charprop(
$cp
,
'lc'
),
chr
hex
$cp
);
is(
$charinfo
->{title},
""
);
is(charprop(
$cp
,
'tc'
),
chr
hex
$cp
);
is(
$charinfo
->{block},
"Latin-1 Supplement"
);
is(charprop(
$cp
,
'block'
),
"Latin_1_Supplement"
);
is(
$charinfo
->{script},
"Common"
)
if
$v_unicode_version
gt v3.0.1;
is(charprop(
$cp
,
'script'
),
"Common"
)
if
$v_unicode_version
gt v3.0.1;
$cp
= 0x130;
$charinfo
= charinfo(
$cp
);
my
$I_code
=
sprintf
(
"%04X"
,
ord
(
"I"
));
my
$i_code
=
sprintf
(
"%04X"
,
ord
(
"i"
));
is(
$charinfo
->{code},
"0130"
,
"LATIN CAPITAL LETTER I WITH DOT ABOVE"
);
is(
$charinfo
->{name},
"LATIN CAPITAL LETTER I WITH DOT ABOVE"
);
is(charprop(
$cp
,
'name'
),
"LATIN CAPITAL LETTER I WITH DOT ABOVE"
);
is(
$charinfo
->{category},
"Lu"
);
is(charprop(
$cp
,
'gc'
),
"Uppercase_Letter"
);
is(
$charinfo
->{combining},
"0"
);
is(charprop(
$cp
,
'ccc'
),
"Not_Reordered"
);
is(
$charinfo
->{bidi},
"L"
);
is(charprop(
$cp
,
'bc'
),
"Left_To_Right"
);
is(
$charinfo
->{decomposition},
"$I_code 0307"
);
is(charprop(
$cp
,
'dm'
),
"I\x{0307}"
);
is(
$charinfo
->{decimal},
""
);
is(
$charinfo
->{digit},
""
);
is(
$charinfo
->{numeric},
""
);
is(charprop(
$cp
,
'nv'
),
"NaN"
);
is(
$charinfo
->{mirrored},
"N"
);
is(charprop(
$cp
,
'bidim'
),
"No"
);
is(
$charinfo
->{unicode10},
"LATIN CAPITAL LETTER I DOT"
);
is(charprop(
$cp
,
'na1'
),
"LATIN CAPITAL LETTER I DOT"
);
is(
$charinfo
->{comment},
""
);
is(charprop(
$cp
,
'isc'
),
""
);
is(
$charinfo
->{upper},
""
);
is(charprop(
$cp
,
'uc'
),
"\x{130}"
);
is(
$charinfo
->{lower},
$i_code
);
is(charprop(
$cp
,
'lc'
),
"i\x{307}"
)
if
$v_unicode_version
ge v3.2.0;
is(
$charinfo
->{title},
""
);
is(charprop(
$cp
,
'tc'
),
"\x{130}"
);
is(
$charinfo
->{block},
"Latin Extended-A"
);
is(charprop(
$cp
,
'block'
),
"Latin_Extended_A"
);
is(
$charinfo
->{script},
"Latin"
)
if
$v_unicode_version
gt v3.0.1;
is(charprop(
$cp
,
'script'
),
"Latin"
)
if
$v_unicode_version
gt v3.0.1;
$cp
= 0x1F80;
$charinfo
= charinfo(
$cp
);
is(
$charinfo
->{code},
"1F80"
,
"GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI"
);
is(
$charinfo
->{name},
"GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI"
);
is(charprop(
$cp
,
"name"
),
"GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI"
);
is(
$charinfo
->{category},
"Ll"
);
is(charprop(
$cp
,
"gc"
),
"Lowercase_Letter"
);
is(
$charinfo
->{combining},
"0"
);
is(charprop(
$cp
,
"ccc"
),
"Not_Reordered"
);
is(
$charinfo
->{bidi},
"L"
);
is(charprop(
$cp
,
"bc"
),
"Left_To_Right"
);
is(
$charinfo
->{decomposition},
"1F00 0345"
);
is(charprop(
$cp
,
"dm"
),
"\x{1F00}\x{0345}"
);
is(
$charinfo
->{decimal},
""
);
is(
$charinfo
->{digit},
""
);
is(
$charinfo
->{numeric},
""
);
is(charprop(
$cp
,
"nv"
),
"NaN"
);
is(
$charinfo
->{mirrored},
"N"
);
is(charprop(
$cp
,
"bidim"
),
"No"
);
is(
$charinfo
->{unicode10},
""
);
is(charprop(
$cp
,
"na1"
),
""
);
is(
$charinfo
->{comment},
""
);
is(charprop(
$cp
,
"isc"
),
""
);
is(
$charinfo
->{upper},
"1F88"
);
is(charprop(
$cp
,
"uc"
),
"\x{1F08}\x{0399}"
);
is(charprop(
$cp
,
"suc"
),
"\x{1F88}"
);
is(
$charinfo
->{lower},
""
);
is(charprop(
$cp
,
"lc"
),
"\x{1F80}"
);
is(
$charinfo
->{title},
"1F88"
);
is(charprop(
$cp
,
"tc"
),
"\x{1F88}"
);
is(
$charinfo
->{block},
"Greek Extended"
);
is(charprop(
$cp
,
"block"
),
"Greek_Extended"
);
is(
$charinfo
->{script},
"Greek"
)
if
$v_unicode_version
gt v3.0.1;
is(charprop(
$cp
,
"script"
),
"Greek"
)
if
$v_unicode_version
gt v3.0.1;
is(charprop(
ord
(
"A"
),
"foo"
),
undef
,
"Verify charprop of unknown property returns <undef>"
);
if
(
$v_unicode_version
ge v6.3.0) {
is(charprop(
ord
(
"("
),
"bpb"
),
")"
,
"Verify charprop figures out that s-type properties can be char"
);
}
is(charprop(
ord
(
"9"
),
"nv"
), 9,
"Verify charprop can adjust an ar-type property"
);
if
(
$v_unicode_version
ge v5.2.0) {
is(charprop(utf8::unicode_to_native(0xAD),
"NFKC_Casefold"
),
""
,
"Verify charprop can handle an \"\" in ae-type property"
);
}
my
$mark_props_ref
= charprops_all(0x300);
is(
$mark_props_ref
->{
'Bidi_Class'
},
"Nonspacing_Mark"
,
"Next tests are charprops_all of 0x300"
);
is(
$mark_props_ref
->{
'Bidi_Mirrored'
},
"No"
);
is(
$mark_props_ref
->{
'Canonical_Combining_Class'
},
"Above"
);
is(
$mark_props_ref
->{
'Case_Folding'
},
"\x{300}"
);
is(
$mark_props_ref
->{
'Decomposition_Mapping'
},
"\x{300}"
);
is(
$mark_props_ref
->{
'Decomposition_Type'
}, (
$v_unicode_version
le v4.0.0)
?
"none"
:
"None"
);
is(
$mark_props_ref
->{
'General_Category'
},
"Nonspacing_Mark"
);
if
(
$v_unicode_version
gt v5.1.0) {
is(
$mark_props_ref
->{
'ISO_Comment'
},
""
);
}
is(
$mark_props_ref
->{
'Lowercase_Mapping'
},
"\x{300}"
);
is(
$mark_props_ref
->{
'Name'
},
"COMBINING GRAVE ACCENT"
);
is(
$mark_props_ref
->{
'Numeric_Type'
},
"None"
);
is(
$mark_props_ref
->{
'Numeric_Value'
},
"NaN"
);
is(
$mark_props_ref
->{
'Simple_Case_Folding'
},
"\x{300}"
);
is(
$mark_props_ref
->{
'Simple_Lowercase_Mapping'
},
"\x{300}"
);
is(
$mark_props_ref
->{
'Simple_Titlecase_Mapping'
},
"\x{300}"
);
is(
$mark_props_ref
->{
'Simple_Uppercase_Mapping'
},
"\x{300}"
);
is(
$mark_props_ref
->{
'Titlecase_Mapping'
},
"\x{300}"
);
is(
$mark_props_ref
->{
'Unicode_1_Name'
},
"NON-SPACING GRAVE"
);
is(
$mark_props_ref
->{
'Uppercase_Mapping'
},
"\x{300}"
);
my
$charblocks
= charblocks();
ok(
exists
$charblocks
->{Thai},
'Thai charblock exists'
);
is(
$charblocks
->{Thai}->[0]->[0],
hex
(
'0e00'
));
ok(!
exists
$charblocks
->{PigLatin},
'PigLatin charblock does not exist'
);
if
(
$v_unicode_version
gt v3.0.1) {
my
$charscripts
= charscripts();
ok(
exists
$charscripts
->{Armenian},
'Armenian charscript exists'
);
is(
$charscripts
->{Armenian}->[0]->[0],
hex
(
'0531'
));
ok(!
exists
$charscripts
->{PigLatin},
'PigLatin charscript does not exist'
);
my
$charscript
;
$charscript
= charscript(
"12ab"
);
is(
$charscript
,
'Ethiopic'
,
'Ethiopic charscript'
);
$charscript
= charscript(
"0x12ab"
);
is(
$charscript
,
'Ethiopic'
);
$charscript
= charscript(
"U+12ab"
);
is(
$charscript
,
'Ethiopic'
);
my
$ranges
;
if
(
$v_unicode_version
gt v4.0.0) {
$ranges
= charscript(
'Ogham'
);
is(
$ranges
->[0]->[0],
hex
(
'1680'
),
'Ogham charscript'
);
is(
$ranges
->[0]->[1],
hex
(
'169C'
));
}
$ranges
= charscript(
'Cherokee'
);
ok(!charinrange(
$ranges
,
"139f"
),
'Cherokee charscript'
);
ok( charinrange(
$ranges
,
"13a0"
));
ok( charinrange(
$ranges
,
"13f4"
));
ok(!charinrange(
$ranges
,
"13ff"
));
}
my
$gc
= general_categories();
ok(
exists
$gc
->{L},
'has L'
);
is(
$gc
->{L},
'Letter'
,
'L is Letter'
);
is(
$gc
->{Lu},
'UppercaseLetter'
,
'Lu is UppercaseLetter'
);
my
$bt
= bidi_types();
ok(
exists
$bt
->{L},
'has L'
);
is(
$bt
->{L},
'Left-to-Right'
,
'L is Left-to-Right'
);
is(
$bt
->{AL},
'Right-to-Left Arabic'
,
'AL is Right-to-Left Arabic'
);
ok(
$current_version
le
$expected_version
,
"Verify there isn't a new Unicode version to upgrade to"
);
ok(!compexcl(0x0100),
'compexcl'
);
ok(!compexcl(0xD801),
'compexcl of surrogate'
);
ok(!compexcl(0x110000),
'compexcl of non-Unicode code point'
);
ok( compexcl(0x0958));
my
$casefold
;
$casefold
= casefold(utf8::unicode_to_native(0x41));
is(
$casefold
->{code},
$A_code
,
'casefold native(0x41) code'
);
is(
$casefold
->{status},
'C'
,
'casefold native(0x41) status'
);
is(
$casefold
->{mapping},
$a_code
,
'casefold native(0x41) mapping'
);
is(
$casefold
->{full},
$a_code
,
'casefold native(0x41) full'
);
is(
$casefold
->{simple},
$a_code
,
'casefold native(0x41) simple'
);
is(
$casefold
->{turkic},
""
,
'casefold native(0x41) turkic'
);
my
$sharp_s_code
=
sprintf
(
"%04X"
, utf8::unicode_to_native(0xdf));
my
$S_code
=
sprintf
(
"%04X"
,
ord
"S"
);
my
$s_code
=
sprintf
(
"%04X"
,
ord
"s"
);
if
(
$v_unicode_version
gt v3.0.0) {
$casefold
= casefold(utf8::unicode_to_native(0xdf));
is(
$casefold
->{code},
$sharp_s_code
,
'casefold native(0xDF) code'
);
is(
$casefold
->{status},
'F'
,
'casefold native(0xDF) status'
);
is(
$casefold
->{mapping},
"$s_code $s_code"
,
'casefold native(0xDF) mapping'
);
is(
$casefold
->{full},
"$s_code $s_code"
,
'casefold native(0xDF) full'
);
is(
$casefold
->{simple},
""
,
'casefold native(0xDF) simple'
);
is(
$casefold
->{turkic},
""
,
'casefold native(0xDF) turkic'
);
if
(
$v_unicode_version
eq v3.0.1) {
$casefold
= casefold(0x130);
is(
$casefold
->{code},
'0130'
,
'casefold 0x130 code'
);
is(
$casefold
->{status},
'C'
,
'casefold 0x130 status'
);
is(
$casefold
->{mapping},
$i_code
,
'casefold 0x130 mapping'
);
is(
$casefold
->{full},
$i_code
,
'casefold 0x130 full'
);
is(
$casefold
->{simple},
$i_code
,
'casefold 0x130 simple'
);
is(
$casefold
->{turkic},
""
,
'casefold 0x130 turkic'
);
$casefold
= casefold(0x131);
is(
$casefold
->{code},
'0131'
,
'casefold 0x131 code'
);
is(
$casefold
->{status},
'C'
,
'casefold 0x131 status'
);
is(
$casefold
->{mapping},
$i_code
,
'casefold 0x131 mapping'
);
is(
$casefold
->{full},
$i_code
,
'casefold 0x131 full'
);
is(
$casefold
->{simple},
$i_code
,
'casefold 0x131 simple'
);
is(
$casefold
->{turkic},
""
,
'casefold 0x131 turkic'
);
}
elsif
(
$v_unicode_version
lt v3.2.0) {
$casefold
= casefold(0x130);
is(
$casefold
->{code},
'0130'
,
'casefold 0x130 code'
);
is(
$casefold
->{status},
'I'
,
'casefold 0x130 status'
);
is(
$casefold
->{mapping},
$i_code
,
'casefold 0x130 mapping'
);
is(
$casefold
->{full},
$i_code
,
'casefold 0x130 full'
);
is(
$casefold
->{simple},
$i_code
,
'casefold 0x130 simple'
);
is(
$casefold
->{turkic},
$i_code
,
'casefold 0x130 turkic'
);
$casefold
= casefold(0x131);
is(
$casefold
->{code},
'0131'
,
'casefold 0x131 code'
);
is(
$casefold
->{status},
'I'
,
'casefold 0x131 status'
);
is(
$casefold
->{mapping},
$i_code
,
'casefold 0x131 mapping'
);
is(
$casefold
->{full},
$i_code
,
'casefold 0x131 full'
);
is(
$casefold
->{simple},
$i_code
,
'casefold 0x131 simple'
);
is(
$casefold
->{turkic},
$i_code
,
'casefold 0x131 turkic'
);
}
else
{
$casefold
= casefold(utf8::unicode_to_native(0x49));
is(
$casefold
->{code},
$I_code
,
'casefold native(0x49) code'
);
is(
$casefold
->{status},
'C'
,
'casefold native(0x49) status'
);
is(
$casefold
->{mapping},
$i_code
,
'casefold native(0x49) mapping'
);
is(
$casefold
->{full},
$i_code
,
'casefold native(0x49) full'
);
is(
$casefold
->{simple},
$i_code
,
'casefold native(0x49) simple'
);
is(
$casefold
->{turkic},
"0131"
,
'casefold native(0x49) turkic'
);
$casefold
= casefold(0x130);
is(
$casefold
->{code},
'0130'
,
'casefold 0x130 code'
);
is(
$casefold
->{status},
'F'
,
'casefold 0x130 status'
);
is(
$casefold
->{mapping},
"$i_code 0307"
,
'casefold 0x130 mapping'
);
is(
$casefold
->{full},
"$i_code 0307"
,
'casefold 0x130 full'
);
is(
$casefold
->{simple},
""
,
'casefold 0x130 simple'
);
is(
$casefold
->{turkic},
$i_code
,
'casefold 0x130 turkic'
);
}
if
(
$v_unicode_version
gt v3.0.1) {
$casefold
= casefold(0x1F88);
is(
$casefold
->{code},
'1F88'
,
'casefold 0x1F88 code'
);
is(
$casefold
->{status},
'S'
,
'casefold 0x1F88 status'
);
is(
$casefold
->{mapping},
'1F80'
,
'casefold 0x1F88 mapping'
);
is(
$casefold
->{full},
'1F00 03B9'
,
'casefold 0x1F88 full'
);
is(
$casefold
->{simple},
'1F80'
,
'casefold 0x1F88 simple'
);
is(
$casefold
->{turkic},
""
,
'casefold 0x1F88 turkic'
);
}
}
ok(!casefold(utf8::unicode_to_native(0x20)));
my
$casespec
;
ok(!casespec(utf8::unicode_to_native(0x41)));
$casespec
= casespec(utf8::unicode_to_native(0xdf));
ok(
$casespec
->{code} eq
$sharp_s_code
&&
$casespec
->{lower} eq
$sharp_s_code
&&
$casespec
->{title} eq
"$S_code $s_code"
&&
$casespec
->{upper} eq
"$S_code $S_code"
&&
!
defined
$casespec
->{condition},
'casespec native(0xDF)'
);
$casespec
= casespec(0x307);
if
(
$v_unicode_version
gt v3.1.0) {
ok(
$casespec
->{az}->{code} eq
'0307'
&& !
defined
$casespec
->{az}->{lower}
&&
$casespec
->{az}->{title} eq
'0307'
&&
$casespec
->{az}->{upper} eq
'0307'
&&
$casespec
->{az}->{condition} eq (
$v_unicode_version
le v3.2)
?
'az After_Soft_Dotted'
:
'az After_I'
,
'casespec 0x307'
);
}
for
(1) {
my
$a
=compexcl
$_
}
ok(1,
'compexcl read-only $_: perl #7305'
);
map
{compexcl
$_
} %{{
1
=>2}};
ok(1,
'compexcl read-only hash: perl #7305'
);
is(Unicode::UCD::_getcode(
'123'
), 123,
"_getcode(123)"
);
is(Unicode::UCD::_getcode(
'0123'
), 0x123,
"_getcode(0123)"
);
is(Unicode::UCD::_getcode(
'0x123'
), 0x123,
"_getcode(0x123)"
);
is(Unicode::UCD::_getcode(
'0X123'
), 0x123,
"_getcode(0X123)"
);
is(Unicode::UCD::_getcode(
'U+123'
), 0x123,
"_getcode(U+123)"
);
is(Unicode::UCD::_getcode(
'u+123'
), 0x123,
"_getcode(u+123)"
);
is(Unicode::UCD::_getcode(
'U+1234'
), 0x1234,
"_getcode(U+1234)"
);
is(Unicode::UCD::_getcode(
'U+12345'
), 0x12345,
"_getcode(U+12345)"
);
is(Unicode::UCD::_getcode(
'123x'
),
undef
,
"_getcode(123x)"
);
is(Unicode::UCD::_getcode(
'x123'
),
undef
,
"_getcode(x123)"
);
is(Unicode::UCD::_getcode(
'0x123x'
),
undef
,
"_getcode(x123)"
);
is(Unicode::UCD::_getcode(
'U+123x'
),
undef
,
"_getcode(x123)"
);
SKIP:
{
skip(
"Script property not in this release"
, 3)
if
$v_unicode_version
lt v3.1.0;
{
my
@warnings
;
local
$SIG
{__WARN__} =
sub
{
push
@warnings
,
@_
};
is(charscript(
chr
(0x6237)),
undef
,
"Verify charscript of non-code point returns <undef>"
);
cmp_ok(
scalar
@warnings
,
'=='
, 1,
" ... and generates 1 warning"
);
like(
$warnings
[0],
qr/unknown code/
,
" ... with the right text"
);
}
my
$r1
= charscript(
'Latin'
);
if
(ok(
defined
$r1
,
"Found Latin script"
)) {
skip(
"Latin range count will be wrong when using older Unicode release"
,
2)
if
$current_version
lt
$expected_version
;
my
$n1
=
@$r1
;
is(
$n1
, 39,
"number of ranges in Latin script (Unicode $expected_version)"
)
if
$::IS_ASCII;
shift
@$r1
while
@$r1
;
my
$r2
= charscript(
'Latin'
);
is(
@$r2
,
$n1
,
"modifying results should not mess up internal caches"
);
}
}
{
is(charinfo(0xdeadbeef),
undef
,
"[perl #23273] warnings in Unicode::UCD"
);
}
if
(
$v_unicode_version
ge v4.1.0) {
is(namedseq(
"KEYCAP DIGIT ZERO"
),
"0\x{FE0F}\x{20E3}"
,
"namedseq with char that varies under EBCDIC"
);
is(namedseq(
"KATAKANA LETTER AINU P"
),
"\x{31F7}\x{309A}"
,
"namedseq"
);
is(namedseq(
"KATAKANA LETTER AINU Q"
),
undef
);
is(namedseq(),
undef
);
is(namedseq(
qw(foo bar)
),
undef
);
my
@ns
= namedseq(
"KATAKANA LETTER AINU P"
);
is(
scalar
@ns
, 2);
is(
$ns
[0], 0x31F7);
is(
$ns
[1], 0x309A);
my
%ns
= namedseq();
is(
$ns
{
"KATAKANA LETTER AINU P"
},
"\x{31F7}\x{309A}"
);
@ns
= namedseq(42);
is(
@ns
, 0);
}
my
$ret_len
;
is(num(
"0"
), 0,
'Verify num("0") == 0'
);
is(num(
"0"
, \
$ret_len
), 0,
'Verify num("0", \$ret_len) == 0'
);
is(
$ret_len
, 1,
"... and the returned length is 1"
);
ok(!
defined
num(
""
, \
$ret_len
),
'Verify num("", \$ret_len) isnt defined'
);
is(
$ret_len
, 0,
"... and the returned length is 0"
);
ok(!
defined
num(
"A"
, \
$ret_len
),
'Verify num("A") isnt defined'
);
is(
$ret_len
, 0,
"... and the returned length is 0"
);
is(num(
"98765"
, \
$ret_len
), 98765,
'Verify num("98765") == 98765'
);
is(
$ret_len
, 5,
"... and the returned length is 5"
);
ok(!
defined
num(
"98765\N{FULLWIDTH DIGIT FOUR}"
, \
$ret_len
),
'Verify num("98765\N{FULLWIDTH DIGIT FOUR}") isnt defined'
);
is(
$ret_len
, 5,
"... but the returned length is 5"
);
my
$tai_lue_2
;
if
(
$v_unicode_version
ge v4.1.0) {
my
$tai_lue_1
= charnames::string_vianame(
"NEW TAI LUE DIGIT ONE"
);
$tai_lue_2
= charnames::string_vianame(
"NEW TAI LUE DIGIT TWO"
);
is(num(
$tai_lue_2
), 2,
'Verify num("\N{NEW TAI LUE DIGIT TWO}") == 2'
);
is(num(
$tai_lue_1
), 1,
'Verify num("\N{NEW TAI LUE DIGIT ONE}") == 1'
);
is(num(
$tai_lue_2
.
$tai_lue_1
), 21,
'Verify num("\N{NEW TAI LUE DIGIT TWO}\N{NEW TAI LUE DIGIT ONE}") == 21'
);
}
if
(
$v_unicode_version
ge v5.2.0) {
ok(!
defined
num(
$tai_lue_2
. charnames::string_vianame(
"NEW TAI LUE THAM DIGIT ONE"
), \
$ret_len
),
'Verify num("\N{NEW TAI LUE DIGIT TWO}\N{NEW TAI LUE THAM DIGIT ONE}") isnt defined'
);
is(
$ret_len
, 1,
"... but the returned length is 1"
);
ok(!
defined
num(charnames::string_vianame(
"NEW TAI LUE THAM DIGIT ONE"
)
.
$tai_lue_2
, \
$ret_len
),
'Verify num("\N{NEW TAI LUE THAM DIGIT ONE}\N{NEW TAI LUE DIGIT TWO}") isnt defined'
);
is(
$ret_len
, 1,
"... but the returned length is 1"
);
}
if
(
$v_unicode_version
ge v5.1.0) {
my
$cham_0
= charnames::string_vianame(
"CHAM DIGIT ZERO"
);
is(num(
$cham_0
. charnames::string_vianame(
"CHAM DIGIT THREE"
)), 3,
'Verify num("\N{CHAM DIGIT ZERO}\N{CHAM DIGIT THREE}") == 3'
);
if
(
$v_unicode_version
ge v5.2.0) {
ok(!
defined
num(
$cham_0
. charnames::string_vianame(
"JAVANESE DIGIT NINE"
),
\
$ret_len
),
'Verify num("\N{CHAM DIGIT ZERO}\N{JAVANESE DIGIT NINE}") isnt defined'
);
is(
$ret_len
, 1,
"... but the returned length is 1"
);
}
}
is(num(
"\N{SUPERSCRIPT TWO}"
), 2,
'Verify num("\N{SUPERSCRIPT TWO} == 2'
);
if
(
$v_unicode_version
ge v3.0.0) {
is(num(charnames::string_vianame(
"ETHIOPIC NUMBER TEN THOUSAND"
)), 10000,
'Verify num("\N{ETHIOPIC NUMBER TEN THOUSAND}") == 10000'
);
}
if
(
$v_unicode_version
ge v5.2.0) {
is(num(charnames::string_vianame(
"NORTH INDIC FRACTION ONE HALF"
)),
.5,
'Verify num("\N{NORTH INDIC FRACTION ONE HALF}") == .5'
);
is(num(
"\N{U+12448}"
), 9,
'Verify num("\N{U+12448}") == 9'
);
}
if
(
$v_unicode_version
gt v3.2.0) {
is(num(
"\N{U+5146}"
), 1000000000000,
'Verify num("\N{U+5146}") == 1000000000000'
);
}
sub
InKana {
<<'END'}
3040 309F
30A0 30FF
END
is(prop_aliases(
undef
),
undef
,
"prop_aliases(undef) returns <undef>"
);
is(prop_aliases(
"unknown property"
),
undef
,
"prop_aliases(<unknown property>) returns <undef>"
);
is(prop_aliases(
"InKana"
),
undef
,
"prop_aliases(<user-defined property>) returns <undef>"
);
is(prop_aliases(
"Perl_Decomposition_Mapping"
),
undef
,
"prop_aliases('Perl_Decomposition_Mapping') returns <undef> since internal-Perl-only"
);
is(prop_aliases(
"Perl_Charnames"
),
undef
,
"prop_aliases('Perl_Charnames') returns <undef> since internal-Perl-only"
);
is(prop_aliases(
"isgc"
),
undef
,
"prop_aliases('isgc') returns <undef> since is not covered Perl extension"
);
is(prop_aliases(
"Is_Is_Any"
),
undef
,
"prop_aliases('Is_Is_Any') returns <undef> since two is's"
);
is(prop_aliases(
"ccc=vr"
),
undef
,
"prop_aliases('ccc=vr') doesn't generate a warning"
);
my
%props
;
my
$extra_chars
=
"-_ "
;
$props
{
'Perl_Decimal_Digit'
} = 1;
my
@list
= prop_aliases(
"perldecimaldigit"
);
is_deeply(\
@list
,
[
"Perl_Decimal_Digit"
,
"Perl_Decimal_Digit"
],
"prop_aliases('perldecimaldigit') returns Perl_Decimal_Digit as both short and full names"
);
SKIP: {
skip
"PropertyAliases.txt is not in this Unicode version"
, 1
if
$v_unicode_version
lt v3.2.0;
open
my
$props
,
"<"
,
"../lib/unicore/PropertyAliases.txt"
or
die
"Can't open Unicode PropertyAliases.txt"
;
local
$/ =
"\n"
;
while
(<
$props
>) {
s/\s*
next
if
/^\s* $/x;
chomp
;
local
$/ =
$input_record_separator
;
my
$count
= 0;
my
$short_name
;
my
$full_name
;
my
@names_via_short
;
foreach
my
$alias
(
split
/\s*;\s*/) {
my
$mod_name
=
"$extra_chars$alias"
;
my
$loose
=
&Unicode::UCD::loose_name
(
lc
$alias
);
$props
{
$loose
} = 1;
my
@all_names
= prop_aliases(
$mod_name
);
if
(
grep
{
$_
eq
$loose
}
@Unicode::UCD::suppressed_properties
) {
is(
@all_names
, 0,
"prop_aliases('$mod_name') returns undef since $alias is not installed"
);
next
;
}
elsif
(!
@all_names
) {
fail(
"prop_aliases('$mod_name')"
);
diag(
"'$alias' is unknown to prop_aliases()"
);
next
;
}
if
(
$count
== 0) {
@names_via_short
= prop_aliases(
$mod_name
);
last
unless
is(
$names_via_short
[0],
$alias
,
"prop_aliases: '$alias' is the short name for '$mod_name'"
);
$short_name
=
$alias
;
}
elsif
(
$count
== 1) {
if
(
$alias
ne
$short_name
) {
my
@names_via_full
= prop_aliases(
$mod_name
);
is_deeply(\
@names_via_full
, \
@names_via_short
,
"prop_aliases() returns the same list for both '$short_name' and '$mod_name'"
);
}
is(prop_aliases(
$short_name
),
$alias
,
"prop_aliases: '$alias' is the long name for '$short_name'"
);
}
else
{
is_deeply(\
@all_names
, \
@names_via_short
,
"prop_aliases() returns the same list for both '$short_name' and '$mod_name'"
);
ok((
grep
{
$_
=~ /^
$alias
$/i }
@all_names
),
"prop_aliases: '$alias' is listed as an alias for '$mod_name'"
);
}
$count
++;
}
}
}
foreach
my
$alias
(
sort
keys
%Unicode::UCD::loose_to_file_of
) {
next
if
$alias
=~ /=/;
my
$lc_name
=
lc
$alias
;
my
$loose
=
&Unicode::UCD::loose_name
(
$lc_name
);
next
if
exists
$props
{
$loose
};
$props
{
$loose
} = 1;
my
$mod_name
=
"$extra_chars$alias"
;
my
@aliases
= prop_aliases(
$mod_name
);
my
$found_it
=
grep
{
&Unicode::UCD::loose_name
(
lc
$_
) eq
$lc_name
}
@aliases
;
if
(
$found_it
) {
pass(
"prop_aliases: '$lc_name' is listed as an alias for '$mod_name'"
);
}
elsif
(
$lc_name
=~ /l[_&]$/) {
my
@LC
= prop_aliases(
'Is_LC'
);
is_deeply(\
@aliases
, \
@LC
,
"prop_aliases: '$lc_name' returns the same list as 'Is_LC'"
);
}
else
{
my
$stripped
=
$lc_name
=~ s/^is//;
if
(
$stripped
) {
$found_it
=
grep
{
&Unicode::UCD::loose_name
(
lc
$_
) eq
$lc_name
}
@aliases
;
}
if
(!
$found_it
) {
$found_it
=
grep
{
&Unicode::UCD::loose_name
(s/^In_(.*)/\L$1/r) eq
$lc_name
}
@aliases
;
$lc_name
=
"in$lc_name"
if
$found_it
;
}
my
$message
=
"prop_aliases: '$lc_name' is listed as an alias for '$mod_name'"
;
(
$found_it
) ? pass(
$message
) : fail(
$message
);
}
}
for
my
$prop
(
qw(Alnum Blank Cntrl Digit Graph Print Word XDigit)
) {
my
@expected
= (
$prop
,
"XPosix$prop"
);
my
@got
= prop_aliases(
$prop
);
splice
@got
, 2;
is_deeply(\
@got
, \
@expected
,
"Got expected aliases for $prop"
);
}
my
$done_equals
= 0;
foreach
my
$alias
(
keys
%Unicode::UCD::stricter_to_file_of
) {
if
(
$alias
=~ /=/) {
next
if
$done_equals
;
$done_equals
= 1;
}
my
$lc_name
=
lc
$alias
;
my
@list
= prop_aliases(
$alias
);
if
(
$alias
=~ /^_/) {
is(
@list
, 0,
"prop_aliases: '$lc_name' returns an empty list since it is internal_only"
);
}
elsif
(
$alias
=~ /=/) {
is(
@list
, 0,
"prop_aliases: '$lc_name' returns an empty list since is illegal property name"
);
}
else
{
ok((
grep
{
lc
$_
eq
$lc_name
}
@list
),
"prop_aliases: '$lc_name' is listed as an alias for '$alias'"
);
}
}
is(prop_value_aliases(
"unknown property"
,
"unknown value"
),
undef
,
"prop_value_aliases(<unknown property>, <unknown value>) returns <undef>"
);
is(prop_value_aliases(
undef
,
undef
),
undef
,
"prop_value_aliases(undef, undef) returns <undef>"
);
is((prop_value_aliases(
"na"
,
"A"
)),
"A"
,
"test that prop_value_aliases returns its input for properties that don't have synonyms"
);
is(prop_value_aliases(
"isgc"
,
"C"
),
undef
,
"prop_value_aliases('isgc', 'C') returns <undef> since is not covered Perl extension"
);
is(prop_value_aliases(
"gc"
,
"isC"
),
undef
,
"prop_value_aliases('gc', 'isC') returns <undef> since is not covered Perl extension"
);
is(prop_value_aliases(
"Any"
,
"None"
),
undef
,
"prop_value_aliases('Any', 'None') returns <undef> since is Perl extension and 'None' is not valid"
);
is(prop_value_aliases(
"lc"
,
"A"
),
"A"
,
"prop_value_aliases('lc', 'A') returns its input, as docs say it does"
);
my
%pva_tested
;
SKIP: {
skip
"PropValueAliases.txt is not in this Unicode version"
, 1
if
$v_unicode_version
lt v3.2.0;
open
my
$propvalues
,
"<"
,
"../lib/unicore/PropValueAliases.txt"
or
die
"Can't open Unicode PropValueAliases.txt"
;
local
$/ =
"\n"
;
my
$prev_prop
=
""
;
my
@this_prop_values
;
while
(<
$propvalues
>) {
s/\s*
next
if
/^\s* $/x;
chomp
;
local
$/ =
$input_record_separator
;
s/CCC133/CCC132/g
if
$v_unicode_version
eq v6.1.0;
my
@fields
=
split
/\s*;\s*/;
my
$prop
=
shift
@fields
;
if
(
$prop
eq
'qc'
&&
$v_unicode_version
le v4.0.0) {
$prop
=
"NFKC_QC"
;
}
if
(
$prev_prop
ne
$prop
) {
if
(
$prev_prop
ne
""
) {
my
@ucd_function_values
= prop_values(
$prev_prop
);
@ucd_function_values
= ()
unless
@ucd_function_values
;
if
(
$prev_prop
eq
'ccc'
&&
$v_unicode_version
le v6.0.0) {
@ucd_function_values
=
grep
{ /\D/ }
@ucd_function_values
;
}
push
@this_prop_values
,
"Non_Canon"
if
$prev_prop
eq
'dt'
;
my
@file_values
=
undef
;
@file_values
=
sort
{
lc
(
$a
=~ s/_//gr) cmp
lc
(
$b
=~ s/_//gr) }
@this_prop_values
if
@this_prop_values
;
is_deeply(\
@ucd_function_values
, \
@file_values
,
"prop_values('$prev_prop') returns correct list of values"
);
}
$prev_prop
=
$prop
;
undef
@this_prop_values
;
}
my
$count
= 0;
my
$short_name
;
my
@names_via_short
;
my
$mod_prop
=
"$extra_chars$prop"
;
if
(
$prop
eq
'blk'
&&
$v_unicode_version
le v5.0.0) {
foreach
my
$element
(
@fields
) {
$element
=~ s/-/_/g;
}
}
if
(
$fields
[0] eq
'n/a'
) {
$fields
[0] =
$fields
[1];
}
elsif
(
$fields
[0] ne
$fields
[1]
&&
&Unicode::UCD::loose_name
(
lc
$fields
[0])
eq
&Unicode::UCD::loose_name
(
lc
$fields
[1])
&&
$fields
[1] !~ /[[:upper:]]/)
{
$fields
[1] =
$fields
[0];
}
splice
(
@fields
, 0, 0,
splice
(
@fields
, 1, 2))
if
$prop
eq
'ccc'
;
my
$loose_prop
=
&Unicode::UCD::loose_name
(
lc
$prop
);
my
$suppressed
=
grep
{
$_
eq
$loose_prop
}
@Unicode::UCD::suppressed_properties
;
push
@this_prop_values
,
$fields
[0]
unless
$suppressed
;
foreach
my
$value
(
@fields
) {
if
(
$suppressed
) {
is(prop_value_aliases(
$prop
,
$value
),
undef
,
"prop_value_aliases('$prop', '$value') returns undef for suppressed property $prop"
);
next
;
}
elsif
(
grep
{
$_
eq (
"$loose_prop="
.
&Unicode::UCD::loose_name
(
lc
$value
)) }
@Unicode::UCD::suppressed_properties
) {
is(prop_value_aliases(
$prop
,
$value
),
undef
,
"prop_value_aliases('$prop', '$value') returns undef for suppressed property $prop=$value"
);
next
;
}
my
$mod_value
=
"$extra_chars$value"
;
if
(
$value
=~ / ^ -? \d+ (?: [\/.] \d+ )? $ /x) {
is(prop_value_aliases(
$mod_prop
,
$mod_value
),
undef
,
"prop_value_aliases('$mod_prop', '$mod_value') returns undef because '$mod_value' should be strictly matched"
);
$mod_value
=
$value
;
}
if
(
$count
== 0) {
@names_via_short
= prop_value_aliases(
$mod_prop
,
$mod_value
);
last
unless
is(
$names_via_short
[0],
$value
,
"prop_value_aliases: In '$prop', '$value' is the short name for '$mod_value'"
);
$short_name
=
$value
;
}
elsif
(
$count
== 1) {
if
(
$value
ne
$short_name
) {
my
@names_via_full
=
prop_value_aliases(
$mod_prop
,
$mod_value
);
is_deeply(\
@names_via_full
, \
@names_via_short
,
"In '$prop', prop_value_aliases() returns the same list for both '$short_name' and '$mod_value'"
);
}
is(prop_value_aliases(
$prop
,
$short_name
),
$value
,
"'$value' is the long name for prop_value_aliases('$prop', '$short_name')"
);
}
else
{
my
@all_names
= prop_value_aliases(
$mod_prop
,
$mod_value
);
is_deeply(\
@all_names
, \
@names_via_short
,
"In '$prop', prop_value_aliases() returns the same list for both '$short_name' and '$mod_value'"
);
ok((
grep
{
&Unicode::UCD::loose_name
(
lc
$_
) eq
&Unicode::UCD::loose_name
(
lc
$value
) } prop_value_aliases(
$prop
,
$short_name
)),
"'$value' is listed as an alias for prop_value_aliases('$prop', '$short_name')"
);
}
$pva_tested
{
&Unicode::UCD::loose_name
(
lc
$prop
) .
"="
.
&Unicode::UCD::loose_name
(
lc
$value
)} = 1;
$count
++;
}
}
}
foreach
my
$hash
(\
%Unicode::UCD::loose_to_file_of
, \
%Unicode::UCD::stricter_to_file_of
) {
foreach
my
$test
(
sort
keys
%$hash
) {
next
if
exists
$pva_tested
{
$test
};
my
(
$prop
,
$value
) =
split
"="
,
$test
;
next
unless
defined
$value
;
my
$mod_value
;
if
(
$hash
== \
%Unicode::UCD::loose_to_file_of
) {
$mod_value
=
"$extra_chars$value"
;
}
else
{
next
if
$value
!~ /\D/ &&
exists
$hash
->{
"$prop=$value.0"
};
next
unless
is(prop_value_aliases(
$prop
,
"$extra_chars$value"
),
undef
,
"prop_value_aliases('$prop', '$extra_chars$value') returns undef since '$value' should be strictly matched"
),
$mod_value
=
$value
;
while
(
$mod_value
=~ s/(\d)(\d)/$1_$2/g) {}
}
my
$mod_prop
=
"$extra_chars$prop"
;
if
(
$prop
eq
'gc'
&&
$value
=~ /l[_&]$/) {
my
@LC
= prop_value_aliases(
'gc'
,
'lc'
);
my
@l_
= prop_value_aliases(
$mod_prop
,
$mod_value
);
is_deeply(\
@l_
, \
@LC
,
"prop_value_aliases('$mod_prop', '$mod_value) returns the same list as prop_value_aliases('gc', 'lc')"
);
}
else
{
ok((
grep
{
&Unicode::UCD::loose_name
(
lc
$_
) eq
&Unicode::UCD::loose_name
(
lc
$value
) }
prop_value_aliases(
$mod_prop
,
$mod_value
)),
"'$value' is listed as an alias for prop_value_aliases('$mod_prop', '$mod_value')"
);
}
}
}
undef
%pva_tested
;
no
warnings
'once'
;
my
$prop
;
my
(
$invlist_ref
,
$invmap_ref
,
$format
,
$missing
);
if
($::IS_ASCII) {
$prop
=
"uc"
;
(
$invlist_ref
,
$invmap_ref
,
$format
,
$missing
) = prop_invmap(
$prop
);
is(
$format
,
'al'
,
"prop_invmap() format of '$prop' is 'al'"
);
is(
$missing
,
'0'
,
"prop_invmap() missing of '$prop' is '0'"
);
is(
$invlist_ref
->[1], 0x61,
"prop_invmap('$prop') list[1] is 0x61"
);
is(
$invmap_ref
->[1], 0x41,
"prop_invmap('$prop') map[1] is 0x41"
);
$prop
=
"upper"
;
(
$invlist_ref
,
$invmap_ref
,
$format
,
$missing
) = prop_invmap(
$prop
);
is(
$format
,
's'
,
"prop_invmap() format of '$prop' is 's"
);
is(
$missing
,
'N'
,
"prop_invmap() missing of '$prop' is 'N'"
);
is(
$invlist_ref
->[1], 0x41,
"prop_invmap('$prop') list[1] is 0x41"
);
is(
$invmap_ref
->[1],
'Y'
,
"prop_invmap('$prop') map[1] is 'Y'"
);
$prop
=
"lower"
;
(
$invlist_ref
,
$invmap_ref
,
$format
,
$missing
) = prop_invmap(
$prop
);
is(
$format
,
's'
,
"prop_invmap() format of '$prop' is 's'"
);
is(
$missing
,
'N'
,
"prop_invmap() missing of '$prop' is 'N'"
);
is(
$invlist_ref
->[1], 0x61,
"prop_invmap('$prop') list[1] is 0x61"
);
is(
$invmap_ref
->[1],
'Y'
,
"prop_invmap('$prop') map[1] is 'Y'"
);
$prop
=
"lc"
;
(
$invlist_ref
,
$invmap_ref
,
$format
,
$missing
) = prop_invmap(
$prop
);
my
$lc_format
= (
$v_unicode_version
ge v3.2.0) ?
'al'
:
'a'
;
is(
$format
,
$lc_format
,
"prop_invmap() format of '$prop' is '$lc_format"
);
is(
$missing
,
'0'
,
"prop_invmap() missing of '$prop' is '0'"
);
is(
$invlist_ref
->[1], 0x41,
"prop_invmap('$prop') list[1] is 0x41"
);
is(
$invmap_ref
->[1], 0x61,
"prop_invmap('$prop') map[1] is 0x61"
);
}
if
(
$v_unicode_version
gt v3.1.0) {
$prop
=
"ASCII_Hex_Digit"
;
(
$invlist_ref
,
$invmap_ref
,
$format
,
$missing
) = prop_invmap(
$prop
);
is(
$format
,
's'
,
"prop_invmap() format of '$prop' is 's'"
);
is(
$missing
,
'N'
,
"prop_invmap() missing of '$prop' is 'N'"
);
if
($::IS_ASCII) {
is_deeply(
$invlist_ref
, [ 0x0000, 0x0030, 0x003A,
0x0041, 0x0047,
0x0061, 0x0067, 0x110000
],
"prop_invmap('$prop') code point list is correct"
);
}
elsif
($::IS_EBCDIC) {
is_deeply(
$invlist_ref
, [
utf8::unicode_to_native(0x0000),
utf8::unicode_to_native(0x0061), utf8::unicode_to_native(0x0066) + 1,
utf8::unicode_to_native(0x0041), utf8::unicode_to_native(0x0046) + 1,
utf8::unicode_to_native(0x0030), utf8::unicode_to_native(0x0039) + 1,
utf8::unicode_to_native(0x110000)
],
"prop_invmap('$prop') code point list is correct"
);
}
is_deeply(
$invmap_ref
, [
'N'
,
'Y'
,
'N'
,
'Y'
,
'N'
,
'Y'
,
'N'
,
'N'
] ,
"prop_invmap('$prop') map list is correct"
);
}
is(prop_invlist(
"Unknown property"
),
undef
,
"prop_invlist(<Unknown property>) returns undef"
);
is(prop_invlist(
undef
),
undef
,
"prop_invlist(undef) returns undef"
);
is(prop_invlist(
"Any"
), 2,
"prop_invlist('Any') returns the number of elements in scalar context"
);
my
@invlist
= prop_invlist(
"Is_Any"
);
is_deeply(\
@invlist
, [ 0, 0x110000 ],
"prop_invlist works on 'Is_' prefixes"
);
is(prop_invlist(
"Is_Is_Any"
),
undef
,
"prop_invlist('Is_Is_Any') returns <undef> since two is's"
);
is(prop_invlist(
"InKana"
),
undef
,
"prop_invlist(<user-defined property returns undef>)"
);
if
(
$v_unicode_version
gt v3.1.0) {
if
($::IS_ASCII) {
@invlist
= prop_invlist(
"AHex"
);
is_deeply(\
@invlist
, [ 0x0030, 0x003A, 0x0041,
0x0047, 0x0061, 0x0067 ],
"prop_invlist('AHex') is exactly the expected set of points"
);
@invlist
= prop_invlist(
"AHex=f"
);
is_deeply(\
@invlist
, [ 0x0000, 0x0030, 0x003A, 0x0041,
0x0047, 0x0061, 0x0067 ],
"prop_invlist('AHex=f') is exactly the expected set of points"
);
}
elsif
($::IS_EBCDIC) {
@invlist
= prop_invlist(
"AHex"
);
is_deeply(\
@invlist
, [
utf8::unicode_to_native(0x0061), utf8::unicode_to_native(0x0066) + 1,
utf8::unicode_to_native(0x0041), utf8::unicode_to_native(0x0046) + 1,
utf8::unicode_to_native(0x0030), utf8::unicode_to_native(0x0039) + 1,
],
"prop_invlist('AHex') is exactly the expected set of points"
);
@invlist
= prop_invlist(
"AHex=f"
);
is_deeply(\
@invlist
, [
utf8::unicode_to_native(0x0000),
utf8::unicode_to_native(0x0061),
utf8::unicode_to_native(0x0066) + 1,
utf8::unicode_to_native(0x0041),
utf8::unicode_to_native(0x0046) + 1,
utf8::unicode_to_native(0x0030),
utf8::unicode_to_native(0x0039) + 1,
],
"prop_invlist('AHex=f') is exactly the expected set of points"
);
}
}
sub
fail_with_diff ($$$$) {
my
(
$prop
,
$official
,
$constructed
,
$tested_function_name
) =
@_
;
if
(!
$ENV
{PERL_TEST_DIFF}) {
is(
$constructed
,
$official
,
"$tested_function_name('$prop')"
);
diag(
"Set environment variable PERL_TEST_DIFF=diff_tool to see just "
.
"the differences."
);
return
;
}
fail(
"$tested_function_name('$prop')"
);
my
$off
= File::Temp->new();
local
$/ =
"\n"
;
chomp
$official
;
print
$off
$official
,
"\n"
;
close
$off
||
die
"Can't close official"
;
chomp
$constructed
;
my
$gend
= File::Temp->new();
print
$gend
$constructed
,
"\n"
;
close
$gend
||
die
"Can't close gend"
;
my
$diff
= File::Temp->new();
system
(
"$ENV{PERL_TEST_DIFF} $off $gend > $diff"
);
open
my
$fh
,
"<"
,
$diff
||
die
"Can't open $diff"
;
my
@diffs
= <
$fh
>;
diag(
"In the diff output below '<' marks lines from the filesystem tables;\n'>' are from $tested_function_name()"
);
diag(
@diffs
);
}
my
%tested_invlist
;
foreach
my
$set_of_tables
(\
%Unicode::UCD::stricter_to_file_of
, \
%Unicode::UCD::loose_to_file_of
)
{
foreach
my
$table
(
sort
keys
%$set_of_tables
) {
my
$mod_table
;
my
(
$prop_only
,
$value
) =
split
"="
,
$table
;
if
(
defined
$value
) {
if
(
$set_of_tables
== \
%Unicode::UCD::loose_to_file_of
) {
$value
=
"$extra_chars$value"
;
}
else
{
next
unless
is(prop_invlist(
"$prop_only=$extra_chars$value"
),
undef
,
"prop_invlist('$prop_only=$extra_chars$value') returns undef since should be strictly matched"
);
while
(
$value
=~ s/(\d)(\d)/$1_$2/g) {}
}
$mod_table
=
"$extra_chars$prop_only = $value"
;
}
else
{
if
(
$set_of_tables
== \
%Unicode::UCD::loose_to_file_of
) {
$mod_table
=
"$extra_chars$table"
;
}
else
{
$mod_table
=
$table
;
while
(
$mod_table
=~ s/(\d)(\d)/$1_$2/g) {}
}
}
my
@tested
= prop_invlist(
$mod_table
);
if
(
$table
=~ /^_/) {
is(
@tested
, 0,
"prop_invlist('$mod_table') returns an empty list since is internal-only"
);
next
;
}
my
$file
=
$set_of_tables
->{
$table
};
if
(
exists
$tested_invlist
{
$file
}) {
is_deeply(\
@tested
,
$tested_invlist
{
$file
},
"prop_invlist('$mod_table') gave same results as its name synonym"
);
next
;
}
$tested_invlist
{
$file
} = dclone \
@tested
;
my
$invert
=
$file
=~ s/!//;
my
$official
;
if
(
$file
=~ s!^
$official
=
$Unicode::UCD::inline_definitions
[
$file
];
}
else
{
$official
=
do
"unicore/lib/$file.pl"
;
}
$official
=~ s/\s*(
local
$/ =
"\n"
;
chomp
$official
;
$/ =
$input_record_separator
;
if
(
$invert
) {
if
(
@tested
&&
$tested
[0] == 0) {
shift
@tested
;
}
else
{
unshift
@tested
, 0;
}
}
my
$tested
=
join
"\n"
, (
"V"
.
scalar
@tested
),
@tested
;
local
$/ =
"\n"
;
chomp
$tested
;
$/ =
$input_record_separator
;
if
(
$tested
ne
$official
) {
fail_with_diff(
$mod_table
,
$official
,
$tested
,
"prop_invlist"
);
next
;
}
pass(
"prop_invlist('$mod_table')"
);
}
}
@list
= prop_invmap(
"Unknown property"
);
is (
@list
, 0,
"prop_invmap(<Unknown property>) returns an empty list"
);
@list
= prop_invmap(
undef
);
is (
@list
, 0,
"prop_invmap(undef) returns an empty list"
);
ok (!
eval
"prop_invmap('gc')"
&& $@ ne
""
,
"prop_invmap('gc') dies in scalar context"
);
@list
= prop_invmap(
"_X_Begin"
);
is (
@list
, 0,
"prop_invmap(<internal property>) returns an empty list"
);
@list
= prop_invmap(
"InKana"
);
is(
@list
, 0,
"prop_invmap(<user-defined property returns undef>)"
);
@list
= prop_invmap(
"Perl_Decomposition_Mapping"
),
undef
,
is(
@list
, 0,
"prop_invmap('Perl_Decomposition_Mapping') returns <undef> since internal-Perl-only"
);
@list
= prop_invmap(
"Perl_Charnames"
),
undef
,
is(
@list
, 0,
"prop_invmap('Perl_Charnames') returns <undef> since internal-Perl-only"
);
@list
= prop_invmap(
"Is_Is_Any"
);
is(
@list
, 0,
"prop_invmap('Is_Is_Any') returns <undef> since two is's"
);
my
@legacy_file_format
= (
qw( Bidi_Mirroring_Glyph
NFKC_Casefold
)
);
my
%tested_invmaps
;
PROPERTY:
foreach
my
$prop
(
sort
(
keys
%props
)) {
my
$loose_prop
=
&Unicode::UCD::loose_name
(
lc
$prop
);
my
$suppressed
=
grep
{
$_
eq
$loose_prop
}
@Unicode::UCD::suppressed_properties
;
my
$actual_lookup_prop
;
my
$display_prop
;
my
(
$name
,
$full_name
) = prop_aliases(
$prop
);
if
(!
$name
) {
if
(!
$suppressed
) {
fail(
"prop_invmap('$prop')"
);
diag(
"is unknown to prop_aliases(), and we need it in order to test prop_invmap"
);
}
next
PROPERTY;
}
$name
=
&Unicode::UCD::loose_name
(
lc
$name
);
$name
=
&Unicode::UCD::loose_name
(
lc
$prop
)
if
exists
$Unicode::UCD::combination_property
{
$name
};
$display_prop
=
"$extra_chars$prop"
unless
$display_prop
;
$actual_lookup_prop
=
$display_prop
unless
$actual_lookup_prop
;
my
(
$invlist_ref
,
$invmap_ref
,
$format
,
$missing
) = prop_invmap(
$actual_lookup_prop
);
my
$return_ref
= [
$invlist_ref
,
$invmap_ref
,
$format
,
$missing
];
if
(
exists
$tested_invmaps
{
$name
}) {
is_deeply(
$return_ref
,
$tested_invmaps
{
$name
},
"prop_invmap('$display_prop') gave same results as its synonym, '$name'"
);
next
PROPERTY;
}
$tested_invmaps
{
$name
} = dclone
$return_ref
;
if
(
$suppressed
) {
if
(
defined
$format
) {
fail(
"prop_invmap('$display_prop')"
);
diag(
"did not return undef for suppressed property $prop"
);
}
next
PROPERTY;
}
elsif
(!
defined
$format
) {
fail(
"prop_invmap('$display_prop')"
);
diag(
"'$prop' is unknown to prop_invmap()"
);
next
PROPERTY;
}
if
(
@$invlist_ref
!=
@$invmap_ref
) {
fail(
"prop_invmap('$display_prop')"
);
diag(
"invlist has "
.
scalar
@$invlist_ref
.
" while invmap has "
.
scalar
@$invmap_ref
.
" elements"
);
next
PROPERTY;
}
if
(
$invlist_ref
->[-1] != 0x110000) {
fail(
"prop_invmap('$display_prop')"
);
diag(
"The last inversion list element is not 0x110000"
);
next
PROPERTY;
}
my
$upper_limit_subtract
;
if
(
$invmap_ref
->[-1] eq
$missing
) {
$upper_limit_subtract
= 1;
}
elsif
(
$invmap_ref
->[-1] eq
'Y'
&& !
grep
{
$_
!~ /[YN]/ }
@$invmap_ref
) {
$upper_limit_subtract
= 0;
if
(
$invlist_ref
->[-2] <= 0x10FFFF &&
$invmap_ref
->[-2] eq
'Y'
) {
pop
@$invlist_ref
;
pop
@$invmap_ref
;
}
}
else
{
fail(
"prop_invmap('$display_prop')"
);
diag(
"The last inversion list element is '$invmap_ref->[-1]', and should be '$missing'"
);
next
PROPERTY;
}
if
(
$name
eq
'bmg'
) {
if
(
$missing
ne
""
) {
fail(
"prop_invmap('$display_prop')"
);
diag(
"The missings should be \"\"; got '$missing'"
);
next
PROPERTY;
}
}
elsif
(
$format
=~ /^ a (?!r) /x) {
if
(
$full_name
eq
'Perl_Decimal_Digit'
) {
if
(
$missing
ne
""
) {
fail(
"prop_invmap('$display_prop')"
);
diag(
"The missings should be \"\"; got '$missing'"
);
next
PROPERTY;
}
}
}
elsif
(
$missing
=~ /[<>]/) {
fail(
"prop_invmap('$display_prop')"
);
diag(
"The missings should NOT be something with <...>'"
);
next
PROPERTY;
}
my
$proxy_prop
=
$name
;
if
(
$full_name
eq
'Present_In'
) {
$proxy_prop
=
"age"
;
}
elsif
(
$full_name
eq
'Simple_Case_Folding'
||
$full_name
=~ /Simple_ (.) .*? case_Mapping /x)
{
if
(
$full_name
eq
'Simple_Case_Folding'
) {
$proxy_prop
=
'cf'
;
}
else
{
$proxy_prop
=
lc
$1 .
"c"
;
}
if
(
$format
ne
"a"
) {
fail(
"prop_invmap('$display_prop')"
);
diag(
"The format should be 'a'; got '$format'"
);
next
PROPERTY;
}
}
if
(
$format
!~ / ^ (?: a [der]? | ale? | n | sl? ) $ /x) {
fail(
"prop_invmap('$display_prop')"
);
diag(
"Unknown format '$format'"
);
next
PROPERTY;
}
my
$base_file
;
my
$official
;
if
(
$name
ne
'na'
&& (
$name
eq
'blk'
||
defined
(
$base_file
=
$Unicode::UCD::loose_property_to_file_of
{
$proxy_prop
})
||
exists
$Unicode::UCD::loose_to_file_of
{
$proxy_prop
}
||
$name
eq
"dm"
))
{
my
$file
;
my
$is_binary
= 0;
if
(
$name
eq
'blk'
) {
$base_file
=
"This is a dummy name"
;
my
$blocks_ref
= charblocks();
if
($::IS_EBCDIC) {
my
%new_blocks
;
my
$index
= 0;
foreach
my
$block
(
values
%$blocks_ref
) {
foreach
my
$range
(
@$block
) {
$new_blocks
{
$index
++}[0] =
$range
;
}
}
$blocks_ref
= \
%new_blocks
;
}
$official
=
""
;
for
my
$range
(
sort
{
$a
->[0][0] <=>
$b
->[0][0] }
values
%$blocks_ref
)
{
if
(
$range
->[0][0] ==
$range
->[0][1]) {
$official
.=
sprintf
(
"%X\t\t%s\n"
,
$range
->[0][0],
$range
->[0][2]);
}
else
{
$official
.=
sprintf
(
"%X\t%X\t%s\n"
,
$range
->[0][0],
$range
->[0][1],
$range
->[0][2]);
}
}
}
else
{
$base_file
=
"Decomposition"
if
$format
eq
'ad'
;
if
(!
defined
$base_file
) {
$base_file
=
$Unicode::UCD::loose_to_file_of
{
$proxy_prop
};
$is_binary
= (
$base_file
=~ s/!//) ? -1 : 1;
$base_file
=
"lib/$base_file"
unless
$base_file
=~ m!^
}
if
(
$base_file
=~ s!^
$official
=
$Unicode::UCD::inline_definitions
[
$base_file
];
}
else
{
$official
=
do
"unicore/$base_file.pl"
;
}
$official
=~ s/\s*(
if
(
$format
eq
'ad'
) {
my
@official
=
split
/\n/,
$official
;
$official
=
""
;
foreach
my
$line
(
@official
) {
my
(
$start
,
$end
,
$value
)
=
$line
=~ / ^ (.+?) \t (.*?) \t (.+?)
\s* ( \
$value
=~ s/<.*?> //;
$official
.=
"$start\t\t$value\n"
;
if
(
$end
ne
""
) {
for
my
$i
(
hex
(
$start
) + 1 ..
hex
$end
) {
$official
.=
sprintf
"%X\t\t%s\n"
,
$i
,
$value
;
}
}
}
}
}
local
$/ =
"\n"
;
chomp
$official
;
$/ =
$input_record_separator
;
my
$swash_name
=
$Unicode::UCD::file_to_swash_name
{
$base_file
};
my
$specials_ref
;
my
$file_format
;
if
(
$swash_name
) {
$specials_ref
=
$Unicode::UCD::SwashInfo
{
$swash_name
}{
'specials_name'
};
if
(
$specials_ref
) {
no
strict
'refs'
;
$specials_ref
= \%{
$specials_ref
};
}
$file_format
=
$Unicode::UCD::SwashInfo
{
$swash_name
}{
'format'
};
}
my
$file_range_format
= (
grep
{
$full_name
eq
$_
}
@legacy_file_format
)
?
"%04X"
:
"%X"
;
my
$file_map_format
= (
$full_name
eq
'Decomposition_Mapping'
)
?
"%04X"
:
$file_range_format
;
if
(
$full_name
=~ /^ ( Case_Folding
| (Lower|Title|Upper) case_Mapping )
$ /x)
{
my
@list
;
for
(
split
"\n"
,
$official
) {
my
(
$start
,
$end
,
$value
) = / ^ (.+?) \t (.*?) \t (.+?)
\s* ( \
$end
=
$start
if
$end
eq
""
;
push
@list
, [
hex
$start
,
hex
$end
,
hex
$value
];
}
my
$i
= 0;
foreach
my
$utf8_cp
(
sort
keys
%$specials_ref
) {
my
$cp
=
$utf8_cp
;
utf8::decode(
$cp
);
$cp
=
ord
$cp
;
while
(
$i
<
@list
-1 ) {
last
if
$cp
<=
$list
[
$i
][1];
$i
++;
}
next
if
$cp
<
$list
[
$i
][0];
if
(
$cp
==
$list
[
$i
][0]) {
if
(
$list
[
$i
][1] >
$list
[
$i
][0]) {
$list
[
$i
][0]++;
}
else
{
splice
@list
,
$i
, 1;
}
}
else
{
splice
@list
,
$i
, 1,
[
$list
[
$i
][0],
$cp
- 1,
$list
[
$i
][2] ],
[
$cp
+ 1,
$list
[
$i
][1],
$list
[
$i
][2] ];
}
}
$official
=
""
;
for
my
$element
(
@list
) {
$official
.=
"\n"
if
$official
;
if
(
$element
->[1] ==
$element
->[0]) {
$official
.=
sprintf
"$file_range_format\t\t$file_map_format"
,
$element
->[0],
$element
->[2];
}
else
{
$official
.=
sprintf
"$file_range_format\t$file_range_format\t$file_map_format"
,
$element
->[0],
$element
->[1],
$element
->[2];
}
}
}
elsif
(
$full_name
=~ / ^ Simple_(Case_Folding|(Lower|Title|Upper)case_Mapping) $ /x)
{
undef
$specials_ref
;
}
elsif
(
$format
!~ /^a/ &&
defined
$file_format
&&
$file_format
eq
'x'
) {
my
@lines
=
split
"\n"
,
$official
;
foreach
my
$line
(
@lines
) {
my
(
$lower
,
$upper
,
$map
) =
split
"\t"
,
$line
;
$line
=
"$lower\t$upper\t"
.
hex
$map
;
}
$official
=
join
"\n"
,
@lines
;
}
my
$tested_map
=
""
;
my
$binary_count
= 0;
my
%specials
=
%$specials_ref
if
$specials_ref
;
for
(
my
$i
= 0;
$i
<
@$invlist_ref
-
$upper_limit_subtract
;
$i
++) {
if
(
ref
$invmap_ref
->[
$i
]
&& (
$format
eq
'ad'
||
$format
=~ /^ . l /x))
{
if
(
$format
eq
'sl'
) {
if
(
$full_name
ne
'Name_Alias'
) {
$invmap_ref
->[
$i
] =
join
" "
, @{
$invmap_ref
->[
$i
]};
}
else
{
if
(
ref
$invmap_ref
->[
$i
]) {
my
$hex_cp
=
sprintf
(
"%X"
,
$invlist_ref
->[
$i
]);
my
$concatenated
=
$invmap_ref
->[
$i
][0];
for
(
my
$j
= 1;
$j
< @{
$invmap_ref
->[
$i
]};
$j
++) {
$concatenated
.=
"\n$hex_cp\t\t"
.
$invmap_ref
->[
$i
][
$j
];
}
$invmap_ref
->[
$i
] =
$concatenated
;
}
}
}
elsif
(
$format
=~ / ^ al e? $/x) {
my
$value
;
my
$key
=
chr
$invlist_ref
->[
$i
];
utf8::encode(
$key
);
if
(!
defined
(
$value
=
delete
$specials
{
$key
})) {
fail(
"prop_invmap('$display_prop')"
);
diag(
sprintf
"There was no specials element for %04X"
,
$invlist_ref
->[
$i
]);
next
PROPERTY;
}
my
$packed
=
pack
"W*"
, @{
$invmap_ref
->[
$i
]};
utf8::upgrade(
$packed
);
if
(
$value
ne
$packed
) {
fail(
"prop_invmap('$display_prop')"
);
diag(
sprintf
"For %04X, expected the mapping to be "
.
"'$packed', but got '$value'"
,
$invlist_ref
->[
$i
]);
next
PROPERTY;
}
if
((
$i
> 0 &&
$invlist_ref
->[
$i
] <=
$invlist_ref
->[
$i
-1])
||
$invlist_ref
->[
$i
] >=
$invlist_ref
->[
$i
+1])
{
fail(
"prop_invmap('$display_prop')"
);
diag(
sprintf
"Range beginning at %04X is out-of-order."
,
$invlist_ref
->[
$i
]);
next
PROPERTY;
}
next
;
}
elsif
(
$format
eq
'ad'
) {
$invmap_ref
->[
$i
] =
join
" "
,
map
{
sprintf
"%04X"
,
$_
}
@{
$invmap_ref
->[
$i
]};
}
else
{
fail(
"prop_invmap('$display_prop')"
);
diag(
"Can't handle format '$format'"
);
next
PROPERTY;
}
}
elsif
(
defined
$file_format
&&
$file_format
eq
'ax'
) {
$invmap_ref
->[
$i
] =
sprintf
(
"%X"
,
$invmap_ref
->[
$i
]);
}
elsif
(
$format
eq
'ad'
||
$format
eq
'ale'
) {
if
(
$invmap_ref
->[
$i
] =~ / ^ -? \d+ $ /x
&&
$invmap_ref
->[
$i
] != 0)
{
my
$next
=
$invmap_ref
->[
$i
] + 1;
$invmap_ref
->[
$i
] =
sprintf
(
$file_map_format
,
$invmap_ref
->[
$i
]);
if
(
$invlist_ref
->[
$i
+1] >
$invlist_ref
->[
$i
] + 1) {
splice
@$invlist_ref
,
$i
+1, 0,
$invlist_ref
->[
$i
] + 1;
splice
@$invmap_ref
,
$i
+1, 0,
$next
;
}
}
if
(
$format
eq
'ale'
&&
$invmap_ref
->[
$i
] eq
""
) {
my
$value
;
my
$key
=
chr
$invlist_ref
->[
$i
];
utf8::encode(
$key
);
if
(!
defined
(
$value
=
delete
$specials
{
$key
})) {
fail(
"prop_invmap('$display_prop')"
);
diag(
sprintf
"There was no specials element for %04X"
,
$invlist_ref
->[
$i
]);
next
PROPERTY;
}
if
(
$value
ne
""
) {
fail(
"prop_invmap('$display_prop')"
);
diag(
sprintf
"For %04X, expected the mapping to be \"\", but got '$value'"
,
$invlist_ref
->[
$i
]);
next
PROPERTY;
}
if
((
$i
> 0 &&
$invlist_ref
->[
$i
] <=
$invlist_ref
->[
$i
-1])
||
$invlist_ref
->[
$i
] >=
$invlist_ref
->[
$i
+1])
{
fail(
"prop_invmap('$display_prop')"
);
diag(
sprintf
"Range beginning at %04X is out-of-order."
,
$invlist_ref
->[
$i
]);
next
PROPERTY;
}
next
;
}
}
elsif
(
$is_binary
) {
$invmap_ref
->[
$i
] =~ s/Y//;
}
if
(
$invmap_ref
->[
$i
] eq
$missing
) {
if
((
$i
> 0 &&
$invlist_ref
->[
$i
] <=
$invlist_ref
->[
$i
-1])
||
$invlist_ref
->[
$i
] >=
$invlist_ref
->[
$i
+1])
{
fail(
"prop_invmap('$display_prop')"
);
diag(
sprintf
"Range beginning at %04X is out-of-order."
,
$invlist_ref
->[
$i
]);
next
PROPERTY;
}
next
;
}
if
(
$format
eq
'ad'
&&
$invmap_ref
->[
$i
] eq
'<hangul syllable>'
&&
$invlist_ref
->[
$i
] == 0xAC00)
{
if
((
$i
> 0 &&
$invlist_ref
->[
$i
] <=
$invlist_ref
->[
$i
-1])
||
$invlist_ref
->[
$i
] >=
$invlist_ref
->[
$i
+1])
{
fail(
"prop_invmap('$display_prop')"
);
diag(
sprintf
"Range beginning at %04X is out-of-order."
,
$invlist_ref
->[
$i
]);
next
PROPERTY;
}
next
;
}
my
$start
=
$invlist_ref
->[
$i
];
my
$end
= (
defined
$invlist_ref
->[
$i
+1])
?
$invlist_ref
->[
$i
+1] - 1
:
$Unicode::UCD::MAX_CP
;
if
(
$is_binary
) {
$tested_map
.=
"$start\n"
;
$binary_count
++;
if
(
$end
<
$Unicode::UCD::MAX_CP
) {
$tested_map
.= (
$end
+ 1) .
"\n"
;
$binary_count
++;
}
}
else
{
$end
= (
$start
==
$end
) ?
""
:
sprintf
(
$file_range_format
,
$end
);
if
(
$invmap_ref
->[
$i
] ne
""
) {
$tested_map
.=
sprintf
"$file_range_format\t%s\t%s\n"
,
$start
,
$end
,
$invmap_ref
->[
$i
];
}
elsif
(
$end
ne
""
) {
$tested_map
.=
sprintf
"$file_range_format\t%s\n"
,
$start
,
$end
;
}
else
{
$tested_map
.=
sprintf
"$file_range_format\n"
,
$start
;
}
}
}
$tested_map
=
"V$binary_count\n$tested_map"
if
$binary_count
;
local
$/ =
"\n"
;
chomp
$tested_map
;
$/ =
$input_record_separator
;
if
(
$tested_map
ne
$official
) {
fail_with_diff(
$display_prop
,
$official
,
$tested_map
,
"prop_invmap"
);
next
PROPERTY;
}
if
(
keys
%specials
) {
fail(
"prop_invmap('$display_prop')"
);
diag(
"Unexpected specials: "
.
join
", "
,
keys
%specials
);
next
PROPERTY;
}
}
elsif
(
$format
eq
'n'
) {
if
(
$missing
ne
""
) {
fail(
"prop_invmap('$display_prop')"
);
diag(
"The missings should be \"\"; got \"missing\""
);
next
PROPERTY;
}
$official
=
do
"unicore/Name.pl"
;
$official
=~ s/\n\n/\e/g;
$official
=~ s/\n/\t/g;
$official
=~ s/\e/\n/g;
$official
=~ s/ ^ [^\t]+ \ .*? \n //xmg;
$official
=~ s/ ^ 00000 .*? ( .{5} \t SPACE ) $ /$1/xms;
my
$range_2_start
;
my
$range_2_end_next
;
if
($::IS_ASCII) {
$range_2_start
=
'0007F'
;
$range_2_end_next
=
'000A0'
;
}
elsif
(
ord
'^'
== 106) {
$range_2_start
=
'005F'
;
$range_2_end_next
=
'0060'
;
}
else
{
$range_2_start
=
'00FF'
;
$range_2_end_next
=
'0100'
;
}
$official
=~ s/ ^
$range_2_start
.*? (
$range_2_end_next
) /$1/xms;
my
(
$aliases_code_points
,
$aliases_maps
,
undef
,
undef
)
=
&prop_invmap
(
'_Perl_Name_Alias'
,
'_perl_core_internal_ok'
);
for
(
my
$i
= 0;
$i
<
@$aliases_code_points
;
$i
++) {
my
$code_point
=
$aliases_code_points
->[
$i
];
next
if
$code_point
<= 0x1F
|| (
$code_point
>= 0x7F &&
$code_point
<= 0x9F);
my
$hex_code_point
=
sprintf
"%05X"
,
$code_point
;
$aliases_maps
->[
$i
] = [
$aliases_maps
->[
$i
] ]
if
!
ref
$aliases_maps
->[
$i
];
foreach
my
$alias
(@{
$aliases_maps
->[
$i
]}) {
$alias
=~ s/:.*//;
$alias
=
quotemeta
(
$alias
);
$official
=~ s/
$hex_code_point
\t
$alias
\n //x;
}
}
local
$/ =
"\n"
;
chomp
$official
;
$/ =
$input_record_separator
;
my
$tested_map
=
""
;
my
@code_point_in_names
=
@Unicode::UCD::code_points_ending_in_code_point
;
for
my
$i
(0 ..
@$invlist_ref
- 1 -
$upper_limit_subtract
) {
my
$start
=
$invlist_ref
->[
$i
];
my
$end
=
$invlist_ref
->[
$i
+1] - 1;
if
(
$invmap_ref
->[
$i
] eq
$missing
) {
if
((
$i
> 0 &&
$invlist_ref
->[
$i
] <=
$invlist_ref
->[
$i
-1])
||
$invlist_ref
->[
$i
] >=
$invlist_ref
->[
$i
+1])
{
fail(
"prop_invmap('$display_prop')"
);
diag(
sprintf
"Range beginning at %04X is out-of-order."
,
$invlist_ref
->[
$i
]);
next
PROPERTY;
}
next
;
}
if
(
$invmap_ref
->[
$i
] =~ / (.*) ( < .*? > )/x) {
my
$name
= $1;
my
$type
= $2;
if
((
$i
> 0 &&
$invlist_ref
->[
$i
] <=
$invlist_ref
->[
$i
-1])
||
$invlist_ref
->[
$i
] >=
$invlist_ref
->[
$i
+1])
{
fail(
"prop_invmap('$display_prop')"
);
diag(
sprintf
"Range beginning at %04X is out-of-order."
,
$invlist_ref
->[
$i
]);
next
PROPERTY;
}
if
(
$type
eq
"<hangul syllable>"
) {
if
(
$name
ne
""
) {
fail(
"prop_invmap('$display_prop')"
);
diag(
"Unexpected text in $invmap_ref->[$i]"
);
next
PROPERTY;
}
if
(
$start
!= 0xAC00) {
fail(
"prop_invmap('$display_prop')"
);
diag(
sprintf
(
"<hangul syllables> should begin at 0xAC00, got %04X"
,
$start
));
next
PROPERTY;
}
if
(
$end
!=
$start
+ 11172 - 1) {
fail(
"prop_invmap('$display_prop')"
);
diag(
sprintf
(
"<hangul syllables> should end at %04X, got %04X"
,
$start
+ 11172 -1,
$end
));
next
PROPERTY;
}
}
elsif
(
$type
ne
"<code point>"
) {
fail(
"prop_invmap('$display_prop')"
);
diag(
"Unexpected text '$type' in $invmap_ref->[$i]"
);
next
PROPERTY;
}
else
{
for
my
$i
(0 ..
@code_point_in_names
- 1) {
my
$hash
=
$code_point_in_names
[
$i
];
if
(
$hash
->{
'low'
} ==
$start
&&
$hash
->{
'high'
} ==
$end
&&
"$hash->{'name'}-"
eq
$name
)
{
splice
@code_point_in_names
,
$i
, 1;
last
;
}
else
{
fail(
"prop_invmap('$display_prop')"
);
diag(
"Unexpected code-point-in-name line '$invmap_ref->[$i]'"
);
next
PROPERTY;
}
}
}
next
;
}
$end
= (
$start
==
$end
) ?
""
:
sprintf
(
"%05X"
,
$end
);
$tested_map
.=
sprintf
"%05X\t%s\n"
,
$start
,
$invmap_ref
->[
$i
];
}
local
$/ =
"\n"
;
chomp
$tested_map
;
$/ =
$input_record_separator
;
if
(
$tested_map
ne
$official
) {
fail_with_diff(
$display_prop
,
$official
,
$tested_map
,
"prop_invmap"
);
next
PROPERTY;
}
if
(
@code_point_in_names
) {
fail(
"prop_invmap('$display_prop')"
);
diag(
"Missing code-point-in-name line(s)"
. Dumper \
@code_point_in_names
);
next
PROPERTY;
}
}
elsif
(
$format
eq
's'
) {
my
%maps
;
my
$previous_map
;
for
my
$i
(0 ..
@$invlist_ref
- 1 -
$upper_limit_subtract
) {
my
$range_start
=
$invlist_ref
->[
$i
];
if
((
$i
> 0 &&
$range_start
<=
$invlist_ref
->[
$i
-1])
||
$range_start
>=
$invlist_ref
->[
$i
+1])
{
fail(
"prop_invmap('$display_prop')"
);
diag(
sprintf
"Range beginning at %04X is out-of-order."
,
$invlist_ref
->[
$i
]);
next
PROPERTY;
}
push
@{
$maps
{
$previous_map
}},
$range_start
if
defined
$previous_map
;
$previous_map
=
$invmap_ref
->[
$i
];
push
@{
$maps
{
$previous_map
}},
$range_start
;
}
if
(
@$invlist_ref
> 1) {
my
$penultimate_map
=
$invmap_ref
->[-2];
if
(
$penultimate_map
ne
$missing
) {
push
@{
$maps
{
$penultimate_map
}},
$invlist_ref
->[-1];
push
@{
$maps
{
$missing
}},
$invlist_ref
->[-1];
}
}
foreach
my
$map
(
sort
keys
%maps
) {
my
@off_invlist
= prop_invlist(
"$prop = $map"
);
my
$min
= (
@off_invlist
>= @{
$maps
{
$map
}})
?
@off_invlist
: @{
$maps
{
$map
}};
for
my
$i
(0 ..
$min
- 1) {
if
(
$i
>
@off_invlist
- 1) {
fail(
"prop_invmap('$display_prop')"
);
diag(
"There is no element [$i] for $prop=$map from prop_invlist(), while [$i] in the implicit one constructed from prop_invmap() is '$maps{$map}[$i]'"
);
next
PROPERTY;
}
elsif
(
$i
> @{
$maps
{
$map
}} - 1) {
fail(
"prop_invmap('$display_prop')"
);
diag(
"There is no element [$i] from the implicit $prop=$map constructed from prop_invmap(), while [$i] in the one from prop_invlist() is '$off_invlist[$i]'"
);
next
PROPERTY;
}
elsif
(
$maps
{
$map
}[
$i
] ne
$off_invlist
[
$i
]) {
fail(
"prop_invmap('$display_prop')"
);
diag(
"Element [$i] of the implicit $prop=$map constructed from prop_invmap() is '$maps{$map}[$i]', and the one from prop_invlist() is '$off_invlist[$i]'"
);
next
PROPERTY;
}
}
}
}
else
{
fail(
"prop_invmap('$display_prop')"
);
diag(
"Unknown property '$display_prop' or format '$format'"
);
next
PROPERTY;
}
pass(
"prop_invmap('$display_prop')"
);
}
if
(
$v_unicode_version
ge v3.1.0) {
my
(
$scripts_ranges_ref
,
$scripts_map_ref
) = prop_invmap(
"Script"
);
my
$index
= search_invlist(
$scripts_ranges_ref
, 0x390);
is(
$scripts_map_ref
->[
$index
],
"Greek"
,
"U+0390 is Greek"
);
my
@alpha_invlist
= prop_invlist(
"Alpha"
);
is(search_invlist(\
@alpha_invlist
,
ord
(
"\t"
)),
undef
,
"search_invlist returns undef for code points before first one on the list"
);
}
ok($/ eq
$input_record_separator
,
"The record separator didn't get overridden"
);
if
(! ok(
@warnings
== 0,
"No warnings were generated"
)) {
diag(
join
"\n"
,
"The warnings are:"
,
@warnings
);
}
my
$count
= 0;
for
my
$i
(
$Unicode::UCD::MAX_CP
- 1 ..
$Unicode::UCD::MAX_CP
) {
$count
++;
}
is(
$count
, 2,
"MAX_CP isn't too large"
);
done_testing();