BEGIN {
$| = 1;
chdir
't'
if
-d
't'
;
require
'./test.pl'
;
set_up_inc(
'../lib'
);
skip_all_without_config(
'useithreads'
);
skip_all(
"Fails on threaded builds on OpenBSD"
)
if
($^O =~ m/^(openbsd)$/);
require
'./loc_tools.pl'
;
eval
{
require
POSIX; POSIX->
import
(
qw(errno_h locale_h unistd_h )
) };
if
($@) {
skip_all(
"could not load the POSIX module"
);
}
}
$Devel::Peek::pv_limit
= 0;
$Devel::Peek::pv_limit
= 0;
$Data::Dumper::Sortkeys
=1;
$Data::Dumper::Useqq
= 1;
$Data::Dumper::Deepcopy
= 1;
my
$debug
= 0;
my
%map_category_name_to_number
;
my
%map_category_number_to_name
;
my
@valid_categories
= valid_locale_categories();
foreach
my
$category
(
@valid_categories
) {
my
$cat_num
=
eval
"&POSIX::$category"
;
die
"Can't determine ${category}'s number: $@"
if
$@;
$map_category_name_to_number
{
$category
} =
$cat_num
;
$map_category_number_to_name
{
$cat_num
} =
$category
;
}
my
$LC_ALL
;
my
$LC_ALL_string
;
if
(
defined
$map_category_name_to_number
{LC_ALL}) {
$LC_ALL_string
=
'LC_ALL'
;
$LC_ALL
=
$map_category_name_to_number
{LC_ALL};
}
elsif
(
defined
$map_category_name_to_number
{LC_CTYPE}) {
$LC_ALL_string
=
'LC_CTYPE'
;
$LC_ALL
=
$map_category_name_to_number
{LC_CTYPE};
}
else
{
skip_all(
"No LC_ALL nor LC_CTYPE"
);
}
delete
local
@ENV
{
'LANGUAGE'
,
'LANG'
,
keys
%map_category_name_to_number
};
my
@locales
= find_locales(
$LC_ALL
);
skip_all(
"Couldn't find any locales"
)
if
@locales
== 0;
plan(2);
my
(
$utf8_locales_ref
,
$non_utf8_locales_ref
)
= classify_locales_wrt_utf8ness(\
@locales
);
my
$official_ascii_name
=
'ansi_x341968'
;
my
%lang_code_to_script
= (
am
=>
'amharic'
,
amh
=>
'amharic'
,
amharic
=>
'amharic'
,
ar
=>
'arabic'
,
be
=>
'cyrillic'
,
bel
=>
'cyrillic'
,
ben
=>
'bengali'
,
bn
=>
'bengali'
,
bg
=>
'cyrillic'
,
bul
=>
'cyrillic'
,
bulgarski
=>
'cyrillic'
,
bulgarian
=>
'cyrillic'
,
c
=>
$official_ascii_name
,
cnr
=>
'cyrillic'
,
de
=>
'latin_1'
,
deu
=>
'latin_1'
,
deutsch
=>
'latin_1'
,
german
=>
'latin_1'
,
div
=>
'thaana'
,
dv
=>
'thaana'
,
dzo
=>
'tibetan'
,
dz
=>
'tibetan'
,
el
=>
'greek'
,
ell
=>
'greek'
,
ellada
=>
'greek'
,
en
=>
$official_ascii_name
,
eng
=>
$official_ascii_name
,
american
=>
$official_ascii_name
,
british
=>
$official_ascii_name
,
es
=>
'latin_1'
,
fa
=>
'arabic'
,
fas
=>
'arabic'
,
flamish
=>
'latin_1'
,
fra
=>
'latin_1'
,
fr
=>
'latin_1'
,
heb
=>
'hebrew'
,
he
=>
'hebrew'
,
hi
=>
'hindi'
,
hin
=>
'hindi'
,
hy
=>
'armenian'
,
hye
=>
'armenian'
,
ita
=>
'latin_1'
,
it
=>
'latin_1'
,
ja
=>
'katakana'
,
jpn
=>
'katakana'
,
nihongo
=>
'katakana'
,
japanese
=>
'katakana'
,
ka
=>
'georgian'
,
kat
=>
'georgian'
,
kaz
=>
'cyrillic'
,
khm
=>
'khmer'
,
kir
=>
'cyrillic'
,
kk
=>
'cyrillic'
,
km
=>
'khmer'
,
ko
=>
'hangul'
,
kor
=>
'hangul'
,
korean
=>
'hangul'
,
ku
=>
'arabic'
,
kur
=>
'arabic'
,
ky
=>
'cyrillic'
,
latin1
=>
'latin_1'
,
lao
=>
'lao'
,
lo
=>
'lao'
,
mk
=>
'cyrillic'
,
mkd
=>
'cyrillic'
,
macedonian
=>
'cyrillic'
,
mn
=>
'cyrillic'
,
mon
=>
'cyrillic'
,
mya
=>
'myanmar'
,
my
=>
'myanmar'
,
ne
=>
'devanagari'
,
nep
=>
'devanagari'
,
nld
=>
'latin_1'
,
nl
=>
'latin_1'
,
nederlands
=>
'latin_1'
,
dutch
=>
'latin_1'
,
por
=>
'latin_1'
,
posix
=>
$official_ascii_name
,
ps
=>
'arabic'
,
pt
=>
'latin_1'
,
pus
=>
'arabic'
,
ru
=>
'cyrillic'
,
russki
=>
'cyrillic'
,
russian
=>
'cyrillic'
,
rus
=>
'cyrillic'
,
sin
=>
'sinhala'
,
si
=>
'sinhala'
,
so
=>
'arabic'
,
som
=>
'arabic'
,
spa
=>
'latin_1'
,
sr
=>
'cyrillic'
,
srp
=>
'cyrillic'
,
tam
=>
'tamil'
,
ta
=>
'tamil'
,
tg
=>
'cyrillic'
,
tgk
=>
'cyrillic'
,
tha
=>
'thai'
,
th
=>
'thai'
,
thai
=>
'thai'
,
ti
=>
'ethiopian'
,
tir
=>
'ethiopian'
,
uk
=>
'cyrillic'
,
ukr
=>
'cyrillic'
,
ur
=>
'arabic'
,
urd
=>
'arabic'
,
zgh
=>
'arabic'
,
zh
=>
'chinese'
,
zho
=>
'chinese'
,
);
my
%codeset_to_script
= (
88591
=>
'latin_1'
,
88592
=>
'latin_2'
,
88593
=>
'latin_3'
,
88594
=>
'latin_4'
,
88595
=>
'cyrillic'
,
88596
=>
'arabic'
,
88597
=>
'greek'
,
88598
=>
'hebrew'
,
88599
=>
'latin_5'
,
885910
=>
'latin_6'
,
885911
=>
'thai'
,
885912
=>
'devanagari'
,
885913
=>
'latin_7'
,
885914
=>
'latin_8'
,
885915
=>
'latin_9'
,
885916
=>
'latin_10'
,
cp1251
=>
'cyrillic'
,
cp1255
=>
'hebrew'
,
);
my
%script_priorities
= (
$official_ascii_name
=> 15,
latin_1
=> 14,
latin_9
=> 13,
latin_2
=> 12,
latin_4
=> 12,
latin_5
=> 12,
latin_6
=> 12,
latin_7
=> 12,
latin_8
=> 12,
latin_10
=> 12,
latin
=> 11,
);
my
%script_instances
;
sub
analyze_locale_name($) {
my
%ret
;
my
$input_locale_name
=
shift
;
my
$old_locale
= setlocale(LC_CTYPE);
my
$new_locale
= setlocale(LC_CTYPE,
$input_locale_name
);
if
(!
$new_locale
) {
diag
"Unexpectedly can't setlocale(LC_CTYPE, $new_locale);"
.
" \$!=$!, \$^E=$^E"
;
return
;
}
$ret
{locale_name} =
$new_locale
;
$ret
{locale_name} =~ / ^
( .+? )
(?: _ ( .+? ) )?
(?: \. ( .+? ) )?
(?: \@ ( .+ ) )?
$
/x;
$ret
{language} = $1 //
""
;
$ret
{territory} = $2 //
""
;
$ret
{codeset} = $3 //
""
;
$ret
{modifier} = $4 //
""
;
foreach
my
$key
(
qw(language codeset modifier)
) {
$ret
{
$key
} =
lc
$ret
{
$key
};
}
my
$langinfo_codeset
=
lc
langinfo(CODESET);
if
(! setlocale(LC_CTYPE,
$old_locale
)) {
die
"Unexpectedly can't restore locale to $old_locale from"
.
" $new_locale; \$!=$!, \$^E=$^E"
;
}
foreach
my
$codeset_ref
(\
$langinfo_codeset
, \
$ret
{codeset}) {
$$codeset_ref
=~ s/\W//g;
$$codeset_ref
=~ s/iso8859/8859/g;
$$codeset_ref
=~ s/\b65001\b/utf8/;
$$codeset_ref
=~ s/\b646\b/
$official_ascii_name
/;
$$codeset_ref
=~ s/\busascii\b/
$official_ascii_name
/;
}
if
(
$langinfo_codeset
) {
if
(
$ret
{codeset} &&
$ret
{codeset} ne
$langinfo_codeset
) {
diag
"In $ret{locale_name}, codeset from langinfo"
.
" ($langinfo_codeset) doesn't match codeset in"
.
" locale_name ($ret{codeset})"
;
}
$ret
{codeset} =
$langinfo_codeset
;
}
$ret
{is_utf8} = 0 + (
$ret
{codeset} eq
'utf8'
);
if
(
$ret
{modifier}
and
grep
{
$_
eq
$ret
{modifier} }
values
%lang_code_to_script
)
{
$ret
{script} =
$ret
{nominal_script} =
$ret
{modifier};
$ret
{modifier} =
""
;
}
elsif
(
$ret
{codeset} && !
$ret
{is_utf8}) {
$ret
{script} =
$codeset_to_script
{
$ret
{codeset}};
if
(
$ret
{script}) {
$ret
{script} .=
'_'
.
$official_ascii_name
;
}
elsif
(
$ret
{codeset} =~ /^koi/) {
$ret
{script} =
"cyrillic_${official_ascii_name}"
;
}
else
{
$ret
{script} =
$ret
{codeset};
$ret
{script} .=
"_$ret{language}"
if
$ret
{codeset} !~ /
$official_ascii_name
/;
}
}
else
{
$ret
{script} =
$lang_code_to_script
{
$ret
{language}};
if
(!
$ret
{script}) {
$ret
{script} = (
grep
{
$ret
{language} eq
$_
}
values
%lang_code_to_script
)
?
$ret
{language}
:
'latin'
;
}
}
if
( (
$ret
{modifier} &&
$ret
{modifier} eq
'euro'
)
&&
$ret
{script} =~ / ^ (
$official_ascii_name
| latin (_1)? ) $ /x)
{
$ret
{script} =
'latin_9'
;
}
$ret
{priority} =
$script_priorities
{
$ret
{script}};
if
(!
$ret
{priority}) {
$ret
{priority} = (
$ret
{script} ne
$official_ascii_name
&&
$ret
{script} =~
$official_ascii_name
)
? 0
: 1;
}
my
$script_root
= (
$ret
{script} =~ s/_.*//r) .
"_$ret{is_utf8}"
;
$ret
{script_instance} =
$script_instances
{
$script_root
}++;
return
\
%ret
;
}
sub
sort_locales ()
{
my
$cmp
=
$a
->{script_instance} <=>
$b
->{script_instance};
return
$cmp
if
$cmp
;
$cmp
=
$a
->{priority} <=>
$b
->{priority};
return
$cmp
if
$cmp
;
$cmp
=
$a
->{script} cmp
$b
->{script};
return
$cmp
if
$cmp
;
$cmp
=
$a
->{modifier} cmp
$b
->{modifier};
return
$cmp
if
$cmp
;
$cmp
=
$a
->{codeset} cmp
$b
->{codeset};
return
$cmp
if
$cmp
;
$cmp
=
$a
->{territory} cmp
$b
->{territory};
return
$cmp
if
$cmp
;
return
lc
$a
cmp
lc
$b
;
}
my
@cleaned_up_locales
;
for
my
$locale
(
@locales
) {
my
$locale_struct
= analyze_locale_name(
$locale
);
next
unless
$locale_struct
;
my
$name
=
$locale_struct
->{locale_name};
next
if
grep
{
$name
eq
$_
->{locale_name} }
@cleaned_up_locales
;
push
@cleaned_up_locales
,
$locale_struct
;
}
@locales
=
@cleaned_up_locales
;
@locales
=
grep
{
$_
->{codeset} ne
""
}
@locales
;
@locales
=
sort
sort_locales
@locales
;
SKIP: {
my
$locale
=
$locales
[0];
skip(
"No valid locale to test with"
, 1)
if
$locale
->{codeset} eq
$official_ascii_name
;
local
$ENV
{LC_MESSAGES} =
$locale
->{locale_name};
my
$error_count
=
keys
(%!) + 1;
print
fresh_perl("
my
\
$errnum
= 1;
my
\
@threads
=
map
+threads->create(
sub
{
usleep 0.1;
'threads'
->yield();
for
(1..5_000) {
\
$errnum
= (\
$errnum
+ 1) %
$error_count
;
\$! = \
$errnum
;
next
if
\"\$!\" eq \"\";
}
}), (0..1);
\
$_
->
join
for
splice
\
@threads
;",
{}
);
pass(
"Didn't segfault"
);
}
my
%locale_name_to_object
;
for
my
$locale
(
@locales
) {
$locale_name_to_object
{
$locale
->{locale_name}} =
$locale
;
}
sub
sort_by_hashed_locale {
local
$a
=
$locale_name_to_object
{
$a
};
local
$b
=
$locale_name_to_object
{
$b
};
return
sort_locales;
}
sub
min {
my
(
$a
,
$b
) =
@_
;
return
$a
if
$a
<=
$b
;
return
$b
;
}
my
$thread_count
= 15;
my
$iterations
= 100;
my
$alarm_clock
= (1 * 10 * 60);
my
$iterations_per_test_set
= min(30,
int
(
$iterations
/ 5));
$iterations_per_test_set
= 1
if
$iterations_per_test_set
== 0;
my
@platform_categories
= platform_locale_categories();
my
$lc_all_frequency
=
scalar
@platform_categories
==
scalar
@valid_categories
? 3
: -1;
my
$max_result_length
= 10000;
my
$per_thread_startup
= .18;
my
$die_on_negative_sleep
= 1;
my
$max_message_catalog_entries
= 10;
my
$strftime_args
=
"'%c', 0, 0, , 12, 18, 11, 87"
;
my
%distincts
;
my
%op_counts
;
my
$separator
=
'____'
;
sub
pack_op_result($$) {
my
(
$op
,
$result
) =
@_
;
return
$op
.
$separator
. (0 + utf8::is_utf8(
$op
)) .
$separator
.
$result
.
$separator
. (0 + utf8::is_utf8(
$result
));
}
sub
fixup_utf8ness($$) {
my
(
$operand
,
$utf8ness
) =
@_
;
if
(
$utf8ness
+ 0 != 0 + utf8::is_utf8(
$$operand
)) {
if
(
$utf8ness
) {
utf8::upgrade(
$$operand
);
}
else
{
utf8::downgrade(
$$operand
);
}
}
}
sub
unpack_op_result($) {
my
$op_result
=
shift
;
my
(
$op
,
$op_utf8ness
,
$result
,
$result_utf8ness
) =
split
$separator
,
$op_result
;
fixup_utf8ness(\
$op
,
$op_utf8ness
);
fixup_utf8ness(\
$result
,
$result_utf8ness
);
return
(
$op
,
$result
);
}
sub
add_trials($$;$)
{
my
$category_name
=
shift
;
my
$input_op
=
shift
;
my
$locale_constraint
=
shift
//
""
;
LOCALE:
foreach
my
$locale
(
@locales
) {
my
$locale_name
=
$locale
->{locale_name};
my
$op
=
$input_op
;
next
unless
setlocale(
$LC_ALL
,
$locale_name
);
next
unless
setlocale(
$map_category_name_to_number
{
$category_name
},
$locale_name
);
if
(
$locale_constraint
) {
if
(
$locale_constraint
eq
'utf8_only'
) {
next
if
!
$locale
->{is_utf8};
}
elsif
(
$locale_constraint
eq
'a<b'
) {
my
$result
=
eval
"use locale; 'a' lt 'B'"
;
die
"$category_name: '$op (a lt B)': $@"
if
$@;
next
unless
$result
;
}
else
{
die
"Only accepted locale constraints are 'utf8_only' and 'a<b'"
}
}
my
$eval_string
= (
$op
) ?
"use locale; $op;"
:
""
;
my
$result
=
eval
$eval_string
;
die
"$category_name: '$op': $@"
if
$@;
if
(!
defined
$result
) {
if
(
$debug
) {
print
STDERR __FILE__,
": "
, __LINE__,
": Undefined result for $locale_name"
,
" $category_name: '$op'\n"
;
}
next
;
}
elsif
(
$debug
> 1) {
print
STDERR
"\n"
, __FILE__,
": "
, __LINE__,
": $category_name:"
,
" $locale_name: Op = "
, Dumper(
$op
),
"; Returned "
;
Dump
$result
;
}
if
(
length
$result
>
$max_result_length
) {
diag(
"For $locale_name, '$op', result is too long; skipped"
);
next
;
}
if
(
$eval_string
!~ /xfrm/ &&
$result
=~ /\?\?/) {
if
(
$debug
) {
print
STDERR __FILE__,
": "
, __LINE__,
" For $locale_name, op=$op, result has mojibake: $result\n"
;
}
next
;
}
my
$deterministic_trial_count
= 5;
my
$alternate
;
my
@alternate
;
if
(!
$utf8_locales_ref
|| !
$utf8_locales_ref
->@*) {
@alternate
=
grep
{
$_
ne
$locale_name
}
$non_utf8_locales_ref
->@*;
}
elsif
(!
$non_utf8_locales_ref
|| !
$non_utf8_locales_ref
->@*) {
@alternate
=
grep
{
$_
ne
$locale_name
}
$utf8_locales_ref
->@*;
}
elsif
(
grep
{
$_
eq
$locale_name
}
$utf8_locales_ref
->@*) {
@alternate
=
$non_utf8_locales_ref
->@*;
}
else
{
@alternate
=
$utf8_locales_ref
->@*;
}
for
my
$i
(1 ..
$deterministic_trial_count
- 1) {
my
$other
=
shift
@alternate
;
push
@alternate
,
$other
;
if
(! setlocale(
$LC_ALL
,
$other
)) {
if
(
$LC_ALL_string
eq
'LC_ALL'
|| ! setlocale(
$map_category_name_to_number
{
$category_name
},
$other
))
{
die
"Unexpectedly can't set locale to $other:"
.
" \$!=$!, \$^E=$^E"
;
}
}
eval
$eval_string
;
if
(! setlocale(
$LC_ALL
,
$locale_name
)) {
if
(
$LC_ALL_string
eq
'LC_ALL'
|| ! setlocale(
$map_category_name_to_number
{
$category_name
},
$locale_name
))
{
die
"Unexpectedly can't set locale to $locale_name from "
. setlocale(
$LC_ALL
)
.
"; \$!=$!, \$^E=$^E"
;
}
}
my
$got
=
eval
$eval_string
;
next
if
$got
eq
$result
&& utf8::is_utf8(
$got
) == utf8::is_utf8(
$result
);
diag(
"For '$eval_string',\nresults in iteration $i differed from"
.
" the original\ngot"
);
Dump(
$got
);
diag(
"expected"
);
Dump(
$result
);
next
LOCALE;
}
my
$op_result
= pack_op_result(
$op
,
$result
);
push
$distincts
{
$category_name
}{
$op_result
}{locales}->@*,
$locale_name
;
if
(
defined
$op_counts
{
$op
} &&
$op_counts
{
$op
} >=
$thread_count
)
{
last
;
}
}
}
my
@valid_category_numbers
=
sort
{
$a
<=>
$b
}
map
{
$map_category_name_to_number
{
$_
} }
@valid_categories
;
my
$use_name_value_pairs
=
defined
$Config
{d_perl_lc_all_uses_name_value_pairs};
my
$lc_all_separator
= (
$use_name_value_pairs
)
?
";"
:
$Config
{perl_lc_all_separator} =~ s/"//gr;
my
@position_to_category_number
;
if
(!
$use_name_value_pairs
) {
my
$positions
=
$Config
{perl_lc_all_category_positions_init} =~ s/[{}]//gr;
$positions
=~ s/,//g;
$positions
=~ s/^ +//;
$positions
=~ s/ +$//;
@position_to_category_number
=
split
/ \s+ /x,
$positions
}
sub
get_next_category() {
state
$index
;
my
$which
= (
$use_name_value_pairs
)
? \
@valid_category_numbers
: \
@position_to_category_number
;
$index
= -1
unless
defined
$index
;
$index
++;
if
(!
defined
$which
->[
$index
]) {
undef
$index
;
return
;
}
my
$category_number
=
$which
->[
$index
];
return
$category_number
if
$category_number
!=
$LC_ALL
;
return
&get_next_category
();
}
SKIP: {
skip(
"Unsafe locale threads"
, 1)
unless
${^SAFE_LOCALES};
if
(${^O} eq
"MSWin32"
&&
$Config
{
'libc'
} !~ /ucrt/) {
@locales
=
grep
{
my
$locale_name
=
$_
->{locale_name};
my
$underlying_name
= setlocale(
&LC_CTYPE
,
$locale_name
);
setlocale(
$LC_ALL
,
"Albanian"
);
defined
(
$underlying_name
) && setlocale(
&LC_CTYPE
,
$underlying_name
)
}
@locales
;
}
my
%msg_catalog
;
foreach
my
$error
(
sort
keys
%!) {
my
$number
=
eval
"Errno::$error"
;
$! =
$number
;
my
$description
=
"$!"
;
next
unless
"$description"
;
$msg_catalog
{
$number
} =
quotemeta
"$description"
;
}
my
@msg_catalog
=
sort
{
$a
<=>
$b
}
keys
%msg_catalog
;
splice
@msg_catalog
,
$max_message_catalog_entries
if
$max_message_catalog_entries
>= 0;
my
$msg_catalog
=
join
','
,
@msg_catalog
;
eval
{
my
$discard
= POSIX::localeconv()->{currency_symbol}; };
my
$has_localeconv
= $@ eq
""
;
foreach
my
$category
(
@valid_categories
) {
no
warnings
'uninitialized'
;
next
if
$category
eq
'LC_ALL'
;
if
(
$category
eq
'LC_COLLATE'
) {
add_trials(
'LC_COLLATE'
,
'quotemeta join "", sort reverse map { chr } (1..255)'
);
add_trials(
'LC_COLLATE'
,
'"a" lt "B"'
,
'a<b'
);
add_trials(
'LC_COLLATE'
,
'my $a = "a"; my $b = "B";'
.
' POSIX::strcoll($a, $b) < 0;'
,
'a<b'
);
add_trials(
'LC_COLLATE'
,
'my $string = quotemeta join "",'
.
' map { chr } (1..255);'
.
' POSIX::strxfrm($string)'
);
next
;
}
if
(
$category
eq
'LC_CTYPE'
) {
add_trials(
'LC_CTYPE'
,
'no warnings "locale"; quotemeta lc'
.
' join "" , map { chr } (0..255)'
);
add_trials(
'LC_CTYPE'
,
'no warnings "locale"; quotemeta uc'
.
' join "", map { chr } (0..255)'
);
add_trials(
'LC_CTYPE'
,
'no warnings "locale"; quotemeta CORE::fc'
.
' join "", map { chr } (0..255)'
);
add_trials(
'LC_CTYPE'
,
'no warnings "locale";'
.
' my $string = join "", map { chr } 0..255;'
.
' $string =~ s|(.)|$1=~/\d/?1:0|gers'
);
add_trials(
'LC_CTYPE'
,
'no warnings "locale";'
.
' my $string = join "", map { chr } 0..255;'
.
' $string =~ s|(.)|$1=~/\s/?1:0|gers'
);
add_trials(
'LC_CTYPE'
,
'no warnings "locale";'
.
' my $string = join "", map { chr } 0..255;'
.
' $string =~ s|(.)|$1=~/\w/?1:0|gers'
);
add_trials(
'LC_CTYPE'
,
'no warnings "locale";'
.
' my $string = join "", map { chr } 0..255;'
.
' $string =~ s|(.)|$1=~/[[:alpha:]]/?1:0|gers'
);
add_trials(
'LC_CTYPE'
,
'no warnings "locale";'
.
' my $string = join "", map { chr } 0..255;'
.
' $string =~ s|(.)|$1=~/[[:alnum:]]/?1:0|gers'
);
add_trials(
'LC_CTYPE'
,
'no warnings "locale";'
.
' my $string = join "", map { chr } 0..255;'
.
' $string =~ s|(.)|$1=~/[[:ascii:]]/?1:0|gers'
);
add_trials(
'LC_CTYPE'
,
'no warnings "locale";'
.
' my $string = join "", map { chr } 0..255;'
.
' $string =~ s|(.)|$1=~/[[:blank:]]/?1:0|gers'
);
add_trials(
'LC_CTYPE'
,
'no warnings "locale";'
.
' my $string = join "", map { chr } 0..255;'
.
' $string =~ s|(.)|$1=~/[[:cntrl:]]/?1:0|gers'
);
add_trials(
'LC_CTYPE'
,
'no warnings "locale";'
.
' my $string = join "", map { chr } 0..255;'
.
' $string =~ s|(.)|$1=~/[[:graph:]]/?1:0|gers'
);
add_trials(
'LC_CTYPE'
,
'no warnings "locale";'
.
' my $string = join "", map { chr } 0..255;'
.
' $string =~ s|(.)|$1=~/[[:lower:]]/?1:0|gers'
);
add_trials(
'LC_CTYPE'
,
'no warnings "locale";'
.
' my $string = join "", map { chr } 0..255;'
.
' $string =~ s|(.)|$1=~/[[:print:]]/?1:0|gers'
);
add_trials(
'LC_CTYPE'
,
'no warnings "locale";'
.
' my $string = join "", map { chr } 0..255;'
.
' $string =~ s|(.)|$1=~/[[:punct:]]/?1:0|gers'
);
add_trials(
'LC_CTYPE'
,
'no warnings "locale";'
.
' my $string = join "", map { chr } 0..255;'
.
' $string =~ s|(.)|$1=~/[[:upper:]]/?1:0|gers'
);
add_trials(
'LC_CTYPE'
,
'no warnings "locale";'
.
' my $string = join "", map { chr } 0..255;'
.
' $string =~ s|(.)|$1=~/[[:xdigit:]]/?1:0|gers'
);
add_trials(
'LC_CTYPE'
,
'use I18N::Langinfo qw(langinfo CODESET);'
.
' no warnings "uninitialized";'
.
' langinfo(CODESET);'
);
if
(
$Config
{
'd_mbrlen'
} eq
'define'
) {
add_trials(
'LC_CTYPE'
,
'my $string = chr 0x100;'
.
' utf8::encode($string);'
.
' no warnings "uninitialized";'
.
' POSIX::mblen(undef);'
.
' POSIX::mblen($string)'
,
'utf8_only'
);
}
if
(
$Config
{
'd_mbrtowc'
} eq
'define'
) {
add_trials(
'LC_CTYPE'
,
'my $value; my $str = "\x{100}";'
.
' utf8::encode($str);'
.
' no warnings "uninitialized";'
.
' POSIX::mbtowc(undef, undef);'
.
' POSIX::mbtowc($value, $str); $value;'
,
'utf8_only'
);
}
if
(
$Config
{
'd_wcrtomb'
} eq
'define'
) {
add_trials(
'LC_CTYPE'
,
'my $value;'
.
' no warnings "uninitialized";'
.
' POSIX::wctomb(undef, undef);'
.
' POSIX::wctomb($value, 0xFF);'
.
' $value;'
,
'utf8_only'
);
}
add_trials(
'LC_CTYPE'
,
'no warnings "locale";'
.
' my $uc = CORE::uc join "", map { chr } (0..255);'
.
' my $fc = quotemeta CORE::fc $uc;'
.
' $uc =~ / \A $fc \z /xi;'
);
next
;
}
if
(
$category
eq
'LC_MESSAGES'
) {
add_trials(
'LC_MESSAGES'
,
"join \"\n\", map { \$! = \$_; \"\$!\" } ($msg_catalog)"
);
add_trials(
'LC_MESSAGES'
,
'use I18N::Langinfo qw(langinfo YESSTR NOSTR YESEXPR NOEXPR);'
.
' no warnings "uninitialized";'
.
' join ",",'
.
' map { langinfo($_) } YESSTR, NOSTR, YESEXPR, NOEXPR;'
);
next
;
}
if
(
$category
eq
'LC_MONETARY'
) {
if
(
$has_localeconv
) {
add_trials(
'LC_MONETARY'
,
"localeconv()->{currency_symbol}"
);
}
add_trials(
'LC_MONETARY'
,
'use I18N::Langinfo qw(langinfo CRNCYSTR);'
.
' no warnings "uninitialized";'
.
' join "|", map { langinfo($_) } CRNCYSTR;'
);
next
;
}
if
(
$category
eq
'LC_NUMERIC'
) {
if
(
$has_localeconv
) {
add_trials(
'LC_NUMERIC'
,
"no warnings; 'uninitialised';"
.
" join '|',"
.
" localeconv()->{decimal_point},"
.
" localeconv()->{thousands_sep}"
);
}
add_trials(
'LC_NUMERIC'
,
'use I18N::Langinfo qw(langinfo RADIXCHAR THOUSEP);'
.
' no warnings "uninitialized";'
.
' join "|", map { langinfo($_) } RADIXCHAR, THOUSEP;'
);
add_trials(
'LC_NUMERIC'
,
'my $in = 4.2; sprintf("%g", $in)'
);
next
;
}
if
(
$category
eq
'LC_TIME'
) {
add_trials(
'LC_TIME'
,
"POSIX::strftime($strftime_args)"
);
add_trials(
'LC_TIME'
, <<~
'END_OF_CODE'
);
ABDAY_1 ABDAY_2 ABDAY_3 ABDAY_4 ABDAY_5 ABDAY_6 ABDAY_7
ABMON_1 ABMON_2 ABMON_3 ABMON_4 ABMON_5 ABMON_6
ABMON_7 ABMON_8 ABMON_9 ABMON_10 ABMON_11 ABMON_12
DAY_1 DAY_2 DAY_3 DAY_4 DAY_5 DAY_6 DAY_7
MON_1 MON_2 MON_3 MON_4 MON_5 MON_6
MON_7 MON_8 MON_9 MON_10 MON_11 MON_12
D_FMT D_T_FMT T_FMT)
;
no
warnings
"uninitialized"
;
join
"|"
,
map
{ langinfo(
$_
) }
ABDAY_1,ABDAY_2,ABDAY_3,ABDAY_4,ABDAY_5,
ABDAY_6,ABDAY_7,
ABMON_1,ABMON_2,ABMON_3,ABMON_4,ABMON_5,
ABMON_6, ABMON_7,ABMON_8,ABMON_9,ABMON_10,
ABMON_11,ABMON_12,
DAY_1,DAY_2,DAY_3,DAY_4,DAY_5,DAY_6,DAY_7,
MON_1,MON_2,MON_3,MON_4,MON_5,MON_6, MON_7,
MON_8,MON_9,MON_10,MON_11,MON_12,
D_FMT,D_T_FMT,T_FMT;
END_OF_CODE
next
;
}
}
my
%all_tests
;
foreach
my
$category
(
keys
%distincts
) {
my
%results
;
my
%distinct_results_count
;
my
%distinct_ops
;
for
my
$op_result
(
sort
keys
$distincts
{
$category
}->%*) {
my
(
$op
,
$result
) = unpack_op_result(
$op_result
);
$distinct_ops
{
$op
}++;
push
$results
{
$op
}->@*,
$result
;
$distinct_results_count
{
$result
} +=
scalar
$distincts
{
$category
}{
$op_result
}{locales}->@*;
}
my
@ops
=
sort
keys
%distinct_ops
;
sub
gen_combinations {
my
$op_ref
=
shift
;
my
$results_ref
=
shift
;
my
$distincts_ref
=
shift
;
my
$op
=
shift
$op_ref
->@*;
my
@return
;
foreach
my
$result
(
$results_ref
->{
$op
}->@*) {
my
$op_result
= pack_op_result(
$op
,
$result
);
push
@return
, {
op_results
=> [
$op_result
],
locales
=>
$distincts_ref
->{
$op_result
}{locales},
};
}
return
(\
@return
)
unless
$op_ref
->@*;
my
$recurse_return
=
&gen_combinations
(
$op_ref
,
$results_ref
,
$distincts_ref
);
my
@combined
;
foreach
my
$this
(
@return
) {
my
@this_locales
=
$this
->{locales}->@*;
foreach
my
$recursed
(
$recurse_return
->@*) {
my
@recursed_locales
=
$recursed
->{locales}->@*;
my
%seen
;
$seen
{
$_
}++
foreach
@this_locales
,
@recursed_locales
;
my
@intersection
=
grep
$seen
{
$_
} == 2,
keys
%seen
;
next
unless
@intersection
;
my
@combined_result
=
$this
->{op_results}->@*;
push
@combined_result
,
$recursed
->{op_results}->@*;
push
@combined
, {
op_results
=> \
@combined_result
,
locales
=> \
@intersection
,
};
}
}
return
\
@combined
;
}
my
$combinations_ref
= gen_combinations(\
@ops
, \
%results
,
$distincts
{
$category
});
foreach
my
$test
(
$combinations_ref
->@*) {
$test
->{locales}->@* =
sort
sort_by_hashed_locale
$test
->{locales}->@*;
my
@individual_tests
=
$test
->{op_results}->@*;
my
@in_common_locale_counts
;
foreach
my
$this_test
(
@individual_tests
) {
push
@in_common_locale_counts
,
scalar
$distincts
{
$category
}{
$this_test
}{locales}->@*;
}
push
$test
->{in_common_locale_counts}->@*,
@in_common_locale_counts
;
}
my
@cat_tests
=
$combinations_ref
->@*;
sub
sort_test_order {
my
$a_tests_count
=
scalar
$a
->{in_common_locale_counts}->@*;
my
$b_tests_count
=
scalar
$b
->{in_common_locale_counts}->@*;
my
$tests_count
= min(
$a_tests_count
,
$b_tests_count
);
my
$a_nondistincts
= 0;
my
$b_nondistincts
= 0;
for
my
$i
(0 ..
$tests_count
- 1) {
$a_nondistincts
+= (
$a
->{in_common_locale_counts}[
$i
] != 1);
$b_nondistincts
+= (
$b
->{in_common_locale_counts}[
$i
] != 1);
}
my
$cmp
=
$a_nondistincts
<=>
$b_nondistincts
;
return
$cmp
if
$cmp
;
my
$a_count
= 0;
my
$b_count
= 0;
for
my
$i
(0 ..
$tests_count
- 1) {
$a_count
+=
$a
->{in_common_locale_counts}[
$i
];
$b_count
+=
$b
->{in_common_locale_counts}[
$i
];
}
$cmp
=
$a_count
<=>
$b_count
;
return
$cmp
if
$cmp
;
local
$a
=
$a
->{locales}[0];
local
$b
=
$b
->{locales}[0];
return
sort_by_hashed_locale;
}
@cat_tests
=
sort
sort_test_order
@cat_tests
;
push
$all_tests
{
$category
}->@*,
@cat_tests
;
}
my
%thread_already_used_locales
;
my
@tests_by_thread
;
for
my
$i
(0 ..
$thread_count
- 1) {
foreach
my
$category
(
sort
keys
%all_tests
) {
my
$skipped
= 0;
NEXT_CANDIDATE:
my
$candidate
=
shift
$all_tests
{
$category
}->@*;
my
$locale_name
=
$candidate
->{locales}[0];
if
(
defined
$thread_already_used_locales
{
$locale_name
=~ s/\W.*//r})
{
for
my
$j
(1 ..
$candidate
->{locales}->@* - 1) {
my
$synonym
=
$candidate
->{locales}[
$j
];
next
if
defined
$thread_already_used_locales
{
$synonym
=~
s/\W.*//r};
$locale_name
=
$synonym
;
goto
found_synonym;
}
$skipped
++;
if
(
$skipped
<
scalar
$all_tests
{
$category
}->@*) {
push
$all_tests
{
$category
}->@*,
$candidate
;
goto
NEXT_CANDIDATE;
}
found_synonym:
}
$tests_by_thread
[
$i
]->{
$category
}{locale_name} =
$locale_name
;
my
@cases
;
for
my
$j
(0 ..
$candidate
->{op_results}->@* - 1) {
my
(
$op
,
$result
) =
unpack_op_result(
$candidate
->{op_results}[
$j
]);
push
@cases
, {
op
=>
$op
,
expected
=>
$result
};
}
push
$tests_by_thread
[
$i
]->{
$category
}{locale_tests}->@*,
@cases
;
$thread_already_used_locales
{
$locale_name
=~ s/\W.*//r} = 1;
push
$candidate
->{locales}->@*,
shift
$candidate
->{locales}->@*;
push
$all_tests
{
$category
}->@*,
$candidate
;
}
}
my
@cooked_tests
;
for
my
$i
(0 ..
$#tests_by_thread
) {
my
$this_tests
=
$tests_by_thread
[
$i
];
my
@this_cooked_tests
;
my
(
@this_categories
,
@this_locales
);
if
( (
$i
%
$lc_all_frequency
==
$lc_all_frequency
- 1)
&&
$LC_ALL_string
eq
'LC_ALL'
)
{
my
$lc_all
=
""
;
my
$category_number
;
while
(
defined
(
$category_number
= get_next_category())) {
my
$category_name
=
$map_category_number_to_name
{
$category_number
};
my
$locale
=
$this_tests
->{
$category_name
}{locale_name};
$locale
=
"C"
unless
defined
$locale
;
$category_name
=~ s/\@/\\@/g;
$lc_all
.=
$lc_all_separator
if
$lc_all
ne
""
;
if
(
$use_name_value_pairs
) {
$lc_all
.=
$category_name
.
"="
;
}
$lc_all
.=
$locale
;
}
$this_categories
[0] =
$LC_ALL
;
$this_locales
[0] =
$lc_all
;
}
else
{
foreach
my
$category_name
(
sort
keys
$this_tests
->%*) {
push
@this_categories
,
$map_category_name_to_number
{
$category_name
};
push
@this_locales
,
$this_tests
->{
$category_name
}{locale_name};
}
}
while
(
keys
$this_tests
->%*) {
foreach
my
$category_name
(
sort
keys
$this_tests
->%*) {
my
$this_category_tests
=
$this_tests
->{
$category_name
};
my
$test
=
shift
$this_category_tests
->{locale_tests}->@*;
print
STDERR __FILE__,
': '
, __LINE__,
': '
, Dumper
$test
if
$debug
;
if
(!
$test
) {
delete
$this_tests
->{
$category_name
};
next
;
}
$test
->{category_name} =
$category_name
;
my
$locale_name
=
$this_category_tests
->{locale_name};
$test
->{locale_name} =
$locale_name
;
$test
->{codeset} =
$locale_name_to_object
{
$locale_name
}{codeset};
push
@this_cooked_tests
,
$test
;
}
}
push
@cooked_tests
, {
thread
=>
$i
,
categories
=> \
@this_categories
,
locales
=> \
@this_locales
,
tests
=> \
@this_cooked_tests
,
};
}
my
$all_tests_ref
= \
@cooked_tests
;
my
$all_tests_file
= tempfile();
if
(!
defined
store(
$all_tests_ref
,
$all_tests_file
)) {
die
"Could not save the built-up data structure"
;
}
my
$category_number_to_name
= Data::Dumper->Dump(
[ \
%map_category_number_to_name
],
[
'map_category_number_to_name'
]);
my
$switches
=
""
;
$switches
=
"switches => [ -DLv ]"
if
$debug
> 2;
my
$program
=
<<EOT;
BEGIN { \$| = 1; }
my \$debug = $debug;
my \$thread_count = $thread_count;
my \$iterations_per_test_set = $iterations_per_test_set;
my \$iterations = $iterations;
my \$die_on_negative_sleep = $die_on_negative_sleep;
my \$per_thread_startup = $per_thread_startup;
my \$all_tests_file = $all_tests_file;
my \$alarm_clock = $alarm_clock;
EOT
$program
.=
<<'EOT';
use threads;
use strict;
use warnings;
use POSIX qw(locale_h);
use utf8;
use Time::HiRes qw(time usleep);
$|=1;
use Data::Dumper;
$Data::Dumper::Sortkeys=1;
$Data::Dumper::Useqq = 1;
$Data::Dumper::Deepcopy = 1;
# Get the tests stored for us by the setup process
use Storable;
my $all_tests_ref = retrieve($all_tests_file);
if (! defined $all_tests_ref) {
die "Could not restore the built-up data structure";
}
my %corrects;
sub output_test_failure_prefix {
my ($iteration, $category_name, $test) = @_;
my $tid = threads->tid();
print STDERR "\nthread ", $tid,
" failed in iteration $iteration",
" for locale $test->{locale_name}",
" codeset='$test->{codeset}'",
" $category_name",
"\nop='$test->{op}'",
"\nafter getting ", ($corrects{$category_name}
{$test->{locale_name}}
{all} // 0),
" previous correct results for this category and",
" locale,\nincluding ", ($corrects{$category_name}
{$test->{locale_name}}
{$tid} // 0),
" in this thread\n";
}
sub output_test_result($$$) {
my ($type, $result, $utf8_matches) = @_;
no locale;
print STDERR "$type";
my $copy = $result;
if (! $utf8_matches) {
if (utf8::is_utf8($copy)) {
print STDERR " (result already was in UTF-8)";
}
else {
utf8::upgrade($copy);
print STDERR " (result wasn't in UTF-8; converted for easier",
" comparison)";
}
}
print STDERR ":\n";
use Devel::Peek;
Dump $copy;
}
sub iterate { # Run some chunk of iterations of the tests
my ($tid, # Which thread
$initial_iteration, # The number of the first iteration
$count, # How many
$tests_ref) # The tests
= @_;
my $iteration = $initial_iteration;
$count += $initial_iteration;
# Repeatedly ...
while ($iteration < $count) {
my $errors = 0;
use locale;
# ... execute the tests
foreach my $test ($tests_ref->@*) {
# We know what we are expecting
my $expected = $test->{expected};
my $category_name = $test->{category_name};
# And do the test.
my $got = eval $test->{op};
if (! defined $got) {
output_test_failure_prefix($iteration,
$category_name,
$test);
output_test_result("expected", $expected,
1 # utf8ness matches, since only one
);
$errors++;
next;
}
my $utf8ness_matches = ( utf8::is_utf8($got)
== utf8::is_utf8($expected));
my $matched = ($got eq $expected);
if ($matched) {
if ($utf8ness_matches) {
no warnings 'uninitialized';
$corrects{$category_name}{$test->{locale_name}}{all}++;
$corrects{$category_name}{$test->{locale_name}}{$tid}++;
next; # Complete success!
}
}
$errors++;
output_test_failure_prefix($iteration, $category_name, $test);
if ($matched) {
print STDERR "Only difference is UTF8ness of results\n";
}
output_test_result("expected", $expected, $utf8ness_matches);
output_test_result("got", $got, $utf8ness_matches);
} # Loop to do the remaining tests for this iteration
return 0 if $errors;
$iteration++;
# A way to set a gdb break point pp_study
#study if $iteration % 10 == 0;
threads->yield();
}
return 1;
} # End of iterate() definition
EOT
$program
.=
"my $category_number_to_name\n"
;
$program
.=
<<'EOT';
sub setlocales {
# Set each category to the appropriate locale for this test set
my ($categories, $locales) = @_;
for my $i (0 .. $categories->@* - 1) {
if (! setlocale($categories->[$i], $locales->[$i])) {
my $category_name =
$map_category_number_to_name->{$categories->[$i]};
print STDERR "\nthread ", threads->tid(),
" setlocale($category_name ($categories->[$i]),",
" $locales->[$i]) failed\n";
return 0;
}
}
return 1;
}
my $startup_insurance = 1;
my $future = $startup_insurance + $thread_count * $per_thread_startup;
my $starting_time = time() + $future;
sub wait_until_time {
# Sleep until the time when all the threads are due to wake up, so
# they run as simultaneously as we can make it.
my $sleep_time = ($starting_time - time());
#printf STDERR "thread %d started, sleeping %g sec\n",
# threads->tid, $sleep_time;
if ($sleep_time < 0 && $die_on_negative_sleep) {
# What the start time should have been
my $a_better_future = $future - $sleep_time;
my $better_per_thread =
($a_better_future - $startup_insurance) / $thread_count;
printf STDERR "$per_thread_startup would need to be %g",
" for thread %d to have started\nin sync with",
" the other threads\n",
$better_per_thread, threads->tid;
die "Thread started too late";
}
else {
usleep($sleep_time * 1_000_000) if $sleep_time > 0;
}
}
# Create all the subthreads: 1..n
my @threads = map +threads->create(sub {
$SIG{'KILL'} = sub { threads->exit(); };
my $thread = shift;
# Start out with the set of tests whose number is the same as the
# thread number
my $test_set = $thread;
wait_until_time();
# Loop through all the iterations for this thread
my $this_iteration_start = 1;
do {
# Set up each category with its locale;
my $this_ref = $all_tests_ref->[$test_set];
return 0 unless setlocales($this_ref->{categories},
$this_ref->{locales});
# Then run one batch of iterations
my $result = iterate($thread,
$this_iteration_start,
$iterations_per_test_set,
$this_ref->{tests});
return 0 if $result == 0; # Quit if failed
# Next iteration will shift to use a different set of locales for
# each category
$test_set++;
$test_set = 0 if $test_set >= $thread_count;
$this_iteration_start += $iterations_per_test_set;
} while ($this_iteration_start <= $iterations);
return 1; # Success
}, $_), (1..$thread_count - 1); # For each non-0 thread
# Here is thread 0. We do a smaller chunk of iterations in it; then
# join whatever threads have finished so far, then do another chunk.
# This tests for bugs that arise as a result of joining.
my %thread0_corrects = ();
my $this_iteration_start = 1;
my $result = 1; # So far, everything is ok
my $test_set = -1; # Start with 0th test set
wait_until_time();
alarm($alarm_clock); # Guard against hangs
do {
# Next time, we'll use the next test set
$test_set++;
$test_set = 0 if $test_set >= $thread_count;
my $this_ref = $all_tests_ref->[$test_set];
# set the locales for this test set. Do this even if we
# are going to bail, so that it will be set correctly for the final
# batch after the loop.
$result &= setlocales($this_ref->{categories}, $this_ref->{locales});
if ($debug > 1) {
my @joinable = threads->list(threads::joinable);
if (@joinable) {
print STDERR "In thread 0, before iteration ",
$this_iteration_start,
" these threads are done: ",
join (", ", map { $_->tid() } @joinable),
"\n";
}
}
# Join anything already finished.
for my $thread (threads->list(threads::joinable)) {
my $thread_result = $thread->join;
if ($debug > 1) {
print STDERR "In thread 0, before iteration ",
$this_iteration_start,
" joining thread ", $thread->tid(),
"; result=", ((defined $thread_result)
? $thread_result
: "undef"),
"\n";
}
# If the thread failed badly, stop testing anything else.
if (! defined $thread_result) {
$_->kill('KILL')->detach() for threads->list();
print 0;
exit;
}
# Update the status
$result &= $thread_result;
}
# Do a chunk of iterations on this thread 0.
$result &= iterate(0,
$this_iteration_start,
$iterations_per_test_set,
$this_ref->{tests},
\%thread0_corrects);
$this_iteration_start += $iterations_per_test_set;
# And repeat as long as there are other tests
} while (threads->list(threads::all));
print $result;
EOT
fresh_perl_is(
$program
,
1,
{
eval
$switches
},
"Verify there were no failures with simultaneous running threads"
);
}