Legal \\p{} and \\P{} constructs that match no characters
Unicode has some property-value pairs that currently don't match anything. This happens generally either because they are obsolete, or they exist for symmetry with other forms, but no language has yet been encoded that uses them. In this version of Unicode, the following match zero code points:
$zero_matches
END }
# Generate list of properties that we don't accept, grouped by the reasons
# why.  This is so only put out the 'why' once, and then list all the
# properties that have that reason under it.
my %why_list;   # The keys are the reasons; the values are lists of
                # properties that have the key as their reason
# For each property, add it to the list that are suppressed for its reason
# The sort will cause the alphabetically first properties to be added to
# each list first, so each list will be sorted.
foreach my $property (sort keys %why_suppressed) {
    push @{$why_list{$why_suppressed{$property}}}, $property;
}
# For each reason (sorted by the first property that has that reason)...
my @bad_re_properties;
foreach my $why (sort { $why_list{$a}->[0] cmp $why_list{$b}->[0] }
                 keys %why_list)
{
    # Add to the output, all the properties that have that reason.
    my $has_item = 0;   # Flag if actually output anything.
    foreach my $name (@{$why_list{$why}}) {
        # Split compound names into $property and $table components
        my $property = $name;
        my $table;
        if ($property =~ / (.*) = (.*) /x) {
            $property = $1;
            $table = $2;
        }
        # This release of Unicode may not have a property that is
        # suppressed, so don't reference a non-existent one.
        $property = property_ref($property);
        next if ! defined $property;
        # And since this list is only for match tables, don't list the
        # ones that don't have match tables.
        next if ! $property->to_create_match_tables;
        # Find any abbreviation, and turn it into a compound name if this
        # is a property=value pair.
        my $short_name = $property->name;
        $short_name .= '=' . $property->table($table)->name if $table;
        # Start with an empty line.
        push @bad_re_properties, "\n\n" unless $has_item;
        # And add the property as an item for the reason.
        push @bad_re_properties, "\n=item I<$name> ($short_name)\n";
        $has_item = 1;
    }
    # And add the reason under the list of properties, if such a list
    # actually got generated.  Note that the header got added
    # unconditionally before.  But pod ignores extra blank lines, so no
    # harm.
    push @bad_re_properties, "\n$why\n" if $has_item;
} # End of looping through each reason.
if (! @bad_re_properties) {
    push @bad_re_properties,
            "*** This installation accepts ALL non-Unihan properties ***";
}
else {
    # Add =over only if non-empty to avoid an empty =over/=back section,
    # which is considered bad form.
    unshift @bad_re_properties, "\n=over 4\n";
    push @bad_re_properties, "\n=back\n";
}
# Similiarly, generate a list of files that we don't use, grouped by the
# reasons why.  First, create a hash whose keys are the reasons, and whose
# values are anonymous arrays of all the files that share that reason.
my %grouped_by_reason;
foreach my $file (keys %ignored_files) {
    push @{$grouped_by_reason{$ignored_files{$file}}}, $file;
}
foreach my $file (keys %skipped_files) {
    push @{$grouped_by_reason{$skipped_files{$file}}}, $file;
}
# Then, sort each group.
foreach my $group (keys %grouped_by_reason) {
    @{$grouped_by_reason{$group}} = sort { lc $a cmp lc $b }
                                    @{$grouped_by_reason{$group}} ;
}
# Finally, create the output text.  For each reason (sorted by the
# alphabetically first file that has that reason)...
my @unused_files;
foreach my $reason (sort { lc $grouped_by_reason{$a}->[0]
                           cmp lc $grouped_by_reason{$b}->[0]
                          }
                     keys %grouped_by_reason)
{
    # Add all the files that have that reason to the output.  Start
    # with an empty line.
    push @unused_files, "\n\n";
    push @unused_files, map { "\n=item F<$_> \n" }
                        @{$grouped_by_reason{$reason}};
    # And add the reason under the list of files
    push @unused_files, "\n$reason\n";
}
# Similarly, create the output text for the UCD section of the pod
my @ucd_pod;
foreach my $key (keys %ucd_pod) {
    next unless $ucd_pod{$key}->{'output_this'};
    push @ucd_pod, format_pod_line($indent_info_column,
                                   $ucd_pod{$key}->{'name'},
                                   $ucd_pod{$key}->{'info'},
                                   $ucd_pod{$key}->{'status'},
                                  );
}
# Sort alphabetically, and fold for output
@ucd_pod = sort { lc substr($a, 2) cmp lc substr($b, 2) } @ucd_pod;
my $ucd_pod = simple_fold(\@ucd_pod,
                       ' ',
                       $indent_info_column,
                       $automatic_pod_indent);
$ucd_pod =  format_pod_line($indent_info_column, 'NAME', '  INFO')
            . "\n"
            . $ucd_pod;
local $" = "";
# Everything is ready to assemble.
my @OUT = << "END";
=begin comment$HEADER
To change this file, edit $0 instead.
NAME
$pod_file - Index of Unicode Version $string_version character properties in Perl
DESCRIPTION
This document provides information about the portion of the Unicode database that deals with character properties, that is the portion that is defined on single code points. ("Other information in the Unicode data base" below briefly mentions other data that Unicode provides.)
Perl can provide access to all non-provisional Unicode character properties, though not all are enabled by default. The omitted ones are the Unihan properties (accessible via the CPAN module Unicode::Unihan) and certain deprecated or Unicode-internal properties. (An installation may choose to recompile Perl's tables to change this. See "Unicode character properties that are NOT accepted by Perl".)
For most purposes, access to Unicode properties from the Perl core is through regular expression matches, as described in the next section. For some special purposes, and to access the properties that are not suitable for regular expression matching, all the Unicode character properties that Perl handles are accessible via the standard Unicode::UCD module, as described in the section "Properties accessible through Unicode::UCD".
Perl also provides some additional extensions and short-cut synonyms for Unicode properties.
This document merely lists all available properties and does not attempt to explain what each property really means. There is a brief description of each Perl extension; see "Other Properties" in perlunicode for more information on these. There is some detail about Blocks, Scripts, General_Category, and Bidi_Class in perlunicode, but to find out about the intricacies of the official Unicode properties, refer to the Unicode standard. A good starting place is $unicode_reference_url.
Note that you can define your own properties; see "User-Defined Character Properties" in perlunicode.
Properties accessible through \\p{} and \\P{}
The Perl regular expression \\p{} and \\P{} constructs give access to most of the Unicode character properties. The table below shows all these constructs, both single and compound forms.
Compound forms consist of two components, separated by an equals sign or a colon. The first component is the property name, and the second component is the particular value of the property to match against, for example, \\p{Script: Greek} and \\p{Script=Greek} both mean to match characters whose Script property is Greek.
Single forms, like \\p{Greek}, are mostly Perl-defined shortcuts for their equivalent compound forms. The table shows these equivalences. (In our example, \\p{Greek} is a just a shortcut for \\p{Script=Greek}.) There are also a few Perl-defined single forms that are not shortcuts for a compound form. One such is \\p{Word}. These are also listed in the table.
In parsing these constructs, Perl always ignores Upper/lower case differences everywhere within the {braces}. Thus \\p{Greek} means the same thing as \\p{greek}. But note that changing the case of the "p" or "P" before the left brace completely changes the meaning of the construct, from "match" (for \\p{}) to "doesn't match" (for \\P{}). Casing in this document is for improved legibility.
Also, white space, hyphens, and underscores are normally ignored everywhere between the {braces}, and hence can be freely added or removed even if the /x modifier hasn't been specified on the regular expression. But $a_bold_stricter at the beginning of an entry in the table below means that tighter (stricter) rules are used for that entry:
- Single form (\\p{name}) tighter rules:
- 
White space, hyphens, and underscores ARE significant except for: - white space adjacent to a non-word character 
- underscores separating digits in numbers 
 That means, for example, that you can freely add or remove white space adjacent to (but within) the braces without affecting the meaning. 
- Compound form (\\p{name=value}or\\p{name:value}) tighter rules:
- 
The tighter rules given above for the single form apply to everything to the right of the colon or equals; the looser rules still apply to everything to the left. That means, for example, that you can freely add or remove white space adjacent to (but within) the braces and the colon or equal sign. 
Some properties are considered obsolete by Unicode, but still available. There are several varieties of obsolescence:
- Stabilized
- 
A property may be stabilized. Such a determination does not indicate that the property should or should not be used; instead it is a declaration that the property will not be maintained nor extended for newly encoded characters. Such properties are marked with $a_bold_stabilized in the table. 
- Deprecated
- 
A property may be deprecated, perhaps because its original intent has been replaced by another property, or because its specification was somehow defective. This means that its use is strongly discouraged, so much so that a warning will be issued if used, unless the regular expression is in the scope of a no warnings 'deprecated'statement. $A_bold_deprecated flags each such entry in the table, and the entry there for the longest, most descriptive version of the property will give the reason it is deprecated, and perhaps advice. Perl may issue such a warning, even for properties that aren't officially deprecated by Unicode, when there used to be characters or code points that were matched by them, but no longer. This is to warn you that your program may not work like it did on earlier Unicode releases.A deprecated property may be made unavailable in a future Perl version, so it is best to move away from them. A deprecated property may also be stabilized, but this fact is not shown. 
- Obsolete
- 
Properties marked with $a_bold_obsolete in the table are considered (plain) obsolete. Generally this designation is given to properties that Unicode once used for internal purposes (but not any longer). 
Some Perl extensions are present for backwards compatibility and are discouraged from being used, but are not obsolete. $A_bold_discouraged flags each such entry in the table. Future Unicode versions may force some of these extensions to be removed without warning, replaced by another property with the same name that means something different. Use the equivalent shown instead.
@block_warning
The table below has two columns. The left column contains the \\p{} constructs to look up, possibly preceded by the flags mentioned above; and the right column contains information about them, like a description, or synonyms. It shows both the single and compound forms for each property that has them. If the left column is a short name for a property, the right column will give its longer, more descriptive name; and if the left column is the longest name, the right column will show any equivalent shortest name, in both single and compound forms if applicable.
The right column will also caution you if a property means something different than what might normally be expected.
All single forms are Perl extensions; a few compound forms are as well, and are noted as such.
Numbers in (parentheses) indicate the total number of code points matched by the property. For emphasis, those properties that match no code points at all are listed as well in a separate section following the table.
Most properties match the same code points regardless of whether "/i" case-insensitive matching is specified or not. But a few properties are affected. These are shown with the notation
(/i= other_property)in the second column. Under case-insensitive matching they match the same code pode points as the property "other_property".
There is no description given for most non-Perl defined properties (See $unicode_reference_url for that).
For compactness, '*' is used as a wildcard instead of showing all possible combinations. For example, entries like:
\\p{Gc: *}                                  \\p{General_Category: *}mean that 'Gc' is a synonym for 'General_Category', and anything that is valid for the latter is also valid for the former. Similarly,
\\p{Is_*}                                   \\p{*}means that if and only if, for example, \\p{Foo} exists, then \\p{Is_Foo} and \\p{IsFoo} are also valid and all mean the same thing. And similarly, \\p{Foo=Bar} means the same as \\p{Is_Foo=Bar} and \\p{IsFoo=Bar}. "*" here is restricted to something not beginning with an underscore.
Also, in binary properties, 'Yes', 'T', and 'True' are all synonyms for 'Y'. And 'No', 'F', and 'False' are all synonyms for 'N'. The table shows 'Y*' and 'N*' to indicate this, and doesn't have separate entries for the other possibilities. Note that not all properties which have values 'Yes' and 'No' are binary, and they have all their values spelled out without using this wild card, and a NOT clause in their description that highlights their not being binary. These also require the compound form to match them, whereas true binary properties have both single and compound forms available.
Note that all non-essential underscores are removed in the display of the short names below.
Legend summary:
- * is a wild-card
- (\\d+) in the info column gives the number of code points matched by this property.
- $DEPRECATED means this is deprecated.
- $OBSOLETE means this is obsolete.
- $STABILIZED means this is stabilized.
- $STRICTER means tighter (stricter) name matching applies.
- $DISCOURAGED means use of this form is discouraged, and may not be stable.
$formatted_properties
$zero_matches
Properties accessible through Unicode::UCD
All the Unicode character properties mentioned above (except for those marked as for internal use by Perl) are also accessible by "prop_invlist()" in Unicode::UCD.
Due to their nature, not all Unicode character properties are suitable for regular expression matches, nor prop_invlist(). The remaining non-provisional, non-internal ones are accessible via "prop_invmap()" in Unicode::UCD (except for those that this Perl installation hasn't included; see below for which those are).
For compatibility with other parts of Perl, all the single forms given in the table in the section above are recognized. BUT, there are some ambiguities between some Perl extensions and the Unicode properties, all of which are silently resolved in favor of the official Unicode property. To avoid surprises, you should only use prop_invmap() for forms listed in the table below, which omits the non-recommended ones. The affected forms are the Perl single form equivalents of Unicode properties, such as \\p{sc} being a single-form equivalent of \\p{gc=sc}, which is treated by prop_invmap() as the Script property, whose short name is sc. The table indicates the current ambiguities in the INFO column, beginning with the word "NOT".
The standard Unicode properties listed below are documented in $unicode_reference_url; Perl_Decimal_Digit is documented in "prop_invmap()" in Unicode::UCD. The other Perl extensions are in "Other Properties" in perlunicode;
The first column in the table is a name for the property; the second column is an alternative name, if any, plus possibly some annotations. The alternative name is the property's full name, unless that would simply repeat the first column, in which case the second column indicates the property's short name (if different). The annotations are given only in the entry for the full name. If a property is obsolete, etc, the entry will be flagged with the same characters used in the table in the section above, like $DEPRECATED or $STABILIZED.
$ucd_pod
Properties accessible through other means
Certain properties are accessible also via core function calls. These are:
Lowercase_Mapping          lc() and lcfirst()
Titlecase_Mapping          ucfirst()
Uppercase_Mapping          uc()Also, Case_Folding is accessible through the /i modifier in regular expressions, the \\F transliteration escape, and the fc operator.
And, the Name and Name_Aliases properties are accessible through the \\N{} interpolation in double-quoted strings and regular expressions; and functions charnames::viacode(), charnames::vianame(), and charnames::string_vianame() (which require a use charnames (); to be specified.
Finally, most properties related to decomposition are accessible via Unicode::Normalize.
Unicode character properties that are NOT accepted by Perl
Perl will generate an error for a few character properties in Unicode when used in a regular expression. The non-Unihan ones are listed below, with the reasons they are not accepted, perhaps with work-arounds. The short names for the properties are listed enclosed in (parentheses). As described after the list, an installation can change the defaults and choose to accept any of these. The list is machine generated based on the choices made for the installation that generated this document.
@bad_re_properties
An installation can choose to allow any of these to be matched by downloading the Unicode database from http://www.unicode.org/Public/ to \$Config{privlib}/unicore/ in the Perl source tree, changing the controlling lists contained in the program \$Config{privlib}/unicore/mktables and then re-compiling and installing. (\%Config is available from the Config module).
Other information in the Unicode data base
The Unicode data base is delivered in two different formats. The XML version is valid for more modern Unicode releases. The other version is a collection of files. The two are intended to give equivalent information. Perl uses the older form; this allows you to recompile Perl to use early Unicode releases.
The only non-character property that Perl currently supports is Named Sequences, in which a sequence of code points is given a name and generally treated as a single entity. (Perl supports these via the \\N{...} double-quotish construct, "charnames::string_vianame(name)" in charnames, and "namedseq()" in Unicode::UCD.
Below is a list of the files in the Unicode data base that Perl doesn't currently use, along with very brief descriptions of their purposes. Some of the names of the files have been shortened from those that Unicode uses, in order to allow them to be distinguishable from similarly named files on file systems for which only the first 8 characters of a name are significant.
@unused_files
SEE ALSO
END
# And write it.  The 0 means no utf8.
main::write([ $pod_directory, "$pod_file.pod" ], 0, \@OUT);
return;
}sub make_Heavy () { # Create and write Heavy.pl, which passes info about the tables to # utf8_heavy.pl
# Stringify structures for output
my $loose_property_name_of
                       = simple_dumper(\%loose_property_name_of, ' ' x 4);
chomp $loose_property_name_of;
my $stricter_to_file_of = simple_dumper(\%stricter_to_file_of, ' ' x 4);
chomp $stricter_to_file_of;
my $loose_to_file_of = simple_dumper(\%loose_to_file_of, ' ' x 4);
chomp $loose_to_file_of;
my $nv_floating_to_rational
                       = simple_dumper(\%nv_floating_to_rational, ' ' x 4);
chomp $nv_floating_to_rational;
my $why_deprecated = simple_dumper(\%utf8::why_deprecated, ' ' x 4);
chomp $why_deprecated;
# We set the key to the file when we associated files with tables, but we
# couldn't do the same for the value then, as we might not have the file
# for the alternate table figured out at that time.
foreach my $cased (keys %caseless_equivalent_to) {
    my @path = $caseless_equivalent_to{$cased}->file_path;
    my $path = join '/', @path[1, -1];
    $caseless_equivalent_to{$cased} = $path;
}
my $caseless_equivalent_to
                       = simple_dumper(\%caseless_equivalent_to, ' ' x 4);
chomp $caseless_equivalent_to;
my $loose_property_to_file_of
                    = simple_dumper(\%loose_property_to_file_of, ' ' x 4);
chomp $loose_property_to_file_of;
my $file_to_swash_name = simple_dumper(\%file_to_swash_name, ' ' x 4);
chomp $file_to_swash_name;
my @heavy = <<END;
$HEADER
$INTERNAL_ONLY_HEADER# This file is for the use of utf8_heavy.pl and Unicode::UCD
# Maps Unicode (not Perl single-form extensions) property names in loose # standard form to their corresponding standard names \%utf8::loose_property_name_of = ( $loose_property_name_of );
# Maps property, table to file for those using stricter matching \%utf8::stricter_to_file_of = ( $stricter_to_file_of );
# Maps property, table to file for those using loose matching \%utf8::loose_to_file_of = ( $loose_to_file_of );
# Maps floating point to fractional form \%utf8::nv_floating_to_rational = ( $nv_floating_to_rational );
# If a floating point number doesn't have enough digits in it to get this # close to a fraction, it isn't considered to be that fraction even if all the # digits it does have match. \$utf8::max_floating_slop = $MAX_FLOATING_SLOP;
# Deprecated tables to generate a warning for. The key is the file containing # the table, so as to avoid duplication, as many property names can map to the # file, but we only need one entry for all of them. \%utf8::why_deprecated = ( $why_deprecated );
# A few properties have different behavior under /i matching. This maps # those to substitute files to use under /i. \%utf8::caseless_equivalent = ( $caseless_equivalent_to );
# Property names to mapping files \%utf8::loose_property_to_file_of = ( $loose_property_to_file_of );
# Files to the swash names within them. \%utf8::file_to_swash_name = ( $file_to_swash_name );
1; END
main::write("Heavy.pl", 0, \@heavy);  # The 0 means no utf8.
return;
}sub make_Name_pm () { # Create and write Name.pm, which contains subroutines and data to use in # conjunction with Name.pl
# Maybe there's nothing to do.
return unless $has_hangul_syllables || @code_points_ending_in_code_point;
my @name = <<END;
$HEADER
$INTERNAL_ONLY_HEADER
END
# Convert these structures to output format.
my $code_points_ending_in_code_point =
    main::simple_dumper(\@code_points_ending_in_code_point,
                        ' ' x 8);
my $names = main::simple_dumper(\%names_ending_in_code_point,
                                ' ' x 8);
my $loose_names = main::simple_dumper(\%loose_names_ending_in_code_point,
                                ' ' x 8);
# Do the same with the Hangul names,
my $jamo;
my $jamo_l;
my $jamo_v;
my $jamo_t;
my $jamo_re;
if ($has_hangul_syllables) {
    # Construct a regular expression of all the possible
    # combinations of the Hangul syllables.
    my @L_re;   # Leading consonants
    for my $i ($LBase .. $LBase + $LCount - 1) {
        push @L_re, $Jamo{$i}
    }
    my @V_re;   # Middle vowels
    for my $i ($VBase .. $VBase + $VCount - 1) {
        push @V_re, $Jamo{$i}
    }
    my @T_re;   # Trailing consonants
    for my $i ($TBase + 1 .. $TBase + $TCount - 1) {
        push @T_re, $Jamo{$i}
    }
    # The whole re is made up of the L V T combination.
    $jamo_re = '('
                . join ('|', sort @L_re)
                . ')('
                . join ('|', sort @V_re)
                . ')('
                . join ('|', sort @T_re)
                . ')?';
    # These hashes needed by the algorithm were generated
    # during reading of the Jamo.txt file
    $jamo = main::simple_dumper(\%Jamo, ' ' x 8);
    $jamo_l = main::simple_dumper(\%Jamo_L, ' ' x 8);
    $jamo_v = main::simple_dumper(\%Jamo_V, ' ' x 8);
    $jamo_t = main::simple_dumper(\%Jamo_T, ' ' x 8);
}
push @name, <<END;package charnames;
# This module contains machine-generated tables and code for the # algorithmically-determinable Unicode character names. The following # routines can be used to translate between name and code point and vice versa
{ # Closure
# Matches legal code point.  4-6 hex numbers, If there are 6, the first
# two must be 10; if there are 5, the first must not be a 0.  Written this
# way to decrease backtracking.  The first regex allows the code point to
# be at the end of a word, but to work properly, the word shouldn't end
# with a valid hex character.  The second one won't match a code point at
# the end of a word, and doesn't have the run-on issue
my \$run_on_code_point_re = qr/$run_on_code_point_re/;
my \$code_point_re = qr/$code_point_re/;
# In the following hash, the keys are the bases of names which include
# the code point in the name, like CJK UNIFIED IDEOGRAPH-4E01.  The value
# of each key is another hash which is used to get the low and high ends
# for each range of code points that apply to the name.
my %names_ending_in_code_point = (
$names
);
# The following hash is a copy of the previous one, except is for loose
# matching, so each name has blanks and dashes squeezed out
my %loose_names_ending_in_code_point = (
$loose_names
);
# And the following array gives the inverse mapping from code points to
# names.  Lowest code points are first
my \@code_points_ending_in_code_point = (
$code_points_ending_in_code_point
);
END
# Earlier releases didn't have Jamos.  No sense outputting
# them unless will be used.
if ($has_hangul_syllables) {
    push @name, <<END;
# Convert from code point to Jamo short name for use in composing Hangul
# syllable names
my %Jamo = (
$jamo
);
# Leading consonant (can be null)
my %Jamo_L = (
$jamo_l
);
# Vowel
my %Jamo_V = (
$jamo_v
);
# Optional trailing consonant
my %Jamo_T = (
$jamo_t
);
# Computed re that splits up a Hangul name into LVT or LV syllables
my \$syllable_re = qr/$jamo_re/;
my \$HANGUL_SYLLABLE = "HANGUL SYLLABLE ";
my \$loose_HANGUL_SYLLABLE = "HANGULSYLLABLE";
# These constants names and values were taken from the Unicode standard,
# version 5.1, section 3.12.  They are used in conjunction with Hangul
# syllables
my \$SBase = $SBase_string;
my \$LBase = $LBase_string;
my \$VBase = $VBase_string;
my \$TBase = $TBase_string;
my \$SCount = $SCount;
my \$LCount = $LCount;
my \$VCount = $VCount;
my \$TCount = $TCount;
my \$NCount = \$VCount * \$TCount;
END
} # End of has Jamos
push @name, << 'END';
sub name_to_code_point_special {
    my ($name, $loose) = @_;
    # Returns undef if not one of the specially handled names; otherwise
    # returns the code point equivalent to the input name
    # $loose is non-zero if to use loose matching, 'name' in that case
    # must be input as upper case with all blanks and dashes squeezed out.
END
if ($has_hangul_syllables) {
    push @name, << 'END';
    if ((! $loose && $name =~ s/$HANGUL_SYLLABLE//)
        || ($loose && $name =~ s/$loose_HANGUL_SYLLABLE//))
    {
        return if $name !~ qr/^$syllable_re$/;
        my $L = $Jamo_L{$1};
        my $V = $Jamo_V{$2};
        my $T = (defined $3) ? $Jamo_T{$3} : 0;
        return ($L * $VCount + $V) * $TCount + $T + $SBase;
    }
END
}
push @name, << 'END';
    # Name must end in 'code_point' for this to handle.
    return if (($loose && $name !~ /^ (.*?) ($run_on_code_point_re) $/x)
               || (! $loose && $name !~ /^ (.*) ($code_point_re) $/x));
    my $base = $1;
    my $code_point = CORE::hex $2;
    my $names_ref;
    if ($loose) {
        $names_ref = \%loose_names_ending_in_code_point;
    }
    else {
        return if $base !~ s/-$//;
        $names_ref = \%names_ending_in_code_point;
    }
    # Name must be one of the ones which has the code point in it.
    return if ! $names_ref->{$base};
    # Look through the list of ranges that apply to this name to see if
    # the code point is in one of them.
    for (my $i = 0; $i < scalar @{$names_ref->{$base}{'low'}}; $i++) {
        return if $names_ref->{$base}{'low'}->[$i] > $code_point;
        next if $names_ref->{$base}{'high'}->[$i] < $code_point;
        # Here, the code point is in the range.
        return $code_point;
    }
    # Here, looked like the name had a code point number in it, but
    # did not match one of the valid ones.
    return;
}
sub code_point_to_name_special {
    my $code_point = shift;
    # Returns the name of a code point if algorithmically determinable;
    # undef if not
END
if ($has_hangul_syllables) {
    push @name, << 'END';
    # If in the Hangul range, calculate the name based on Unicode's
    # algorithm
    if ($code_point >= $SBase && $code_point <= $SBase + $SCount -1) {
        use integer;
        my $SIndex = $code_point - $SBase;
        my $L = $LBase + $SIndex / $NCount;
        my $V = $VBase + ($SIndex % $NCount) / $TCount;
        my $T = $TBase + $SIndex % $TCount;
        $name = "$HANGUL_SYLLABLE$Jamo{$L}$Jamo{$V}";
        $name .= $Jamo{$T} if $T != $TBase;
        return $name;
    }
END
}
push @name, << 'END';
    # Look through list of these code points for one in range.
    foreach my $hash (@code_points_ending_in_code_point) {
        return if $code_point < $hash->{'low'};
        if ($code_point <= $hash->{'high'}) {
            return sprintf("%s-%04X", $hash->{'name'}, $code_point);
        }
    }
    return;            # None found
}
} # End closure1; END
main::write("Name.pm", 0, \@name);  # The 0 means no utf8.
return;
}sub make_UCD () { # Create and write UCD.pl, which passes info about the tables to # Unicode::UCD
# Create a mapping from each alias of Perl single-form extensions to all
# its equivalent aliases, for quick look-up.
my %perlprop_to_aliases;
foreach my $table ($perl->tables) {
    # First create the list of the aliases of each extension
    my @aliases_list;    # List of legal aliases for this extension
    my $table_name = $table->name;
    my $standard_table_name = standardize($table_name);
    my $table_full_name = $table->full_name;
    my $standard_table_full_name = standardize($table_full_name);
    # Make sure that the list has both the short and full names
    push @aliases_list, $table_name, $table_full_name;
    my $found_ucd = 0;  # ? Did we actually get an alias that should be
                        # output for this table
    # Go through all the aliases (including the two just added), and add
    # any new unique ones to the list
    foreach my $alias ($table->aliases) {
        # Skip non-legal names
        next unless $alias->ok_as_filename;
        next unless $alias->ucd;
        $found_ucd = 1;     # have at least one legal name
        my $name = $alias->name;
        my $standard = standardize($name);
        # Don't repeat a name that is equivalent to one already on the
        # list
        next if $standard eq $standard_table_name;
        next if $standard eq $standard_table_full_name;
        push @aliases_list, $name;
    }
    # If there were no legal names, don't output anything.
    next unless $found_ucd;
    # To conserve memory in the program reading these in, omit full names
    # that are identical to the short name, when those are the only two
    # aliases for the property.
    if (@aliases_list == 2 && $aliases_list[0] eq $aliases_list[1]) {
        pop @aliases_list;
    }
    # Here, @aliases_list is the list of all the aliases that this
    # extension legally has.  Now can create a map to it from each legal
    # standardized alias
    foreach my $alias ($table->aliases) {
        next unless $alias->ucd;
        next unless $alias->ok_as_filename;
        push @{$perlprop_to_aliases{standardize($alias->name)}},
             @aliases_list;
    }
}
# Make a list of all combinations of properties/values that are suppressed.
my @suppressed;
if (! $debug_skip) {    # This tends to fail in this debug mode
    foreach my $property_name (keys %why_suppressed) {
        # Just the value
        my $value_name = $1 if $property_name =~ s/ = ( .* ) //x;
        # The hash may contain properties not in this release of Unicode
        next unless defined (my $property = property_ref($property_name));
        # Find all combinations
        foreach my $prop_alias ($property->aliases) {
            my $prop_alias_name = standardize($prop_alias->name);
            # If no =value, there's just one combination possibe for this
            if (! $value_name) {
                # The property may be suppressed, but there may be a proxy
                # for it, so it shouldn't be listed as suppressed
                next if $prop_alias->ucd;
                push @suppressed, $prop_alias_name;
            }
            else {  # Otherwise
                foreach my $value_alias
                                ($property->table($value_name)->aliases)
                {
                    next if $value_alias->ucd;
                    push @suppressed, "$prop_alias_name="
                                    .  standardize($value_alias->name);
                }
            }
        }
    }
}
@suppressed = sort @suppressed; # So doesn't change between runs of this
                                # program
# Convert the structure below (designed for Name.pm) to a form that UCD
# wants, so it doesn't have to modify it at all; i.e. so that it includes
# an element for the Hangul syllables in the appropriate place, and
# otherwise changes the name to include the "-<code point>" suffix.
my @algorithm_names;
my $done_hangul = 0;
# Copy it linearly.
for my $i (0 .. @code_points_ending_in_code_point - 1) {
    # Insert the hanguls in the correct place.
    if (! $done_hangul
        && $code_points_ending_in_code_point[$i]->{'low'} > $SBase)
    {
        $done_hangul = 1;
        push @algorithm_names, { low => $SBase,
                                 high => $SBase + $SCount - 1,
                                 name => '<hangul syllable>',
                                };
    }
    # Copy the current entry, modified.
    push @algorithm_names, {
        low => $code_points_ending_in_code_point[$i]->{'low'},
        high => $code_points_ending_in_code_point[$i]->{'high'},
        name =>
           "$code_points_ending_in_code_point[$i]->{'name'}-<code point>",
    };
}
# Serialize these structures for output.
my $loose_to_standard_value
                      = simple_dumper(\%loose_to_standard_value, ' ' x 4);
chomp $loose_to_standard_value;
my $string_property_loose_to_name
                = simple_dumper(\%string_property_loose_to_name, ' ' x 4);
chomp $string_property_loose_to_name;
my $perlprop_to_aliases = simple_dumper(\%perlprop_to_aliases, ' ' x 4);
chomp $perlprop_to_aliases;
my $prop_aliases = simple_dumper(\%prop_aliases, ' ' x 4);
chomp $prop_aliases;
my $prop_value_aliases = simple_dumper(\%prop_value_aliases, ' ' x 4);
chomp $prop_value_aliases;
my $suppressed = (@suppressed) ? simple_dumper(\@suppressed, ' ' x 4) : "";
chomp $suppressed;
my $algorithm_names = simple_dumper(\@algorithm_names, ' ' x 4);
chomp $algorithm_names;
my $ambiguous_names = simple_dumper(\%ambiguous_names, ' ' x 4);
chomp $ambiguous_names;
my $loose_defaults = simple_dumper(\%loose_defaults, ' ' x 4);
chomp $loose_defaults;
my @ucd = <<END;
$HEADER
$INTERNAL_ONLY_HEADER# This file is for the use of Unicode::UCD
# Highest legal Unicode code point \$Unicode::UCD::MAX_UNICODE_CODEPOINT = 0x$MAX_UNICODE_CODEPOINT_STRING;
# Hangul syllables \$Unicode::UCD::HANGUL_BEGIN = $SBase_string; \$Unicode::UCD::HANGUL_COUNT = $SCount;
# Keys are all the possible "prop=value" combinations, in loose form; values # are the standard loose name for the 'value' part of the key \%Unicode::UCD::loose_to_standard_value = ( $loose_to_standard_value );
# String property loose names to standard loose name \%Unicode::UCD::string_property_loose_to_name = ( $string_property_loose_to_name );
# Keys are Perl extensions in loose form; values are each one's list of # aliases \%Unicode::UCD::loose_perlprop_to_name = ( $perlprop_to_aliases );
# Keys are standard property name; values are each one's aliases \%Unicode::UCD::prop_aliases = ( $prop_aliases );
# Keys of top level are standard property name; values are keys to another # hash, Each one is one of the property's values, in standard form. The # values are that prop-val's aliases. If only one specified, the short and # long alias are identical. \%Unicode::UCD::prop_value_aliases = ( $prop_value_aliases );
# Ordered (by code point ordinal) list of the ranges of code points whose # names are algorithmically determined. Each range entry is an anonymous hash # of the start and end points and a template for the names within it. \@Unicode::UCD::algorithmic_named_code_points = ( $algorithm_names );
# The properties that as-is have two meanings, and which must be disambiguated \%Unicode::UCD::ambiguous_names = ( $ambiguous_names );
# Keys are the prop-val combinations which are the default values for the # given property, expressed in standard loose form \%Unicode::UCD::loose_defaults = ( $loose_defaults );
# All combinations of names that are suppressed. # This is actually for UCD.t, so it knows which properties shouldn't have # entries. If it got any bigger, would probably want to put it in its own # file to use memory only when it was needed, in testing. \@Unicode::UCD::suppressed_properties = ( $suppressed );
1; END
main::write("UCD.pl", 0, \@ucd);  # The 0 means no utf8.
return;
}sub write_all_tables() { # Write out all the tables generated by this program to files, as well as # the supporting data structures, pod file, and .t file.
my @writables;              # List of tables that actually get written
my %match_tables_to_write;  # Used to collapse identical match tables
                            # into one file.  Each key is a hash function
                            # result to partition tables into buckets.
                            # Each value is an array of the tables that
                            # fit in the bucket.
# For each property ...
# (sort so that if there is an immutable file name, it has precedence, so
# some other property can't come in and take over its file name.  (We
# don't care if both defined, as they had better be different anyway.)
# The property named 'Perl' needs to be first (it doesn't have any
# immutable file name) because empty properties are defined in terms of
# it's table named 'Any'.)   We also sort by the property's name.  This is
# just for repeatability of the outputs between runs of this program, but
# does not affect correctness.
PROPERTY:
foreach my $property ($perl,
                      sort { return -1 if defined $a->file;
                             return 1 if defined $b->file;
                             return $a->name cmp $b->name;
                            } grep { $_ != $perl } property_ref('*'))
{
    my $type = $property->type;
    # And for each table for that property, starting with the mapping
    # table for it ...
    TABLE:
    foreach my $table($property,
                    # and all the match tables for it (if any), sorted so
                    # the ones with the shortest associated file name come
                    # first.  The length sorting prevents problems of a
                    # longer file taking a name that might have to be used
                    # by a shorter one.  The alphabetic sorting prevents
                    # differences between releases
                    sort {  my $ext_a = $a->external_name;
                            return 1 if ! defined $ext_a;
                            my $ext_b = $b->external_name;
                            return -1 if ! defined $ext_b;
                            # But return the non-complement table before
                            # the complement one, as the latter is defined
                            # in terms of the former, and needs to have
                            # the information for the former available.
                            return 1 if $a->complement != 0;
                            return -1 if $b->complement != 0;
                            # Similarly, return a subservient table after
                            # a leader
                            return 1 if $a->leader != $a;
                            return -1 if $b->leader != $b;
                            my $cmp = length $ext_a <=> length $ext_b;
                            # Return result if lengths not equal
                            return $cmp if $cmp;
                            # Alphabetic if lengths equal
                            return $ext_a cmp $ext_b
                    } $property->tables
                )
    {
        # Here we have a table associated with a property.  It could be
        # the map table (done first for each property), or one of the
        # other tables.  Determine which type.
        my $is_property = $table->isa('Property');
        my $name = $table->name;
        my $complete_name = $table->complete_name;
        # See if should suppress the table if is empty, but warn if it
        # contains something.
        my $suppress_if_empty_warn_if_not
                = $why_suppress_if_empty_warn_if_not{$complete_name} || 0;
        # Calculate if this table should have any code points associated
        # with it or not.
        my $expected_empty =
            # $perl should be empty, as well as properties that we just
            # don't do anything with
            ($is_property
                && ($table == $perl
                    || grep { $complete_name eq $_ }
                                                @unimplemented_properties
                )
            )
            # Match tables in properties we skipped populating should be
            # empty
            || (! $is_property && ! $property->to_create_match_tables)
            # Tables and properties that are expected to have no code
            # points should be empty
            || $suppress_if_empty_warn_if_not
        ;
        # Set a boolean if this table is the complement of an empty binary
        # table
        my $is_complement_of_empty_binary =
            $type == $BINARY &&
            (($table == $property->table('Y')
                && $property->table('N')->is_empty)
            || ($table == $property->table('N')
                && $property->table('Y')->is_empty));
        if ($table->is_empty) {
            if ($suppress_if_empty_warn_if_not) {
                $table->set_fate($SUPPRESSED,
                                 $suppress_if_empty_warn_if_not);
            }
            # Suppress (by skipping them) expected empty tables.
            next TABLE if $expected_empty;
            # And setup to later output a warning for those that aren't
            # known to be allowed to be empty.  Don't do the warning if
            # this table is a child of another one to avoid duplicating
            # the warning that should come from the parent one.
            if (($table == $property || $table->parent == $table)
                && $table->fate != $SUPPRESSED
                && $table->fate != $MAP_PROXIED
                && ! grep { $complete_name =~ /^$_$/ }
                                                @tables_that_may_be_empty)
            {
                push @unhandled_properties, "$table";
            }
            # An empty table is just the complement of everything.
            $table->set_complement($Any) if $table != $property;
        }
        elsif ($expected_empty) {
            my $because = "";
            if ($suppress_if_empty_warn_if_not) {
                $because = " because $suppress_if_empty_warn_if_not";
            }
            Carp::my_carp("Not expecting property $table$because.  Generating file for it anyway.");
        }
        # Some tables should match everything
        my $expected_full =
            ($table->fate == $SUPPRESSED)
            ? 0
            : ($is_property)
              ? # All these types of map tables will be full because
                # they will have been populated with defaults
                ($type == $ENUM || $type == $FORCED_BINARY)
              : # A match table should match everything if its method
                # shows it should
                ($table->matches_all
                # The complement of an empty binary table will match
                # everything
                || $is_complement_of_empty_binary
                )
        ;
        my $count = $table->count;
        if ($expected_full) {
            if ($count != $MAX_UNICODE_CODEPOINTS) {
                Carp::my_carp("$table matches only "
                . clarify_number($count)
                . " Unicode code points but should match "
                . clarify_number($MAX_UNICODE_CODEPOINTS)
                . " (off by "
                .  clarify_number(abs($MAX_UNICODE_CODEPOINTS - $count))
                . ").  Proceeding anyway.");
            }
            # Here is expected to be full.  If it is because it is the
            # complement of an (empty) binary table that is to be
            # suppressed, then suppress this one as well.
            if ($is_complement_of_empty_binary) {
                my $opposing_name = ($name eq 'Y') ? 'N' : 'Y';
                my $opposing = $property->table($opposing_name);
                my $opposing_status = $opposing->status;
                if ($opposing_status) {
                    $table->set_status($opposing_status,
                                       $opposing->status_info);
                }
            }
        }
        elsif ($count == $MAX_UNICODE_CODEPOINTS
               && ($table == $property || $table->leader == $table)
               && $table->property->status ne $PLACEHOLDER)
        {
                Carp::my_carp("$table unexpectedly matches all Unicode code points.  Proceeding anyway.");
        }
        if ($table->fate >= $SUPPRESSED) {
            if (! $is_property) {
                my @children = $table->children;
                foreach my $child (@children) {
                    if ($child->fate < $SUPPRESSED) {
                        Carp::my_carp_bug("'$table' is suppressed and has a child '$child' which isn't");
                    }
                }
            }
            next TABLE;
        }
        if (! $is_property) {
            make_ucd_table_pod_entries($table) if $table->property == $perl;
            # Several things need to be done just once for each related
            # group of match tables.  Do them on the parent.
            if ($table->parent == $table) {
                # Add an entry in the pod file for the table; it also does
                # the children.
                make_re_pod_entries($table) if defined $pod_directory;
                # See if the the table matches identical code points with
                # something that has already been output.  In that case,
                # no need to have two files with the same code points in
                # them.  We use the table's hash() method to store these
                # in buckets, so that it is quite likely that if two
                # tables are in the same bucket they will be identical, so
                # don't have to compare tables frequently.  The tables
                # have to have the same status to share a file, so add
                # this to the bucket hash.  (The reason for this latter is
                # that Heavy.pl associates a status with a file.)
                # We don't check tables that are inverses of others, as it
                # would lead to some coding complications, and checking
                # all the regular ones should find everything.
                if ($table->complement == 0) {
                    my $hash = $table->hash . ';' . $table->status;
                    # Look at each table that is in the same bucket as
                    # this one would be.
                    foreach my $comparison
                                        (@{$match_tables_to_write{$hash}})
                    {
                        if ($table->matches_identically_to($comparison)) {
                            $table->set_equivalent_to($comparison,
                                                            Related => 0);
                            next TABLE;
                        }
                    }
                    # Here, not equivalent, add this table to the bucket.
                    push @{$match_tables_to_write{$hash}}, $table;
                }
            }
        }
        else {
            # Here is the property itself.
            # Don't write out or make references to the $perl property
            next if $table == $perl;
            make_ucd_table_pod_entries($table);
            # There is a mapping stored of the various synonyms to the
            # standardized name of the property for utf8_heavy.pl.
            # Also, the pod file contains entries of the form:
            # \p{alias: *}         \p{full: *}
            # rather than show every possible combination of things.
            my @property_aliases = $property->aliases;
            my $full_property_name = $property->full_name;
            my $property_name = $property->name;
            my $standard_property_name = standardize($property_name);
            my $standard_property_full_name
                                    = standardize($full_property_name);
            # We also create for Unicode::UCD a list of aliases for
            # the property.  The list starts with the property name;
            # then its full name.
            my @property_list;
            my @standard_list;
            if ( $property->fate <= $MAP_PROXIED) {
                @property_list = ($property_name, $full_property_name);
                @standard_list = ($standard_property_name,
                                    $standard_property_full_name);
            }
            # For each synonym ...
            for my $i (0 .. @property_aliases - 1)  {
                my $alias = $property_aliases[$i];
                my $alias_name = $alias->name;
                my $alias_standard = standardize($alias_name);
                # Add other aliases to the list of property aliases
                if ($property->fate <= $MAP_PROXIED
                    && ! grep { $alias_standard eq $_ } @standard_list)
                {
                    push @property_list, $alias_name;
                    push @standard_list, $alias_standard;
                }
                # For utf8_heavy, set the mapping of the alias to the
                # property
                if ($type == $STRING) {
                    if ($property->fate <= $MAP_PROXIED) {
                        $string_property_loose_to_name{$alias_standard}
                                        = $standard_property_name;
                    }
                }
                else {
                    if (exists ($loose_property_name_of{$alias_standard}))
                    {
                        Carp::my_carp("There already is a property with the same standard name as $alias_name: $loose_property_name_of{$alias_standard}.  Old name is retained");
                    }
                    else {
                        $loose_property_name_of{$alias_standard}
                                            = $standard_property_name;
                    }
                    # Now for the re pod entry for this alias.  Skip if not
                    # outputting a pod; skip the first one, which is the
                    # full name so won't have an entry like: '\p{full: *}
                    # \p{full: *}', and skip if don't want an entry for
                    # this one.
                    next if $i == 0
                            || ! defined $pod_directory
                            || ! $alias->make_re_pod_entry;
                    my $rhs = "\\p{$full_property_name: *}";
                    if ($property != $perl && $table->perl_extension) {
                        $rhs .= ' (Perl extension)';
                    }
                    push @match_properties,
                        format_pod_line($indent_info_column,
                                    '\p{' . $alias->name . ': *}',
                                    $rhs,
                                    $alias->status);
                }
            }
            # The list of all possible names is attached to each alias, so
            # lookup is easy
            if (@property_list) {
                push @{$prop_aliases{$standard_list[0]}}, @property_list;
            }
            if ($property->fate <= $MAP_PROXIED) {
                # Similarly, we create for Unicode::UCD a list of
                # property-value aliases.
                my $property_full_name = $property->full_name;
                # Look at each table in the property...
                foreach my $table ($property->tables) {
                    my @values_list;
                    my $table_full_name = $table->full_name;
                    my $standard_table_full_name
                                          = standardize($table_full_name);
                    my $table_name = $table->name;
                    my $standard_table_name = standardize($table_name);
                    # The list starts with the table name and its full
                    # name.
                    push @values_list, $table_name, $table_full_name;
                    # We add to the table each unique alias that isn't
                    # discouraged from use.
                    foreach my $alias ($table->aliases) {
                        next if $alias->status
                             && $alias->status eq $DISCOURAGED;
                        my $name = $alias->name;
                        my $standard = standardize($name);
                        next if $standard eq $standard_table_name;
                        next if $standard eq $standard_table_full_name;
                        push @values_list, $name;
                    }
                    # Here @values_list is a list of all the aliases for
                    # the table.  That is, all the property-values given
                    # by this table.  By agreement with Unicode::UCD,
                    # if the name and full name are identical, and there
                    # are no other names, drop the duplcate entry to save
                    # memory.
                    if (@values_list == 2
                        && $values_list[0] eq $values_list[1])
                    {
                        pop @values_list
                    }
                    # To save memory, unlike the similar list for property
                    # aliases above, only the standard forms hve the list.
                    # This forces an extra step of converting from input
                    # name to standard name, but the savings are
                    # considerable.  (There is only marginal savings if we
                    # did this with the property aliases.)
                    push @{$prop_value_aliases{$standard_property_name}{$standard_table_name}}, @values_list;
                }
            }
            # Don't write out a mapping file if not desired.
            next if ! $property->to_output_map;
        }
        # Here, we know we want to write out the table, but don't do it
        # yet because there may be other tables that come along and will
        # want to share the file, and the file's comments will change to
        # mention them.  So save for later.
        push @writables, $table;
    } # End of looping through the property and all its tables.
} # End of looping through all properties.
# Now have all the tables that will have files written for them.  Do it.
foreach my $table (@writables) {
    my @directory;
    my $filename;
    my $property = $table->property;
    my $is_property = ($table == $property);
    if (! $is_property) {
        # Match tables for the property go in lib/$subdirectory, which is
        # the property's name.  Don't use the standard file name for this,
        # as may get an unfamiliar alias
        @directory = ($matches_directory, $property->external_name);
    }
    else {
        @directory = $table->directory;
        $filename = $table->file;
    }
    # Use specified filename if available, or default to property's
    # shortest name.  We need an 8.3 safe filename (which means "an 8
    # safe" filename, since after the dot is only 'pl', which is < 3)
    # The 2nd parameter is if the filename shouldn't be changed, and
    # it shouldn't iff there is a hard-coded name for this table.
    $filename = construct_filename(
                            $filename || $table->external_name,
                            ! $filename,    # mutable if no filename
                            \@directory);
    register_file_for_name($table, \@directory, $filename);
    # Only need to write one file when shared by more than one
    # property
    next if ! $is_property
            && ($table->leader != $table || $table->complement != 0);
    # Construct a nice comment to add to the file
    $table->set_final_comment;
    $table->write;
}
# Write out the pod file
make_pod;
# And Heavy.pl, Name.pm, UCD.pl
make_Heavy;
make_Name_pm;
make_UCD;
make_property_test_script() if $make_test_script;
make_normalization_test_script() if $make_norm_test_script;
return;
}my @white_space_separators = ( # This used only for making the test script. "", ' ', "\t", ' ' );
sub generate_separator($) { # This used only for making the test script. It generates the colon or # equal separator between the property and property value, with random # white space surrounding the separator
my $lhs = shift;
return "" if $lhs eq "";  # No separator if there's only one (the r) side
# Choose space before and after randomly
my $spaces_before =$white_space_separators[rand(@white_space_separators)];
my $spaces_after = $white_space_separators[rand(@white_space_separators)];
# And return the whole complex, half the time using a colon, half the
# equals
return $spaces_before
        . (rand() < 0.5) ? '=' : ':'
        . $spaces_after;
}sub generate_tests($$$$$) { # This used only for making the test script. It generates test cases that # are expected to compile successfully in perl. Note that the lhs and # rhs are assumed to already be as randomized as the caller wants.
    my $lhs = shift;           # The property: what's to the left of the colon
                               #  or equals separator
    my $rhs = shift;           # The property value; what's to the right
    my $valid_code = shift;    # A code point that's known to be in the
                               # table given by lhs=rhs; undef if table is
                               # empty
    my $invalid_code = shift;  # A code point known to not be in the table;
                               # undef if the table is all code points
    my $warning = shift;
    # Get the colon or equal
    my $separator = generate_separator($lhs);
    # The whole 'property=value'
    my $name = "$lhs$separator$rhs";
    my @output;
    # Create a complete set of tests, with complements.
    if (defined $valid_code) {
	push @output, <<"EOC"
Expect(1, $valid_code, '\\p{$name}', $warning);
Expect(0, $valid_code, '\\p{^$name}', $warning);
Expect(0, $valid_code, '\\P{$name}', $warning);
Expect(1, $valid_code, '\\P{^$name}', $warning);
EOC
    }
    if (defined $invalid_code) {
	push @output, <<"EOC"
Expect(0, $invalid_code, '\\p{$name}', $warning);
Expect(1, $invalid_code, '\\p{^$name}', $warning);
Expect(1, $invalid_code, '\\P{$name}', $warning);
Expect(0, $invalid_code, '\\P{^$name}', $warning);
EOC
    }
    return @output;
}sub generate_error($$$) { # This used only for making the test script. It generates test cases that # are expected to not only not match, but to be syntax or similar errors
my $lhs = shift;                # The property: what's to the left of the
                                # colon or equals separator
my $rhs = shift;                # The property value; what's to the right
my $already_in_error = shift;   # Boolean; if true it's known that the
                            # unmodified lhs and rhs will cause an error.
                            # This routine should not force another one
# Get the colon or equal
my $separator = generate_separator($lhs);
# Since this is an error only, don't bother to randomly decide whether to
# put the error on the left or right side; and assume that the rhs is
# loosely matched, again for convenience rather than rigor.
$rhs = randomize_loose_name($rhs, 'ERROR') unless $already_in_error;
my $property = $lhs . $separator . $rhs;
return <<"EOC";
Error('\\p{$property}');
Error('\\P{$property}');
EOC
}# These are used only for making the test script # XXX Maybe should also have a bad strict seps, which includes underscore.
my @good_loose_seps = ( " ", "-", "\t", "", "_", ); my @bad_loose_seps = ( "/a/", ':=', );
sub randomize_stricter_name { # This used only for making the test script. Take the input name and # return a randomized, but valid version of it under the stricter matching # rules.
my $name = shift;
Carp::carp_extra_args(\@_) if main::DEBUG && @_;
# If the name looks like a number (integer, floating, or rational), do
# some extra work
if ($name =~ qr{ ^ ( -? ) (\d+ ( ( [./] ) \d+ )? ) $ }x) {
    my $sign = $1;
    my $number = $2;
    my $separator = $3;
    # If there isn't a sign, part of the time add a plus
    # Note: Not testing having any denominator having a minus sign
    if (! $sign) {
        $sign = '+' if rand() <= .3;
    }
    # And add 0 or more leading zeros.
    $name = $sign . ('0' x int rand(10)) . $number;
    if (defined $separator) {
        my $extra_zeros = '0' x int rand(10);
        if ($separator eq '.') {
            # Similarly, add 0 or more trailing zeros after a decimal
            # point
            $name .= $extra_zeros;
        }
        else {
            # Or, leading zeros before the denominator
            $name =~ s,/,/$extra_zeros,;
        }
    }
}
# For legibility of the test, only change the case of whole sections at a
# time.  To do this, first split into sections.  The split returns the
# delimiters
my @sections;
for my $section (split / ( [ - + \s _ . ]+ ) /x, $name) {
    trace $section if main::DEBUG && $to_trace;
    if (length $section > 1 && $section !~ /\D/) {
        # If the section is a sequence of digits, about half the time
        # randomly add underscores between some of them.
        if (rand() > .5) {
            # Figure out how many underscores to add.  max is 1 less than
            # the number of digits.  (But add 1 at the end to make sure
            # result isn't 0, and compensate earlier by subtracting 2
            # instead of 1)
            my $num_underscores = int rand(length($section) - 2) + 1;
            # And add them evenly throughout, for convenience, not rigor
            use integer;
            my $spacing = (length($section) - 1)/ $num_underscores;
            my $temp = $section;
            $section = "";
            for my $i (1 .. $num_underscores) {
                $section .= substr($temp, 0, $spacing, "") . '_';
            }
            $section .= $temp;
        }
        push @sections, $section;
    }
    else {
        # Here not a sequence of digits.  Change the case of the section
        # randomly
        my $switch = int rand(4);
        if ($switch == 0) {
            push @sections, uc $section;
        }
        elsif ($switch == 1) {
            push @sections, lc $section;
        }
        elsif ($switch == 2) {
            push @sections, ucfirst $section;
        }
        else {
            push @sections, $section;
        }
    }
}
trace "returning", join "", @sections if main::DEBUG && $to_trace;
return join "", @sections;
}sub randomize_loose_name($;$) { # This used only for making the test script
my $name = shift;
my $want_error = shift;  # if true, make an error
Carp::carp_extra_args(\@_) if main::DEBUG && @_;
$name = randomize_stricter_name($name);
my @parts;
push @parts, $good_loose_seps[rand(@good_loose_seps)];
# Preserve trailing ones for the sake of not stripping the underscore from
# 'L_'
for my $part (split /[-\s_]+ (?= . )/, $name) {
    if (@parts) {
        if ($want_error and rand() < 0.3) {
            push @parts, $bad_loose_seps[rand(@bad_loose_seps)];
            $want_error = 0;
        }
        else {
            push @parts, $good_loose_seps[rand(@good_loose_seps)];
        }
    }
    push @parts, $part;
}
my $new = join("", @parts);
trace "$name => $new" if main::DEBUG && $to_trace;
if ($want_error) {
    if (rand() >= 0.5) {
        $new .= $bad_loose_seps[rand(@bad_loose_seps)];
    }
    else {
        $new = $bad_loose_seps[rand(@bad_loose_seps)] . $new;
    }
}
return $new;
}# Used to make sure don't generate duplicate test cases. my %test_generated;
sub make_property_test_script() { # This used only for making the test script # this written directly -- it's huge.
print "Making test script\n" if $verbosity >= $PROGRESS;
# This uses randomness to test different possibilities without testing all
# possibilities.  To ensure repeatability, set the seed to 0.  But if
# tests are added, it will perturb all later ones in the .t file
srand 0;
$t_path = 'TestProp.pl' unless defined $t_path; # the traditional name
# Keep going down an order of magnitude
# until find that adding this quantity to
# 1 remains 1; but put an upper limit on
# this so in case this algorithm doesn't
# work properly on some platform, that we
# won't loop forever.
my $digits = 0;
my $min_floating_slop = 1;
while (1+ $min_floating_slop != 1
        && $digits++ < 50)
{
    my $next = $min_floating_slop / 10;
    last if $next == 0; # If underflows,
                        # use previous one
    $min_floating_slop = $next;
}
# It doesn't matter whether the elements of this array contain single lines
# or multiple lines. main::write doesn't count the lines.
my @output;
# Sort these so get results in same order on different runs of this
# program
foreach my $property (sort { $a->name cmp $b->name } property_ref('*')) {
    foreach my $table (sort { $a->name cmp $b->name } $property->tables) {
        # Find code points that match, and don't match this table.
        my $valid = $table->get_valid_code_point;
        my $invalid = $table->get_invalid_code_point;
        my $warning = ($table->status eq $DEPRECATED)
                        ? "'deprecated'"
                        : '""';
        # Test each possible combination of the property's aliases with
        # the table's.  If this gets to be too many, could do what is done
        # in the set_final_comment() for Tables
        my @table_aliases = $table->aliases;
        my @property_aliases = $table->property->aliases;
        # Every property can be optionally be prefixed by 'Is_', so test
        # that those work, by creating such a new alias for each
        # pre-existing one.
        push @property_aliases, map { Alias->new("Is_" . $_->name,
                                                $_->loose_match,
                                                $_->make_re_pod_entry,
                                                $_->ok_as_filename,
                                                $_->status,
                                                $_->ucd,
                                                )
                                     } @property_aliases;
        my $max = max(scalar @table_aliases, scalar @property_aliases);
        for my $j (0 .. $max - 1) {
            # The current alias for property is the next one on the list,
            # or if beyond the end, start over.  Similarly for table
            my $property_name
                        = $property_aliases[$j % @property_aliases]->name;
            $property_name = "" if $table->property == $perl;
            my $table_alias = $table_aliases[$j % @table_aliases];
            my $table_name = $table_alias->name;
            my $loose_match = $table_alias->loose_match;
            # If the table doesn't have a file, any test for it is
            # already guaranteed to be in error
            my $already_error = ! $table->file_path;
            # Generate error cases for this alias.
            push @output, generate_error($property_name,
                                         $table_name,
                                         $already_error);
            # If the table is guaranteed to always generate an error,
            # quit now without generating success cases.
            next if $already_error;
            # Now for the success cases.
            my $random;
            if ($loose_match) {
                # For loose matching, create an extra test case for the
                # standard name.
                my $standard = standardize($table_name);
                # $test_name should be a unique combination for each test
                # case; used just to avoid duplicate tests
                my $test_name = "$property_name=$standard";
                # Don't output duplicate test cases.
                if (! exists $test_generated{$test_name}) {
                    $test_generated{$test_name} = 1;
                    push @output, generate_tests($property_name,
                                                 $standard,
                                                 $valid,
                                                 $invalid,
                                                 $warning,
                                             );
                }
                $random = randomize_loose_name($table_name)
            }
            else { # Stricter match
                $random = randomize_stricter_name($table_name);
            }
            # Now for the main test case for this alias.
            my $test_name = "$property_name=$random";
            if (! exists $test_generated{$test_name}) {
                $test_generated{$test_name} = 1;
                push @output, generate_tests($property_name,
                                             $random,
                                             $valid,
                                             $invalid,
                                             $warning,
                                         );
                # If the name is a rational number, add tests for the
                # floating point equivalent.
                if ($table_name =~ qr{/}) {
                    # Calculate the float, and find just the fraction.
                    my $float = eval $table_name;
                    my ($whole, $fraction)
                                        = $float =~ / (.*) \. (.*) /x;
                    # Starting with one digit after the decimal point,
                    # create a test for each possible precision (number of
                    # digits past the decimal point) until well beyond the
                    # native number found on this machine.  (If we started
                    # with 0 digits, it would be an integer, which could
                    # well match an unrelated table)
                    PLACE:
                    for my $i (1 .. $min_floating_slop + 3) {
                        my $table_name = sprintf("%.*f", $i, $float);
                        if ($i < $MIN_FRACTION_LENGTH) {
                            # If the test case has fewer digits than the
                            # minimum acceptable precision, it shouldn't
                            # succeed, so we expect an error for it.
                            # E.g., 2/3 = .7 at one decimal point, and we
                            # shouldn't say it matches .7.  We should make
                            # it be .667 at least before agreeing that the
                            # intent was to match 2/3.  But at the
                            # less-than- acceptable level of precision, it
                            # might actually match an unrelated number.
                            # So don't generate a test case if this
                            # conflating is possible.  In our example, we
                            # don't want 2/3 matching 7/10, if there is
                            # a 7/10 code point.
                            for my $existing
                                    (keys %nv_floating_to_rational)
                            {
                                next PLACE
                                    if abs($table_name - $existing)
                                            < $MAX_FLOATING_SLOP;
                            }
                            push @output, generate_error($property_name,
                                                         $table_name,
                                                         1   # 1 => already an error
                                          );
                        }
                        else {
                            # Here the number of digits exceeds the
                            # minimum we think is needed.  So generate a
                            # success test case for it.
                            push @output, generate_tests($property_name,
                                                         $table_name,
                                                         $valid,
                                                         $invalid,
                                                         $warning,
                                         );
                        }
                    }
                }
            }
        }
    }
}
&write($t_path,
       0,           # Not utf8;
       [<DATA>,
        @output,
        (map {"Test_X('$_');\n"} @backslash_X_tests),
        "Finished();\n"]);
return;
}sub make_normalization_test_script() { print "Making normalization test script\n" if $verbosity >= $PROGRESS;
my $n_path = 'TestNorm.pl';
unshift @normalization_tests, <<'END';
use utf8;
use Test::More;sub ord_string { # Convert packed ords to printable string use charnames (); return "'" . join("", map { '\N{' . charnames::viacode($_) . '}' } unpack "U*", shift) . "'"; #return "'" . join(" ", map { sprintf "%04X", $_ } unpack "U*", shift) . "'"; }
sub Test_N { my ($source, $nfc, $nfd, $nfkc, $nfkd) = @_; my $display_source = ord_string($source); my $display_nfc = ord_string($nfc); my $display_nfd = ord_string($nfd); my $display_nfkc = ord_string($nfkc); my $display_nfkd = ord_string($nfkd);
use Unicode::Normalize;
#    NFC
#      nfc ==  toNFC(source) ==  toNFC(nfc) ==  toNFC(nfd)
#      nfkc ==  toNFC(nfkc) ==  toNFC(nfkd)
#
#    NFD
#      nfd ==  toNFD(source) ==  toNFD(nfc) ==  toNFD(nfd)
#      nfkd ==  toNFD(nfkc) ==  toNFD(nfkd)
#
#    NFKC
#      nfkc == toNFKC(source) == toNFKC(nfc) == toNFKC(nfd) ==
#      toNFKC(nfkc) == toNFKC(nfkd)
#
#    NFKD
#      nfkd == toNFKD(source) == toNFKD(nfc) == toNFKD(nfd) ==
#      toNFKD(nfkc) == toNFKD(nfkd)
is(NFC($source), $nfc, "NFC($display_source) eq $display_nfc");
is(NFC($nfc), $nfc, "NFC($display_nfc) eq $display_nfc");
is(NFC($nfd), $nfc, "NFC($display_nfd) eq $display_nfc");
is(NFC($nfkc), $nfkc, "NFC($display_nfkc) eq $display_nfkc");
is(NFC($nfkd), $nfkc, "NFC($display_nfkd) eq $display_nfkc");
is(NFD($source), $nfd, "NFD($display_source) eq $display_nfd");
is(NFD($nfc), $nfd, "NFD($display_nfc) eq $display_nfd");
is(NFD($nfd), $nfd, "NFD($display_nfd) eq $display_nfd");
is(NFD($nfkc), $nfkd, "NFD($display_nfkc) eq $display_nfkd");
is(NFD($nfkd), $nfkd, "NFD($display_nfkd) eq $display_nfkd");
is(NFKC($source), $nfkc, "NFKC($display_source) eq $display_nfkc");
is(NFKC($nfc), $nfkc, "NFKC($display_nfc) eq $display_nfkc");
is(NFKC($nfd), $nfkc, "NFKC($display_nfd) eq $display_nfkc");
is(NFKC($nfkc), $nfkc, "NFKC($display_nfkc) eq $display_nfkc");
is(NFKC($nfkd), $nfkc, "NFKC($display_nfkd) eq $display_nfkc");
is(NFKD($source), $nfkd, "NFKD($display_source) eq $display_nfkd");
is(NFKD($nfc), $nfkd, "NFKD($display_nfc) eq $display_nfkd");
is(NFKD($nfd), $nfkd, "NFKD($display_nfd) eq $display_nfkd");
is(NFKD($nfkc), $nfkd, "NFKD($display_nfkc) eq $display_nfkd");
is(NFKD($nfkd), $nfkd, "NFKD($display_nfkd) eq $display_nfkd");
}
END
&write($n_path,
       1,           # Is utf8;
       [
        @normalization_tests,
        'done_testing();'
        ]);
return;
}# This is a list of the input files and how to handle them. The files are # processed in their order in this list. Some reordering is possible if # desired, but the v0 files should be first, and the extracted before the # others except DAge.txt (as data in an extracted file can be over-ridden by # the non-extracted. Some other files depend on data derived from an earlier # file, like UnicodeData requires data from Jamo, and the case changing and # folding requires data from Unicode. Mostly, it is safest to order by first # version releases in (except the Jamo). DAge.txt is read before the # extracted ones because of the rarely used feature $compare_versions. In the # unlikely event that there were ever an extracted file that contained the Age # property information, it would have to go in front of DAge. # # The version strings allow the program to know whether to expect a file or # not, but if a file exists in the directory, it will be processed, even if it # is in a version earlier than expected, so you can copy files from a later # release into an earlier release's directory. my @input_file_objects = ( Input_file->new('PropertyAliases.txt', v0, Handler => \&process_PropertyAliases, ), Input_file->new(undef, v0, # No file associated with this Progress_Message => 'Finishing property setup', Handler => \&finish_property_setup, ), Input_file->new('PropValueAliases.txt', v0, Handler => \&process_PropValueAliases, Has_Missings_Defaults => $NOT_IGNORED, ), Input_file->new('DAge.txt', v3.2.0, Has_Missings_Defaults => $NOT_IGNORED, Property => 'Age' ), Input_file->new("${EXTRACTED}DGeneralCategory.txt", v3.1.0, Property => 'General_Category', ), Input_file->new("${EXTRACTED}DCombiningClass.txt", v3.1.0, Property => 'Canonical_Combining_Class', Has_Missings_Defaults => $NOT_IGNORED, ), Input_file->new("${EXTRACTED}DNumType.txt", v3.1.0, Property => 'Numeric_Type', Has_Missings_Defaults => $NOT_IGNORED, ), Input_file->new("${EXTRACTED}DEastAsianWidth.txt", v3.1.0, Property => 'East_Asian_Width', Has_Missings_Defaults => $NOT_IGNORED, ), Input_file->new("${EXTRACTED}DLineBreak.txt", v3.1.0, Property => 'Line_Break', Has_Missings_Defaults => $NOT_IGNORED, ), Input_file->new("${EXTRACTED}DBidiClass.txt", v3.1.1, Property => 'Bidi_Class', Has_Missings_Defaults => $NOT_IGNORED, ), Input_file->new("${EXTRACTED}DDecompositionType.txt", v3.1.0, Property => 'Decomposition_Type', Has_Missings_Defaults => $NOT_IGNORED, ), Input_file->new("${EXTRACTED}DBinaryProperties.txt", v3.1.0), Input_file->new("${EXTRACTED}DNumValues.txt", v3.1.0, Property => 'Numeric_Value', Each_Line_Handler => \&filter_numeric_value_line, Has_Missings_Defaults => $NOT_IGNORED, ), Input_file->new("${EXTRACTED}DJoinGroup.txt", v3.1.0, Property => 'Joining_Group', Has_Missings_Defaults => $NOT_IGNORED, ),
Input_file->new("${EXTRACTED}DJoinType.txt", v3.1.0,
                Property => 'Joining_Type',
                Has_Missings_Defaults => $NOT_IGNORED,
                ),
Input_file->new('Jamo.txt', v2.0.0,
                Property => 'Jamo_Short_Name',
                Each_Line_Handler => \&filter_jamo_line,
                ),
Input_file->new('UnicodeData.txt', v1.1.5,
                Pre_Handler => \&setup_UnicodeData,
                # We clean up this file for some early versions.
                Each_Line_Handler => [ (($v_version lt v2.0.0 )
                                        ? \&filter_v1_ucd
                                        : ($v_version eq v2.1.5)
                                            ? \&filter_v2_1_5_ucd
                                            # And for 5.14 Perls with 6.0,
                                            # have to also make changes
                                            : ($v_version ge v6.0.0
                                               && $^V lt v5.17.0)
                                                ? \&filter_v6_ucd
                                                : undef),
                                        # Early versions did not have the
                                        # proper Unicode_1 names for the
                                        # controls
                                        (($v_version lt v3.0.0)
                                        ? \&filter_early_U1_names
                                        : undef),
                                        # Early versions did not correctly
                                        # use the later method for giving
                                        # decimal digit values
                                        (($v_version le v3.2.0)
                                        ? \&filter_bad_Nd_ucd
                                        : undef),
                                        # And the main filter
                                        \&filter_UnicodeData_line,
                                     ],
                EOF_Handler => \&EOF_UnicodeData,
                ),
Input_file->new('ArabicShaping.txt', v2.0.0,
                Each_Line_Handler =>
                    [ ($v_version lt 4.1.0)
                                ? \&filter_old_style_arabic_shaping
                                : undef,
                    \&filter_arabic_shaping_line,
                    ],
                Has_Missings_Defaults => $NOT_IGNORED,
                ),
Input_file->new('Blocks.txt', v2.0.0,
                Property => 'Block',
                Has_Missings_Defaults => $NOT_IGNORED,
                Each_Line_Handler => \&filter_blocks_lines
                ),
Input_file->new('PropList.txt', v2.0.0,
                Each_Line_Handler => (($v_version lt v3.1.0)
                                        ? \&filter_old_style_proplist
                                        : undef),
                ),
Input_file->new('Unihan.txt', v2.0.0,
                Pre_Handler => \&setup_unihan,
                Optional => 1,
                Each_Line_Handler => \&filter_unihan_line,
                    ),
Input_file->new('SpecialCasing.txt', v2.1.8,
                Each_Line_Handler => ($v_version eq 2.1.8)
                                     ? \&filter_2_1_8_special_casing_line
                                     : \&filter_special_casing_line,
                Pre_Handler => \&setup_special_casing,
                Has_Missings_Defaults => $IGNORED,
                ),
Input_file->new(
                'LineBreak.txt', v3.0.0,
                Has_Missings_Defaults => $NOT_IGNORED,
                Property => 'Line_Break',
                # Early versions had problematic syntax
                Each_Line_Handler => (($v_version lt v3.1.0)
                                    ? \&filter_early_ea_lb
                                    : undef),
                ),
Input_file->new('EastAsianWidth.txt', v3.0.0,
                Property => 'East_Asian_Width',
                Has_Missings_Defaults => $NOT_IGNORED,
                # Early versions had problematic syntax
                Each_Line_Handler => (($v_version lt v3.1.0)
                                    ? \&filter_early_ea_lb
                                    : undef),
                ),
Input_file->new('CompositionExclusions.txt', v3.0.0,
                Property => 'Composition_Exclusion',
                ),
Input_file->new('BidiMirroring.txt', v3.0.1,
                Property => 'Bidi_Mirroring_Glyph',
                Has_Missings_Defaults => ($v_version lt v6.2.0)
                                          ? $NO_DEFAULTS
                                          # Is <none> which doesn't mean
                                          # anything to us, we will use the
                                          # null string
                                          : $IGNORED,
                ),
Input_file->new("NormTest.txt", v3.0.0,
                 Handler => \&process_NormalizationsTest,
                 Skip => ($make_norm_test_script) ? 0 : 'Validation Tests',
                ),
Input_file->new('CaseFolding.txt', v3.0.1,
                Pre_Handler => \&setup_case_folding,
                Each_Line_Handler =>
                    [ ($v_version lt v3.1.0)
                             ? \&filter_old_style_case_folding
                             : undef,
                       \&filter_case_folding_line
                    ],
                Has_Missings_Defaults => $IGNORED,
                ),
Input_file->new('DCoreProperties.txt', v3.1.0,
                # 5.2 changed this file
                Has_Missings_Defaults => (($v_version ge v5.2.0)
                                        ? $NOT_IGNORED
                                        : $NO_DEFAULTS),
                ),
Input_file->new('Scripts.txt', v3.1.0,
                Property => 'Script',
                Has_Missings_Defaults => $NOT_IGNORED,
                ),
Input_file->new('DNormalizationProps.txt', v3.1.0,
                Has_Missings_Defaults => $NOT_IGNORED,
                Each_Line_Handler => (($v_version lt v4.0.1)
                                  ? \&filter_old_style_normalization_lines
                                  : undef),
                ),
Input_file->new('HangulSyllableType.txt', v0,
                Has_Missings_Defaults => $NOT_IGNORED,
                Property => 'Hangul_Syllable_Type',
                Pre_Handler => ($v_version lt v4.0.0)
                               ? \&generate_hst
                               : undef,
                ),
Input_file->new("$AUXILIARY/WordBreakProperty.txt", v4.1.0,
                Property => 'Word_Break',
                Has_Missings_Defaults => $NOT_IGNORED,
                ),
Input_file->new("$AUXILIARY/GraphemeBreakProperty.txt", v0,
                Property => 'Grapheme_Cluster_Break',
                Has_Missings_Defaults => $NOT_IGNORED,
                Pre_Handler => ($v_version lt v4.1.0)
                               ? \&generate_GCB
                               : undef,
                ),
Input_file->new("$AUXILIARY/GCBTest.txt", v4.1.0,
                Handler => \&process_GCB_test,
                ),
Input_file->new("$AUXILIARY/LBTest.txt", v4.1.0,
                Skip => 'Validation Tests',
                ),
Input_file->new("$AUXILIARY/SBTest.txt", v4.1.0,
                Skip => 'Validation Tests',
                ),
Input_file->new("$AUXILIARY/WBTest.txt", v4.1.0,
                Skip => 'Validation Tests',
                ),
Input_file->new("$AUXILIARY/SentenceBreakProperty.txt", v4.1.0,
                Property => 'Sentence_Break',
                Has_Missings_Defaults => $NOT_IGNORED,
                ),
Input_file->new('NamedSequences.txt', v4.1.0,
                Handler => \&process_NamedSequences
                ),
Input_file->new('NameAliases.txt', v0,
                Property => 'Name_Alias',
                Pre_Handler => ($v_version le v6.0.0)
                               ? \&setup_early_name_alias
                               : undef,
                Each_Line_Handler => ($v_version le v6.0.0)
                               ? \&filter_early_version_name_alias_line
                               : \&filter_later_version_name_alias_line,
                ),
Input_file->new("BidiTest.txt", v5.2.0,
                Skip => 'Validation Tests',
                ),
Input_file->new('UnihanIndicesDictionary.txt', v5.2.0,
                Optional => 1,
                Each_Line_Handler => \&filter_unihan_line,
                ),
Input_file->new('UnihanDataDictionaryLike.txt', v5.2.0,
                Optional => 1,
                Each_Line_Handler => \&filter_unihan_line,
                ),
Input_file->new('UnihanIRGSources.txt', v5.2.0,
                Optional => 1,
                Pre_Handler => \&setup_unihan,
                Each_Line_Handler => \&filter_unihan_line,
                ),
Input_file->new('UnihanNumericValues.txt', v5.2.0,
                Optional => 1,
                Each_Line_Handler => \&filter_unihan_line,
                ),
Input_file->new('UnihanOtherMappings.txt', v5.2.0,
                Optional => 1,
                Each_Line_Handler => \&filter_unihan_line,
                ),
Input_file->new('UnihanRadicalStrokeCounts.txt', v5.2.0,
                Optional => 1,
                Each_Line_Handler => \&filter_unihan_line,
                ),
Input_file->new('UnihanReadings.txt', v5.2.0,
                Optional => 1,
                Each_Line_Handler => \&filter_unihan_line,
                ),
Input_file->new('UnihanVariants.txt', v5.2.0,
                Optional => 1,
                Each_Line_Handler => \&filter_unihan_line,
                ),
Input_file->new('ScriptExtensions.txt', v6.0.0,
                Property => 'Script_Extensions',
                Pre_Handler => \&setup_script_extensions,
                Each_Line_Handler => \&filter_script_extensions_line,
                Has_Missings_Defaults => (($v_version le v6.0.0)
                                        ? $NO_DEFAULTS
                                        : $IGNORED),
                ),
# The two Indic files are actually available starting in v6.0.0, but their
# property values are missing from PropValueAliases.txt in that release,
# so that further work would have to be done to get them to work properly
# for that release.
Input_file->new('IndicMatraCategory.txt', v6.1.0,
                Property => 'Indic_Matra_Category',
                Has_Missings_Defaults => $NOT_IGNORED,
                Skip => "Provisional; for the analysis and processing of Indic scripts",
                ),
Input_file->new('IndicSyllabicCategory.txt', v6.1.0,
                Property => 'Indic_Syllabic_Category',
                Has_Missings_Defaults => $NOT_IGNORED,
                Skip => "Provisional; for the analysis and processing of Indic scripts",
                ),
);# End of all the preliminaries. # Do it...
if ($compare_versions) { Carp::my_carp(<<END Warning. \$compare_versions is set. Output is not suitable for production END ); }
# Put into %potential_files a list of all the files in the directory structure # that could be inputs to this program, excluding those that we should ignore. # Use absolute file names because it makes it easier across machine types. my @ignored_files_full_names = map { File::Spec->rel2abs( internal_file_to_platform($_)) } keys %ignored_files; File::Find::find({ wanted=>sub { return unless /\.txt$/i; # Some platforms change the name's case my $full = lc(File::Spec->rel2abs($_)); $potential_files{$full} = 1 if ! grep { $full eq lc($_) } @ignored_files_full_names; return; } }, File::Spec->curdir());
my @mktables_list_output_files; my $old_start_time = 0;
if (! -e $file_list) { print "'$file_list' doesn't exist, so forcing rebuild.\n" if $verbosity >= $VERBOSE; $write_unchanged_files = 1; } elsif ($write_unchanged_files) { print "Not checking file list '$file_list'.\n" if $verbosity >= $VERBOSE; } else { print "Reading file list '$file_list'\n" if $verbosity >= $VERBOSE; my $file_handle; if (! open $file_handle, "<", $file_list) { Carp::my_carp("Failed to open '$file_list'; turning on -globlist option instead: $!"); $glob_list = 1; } else { my @input;
    # Read and parse mktables.lst, placing the results from the first part
    # into @input, and the second part into @mktables_list_output_files
    for my $list ( \@input, \@mktables_list_output_files ) {
        while (<$file_handle>) {
            s/^ \s+ | \s+ $//xg;
            if (/^ \s* \# .* Autogenerated\ starting\ on\ (\d+)/x) {
                $old_start_time = $1;
            }
            next if /^ \s* (?: \# .* )? $/x;
            last if /^ =+ $/x;
            my ( $file ) = split /\t/;
            push @$list, $file;
        }
        @$list = uniques(@$list);
        next;
    }
    # Look through all the input files
    foreach my $input (@input) {
        next if $input eq 'version'; # Already have checked this.
        # Ignore if doesn't exist.  The checking about whether we care or
        # not is done via the Input_file object.
        next if ! file_exists($input);
        # The paths are stored with relative names, and with '/' as the
        # delimiter; convert to absolute on this machine
        my $full = lc(File::Spec->rel2abs(internal_file_to_platform($input)));
        $potential_files{lc $full} = 1
            if ! grep { lc($full) eq lc($_) } @ignored_files_full_names;
    }
}
close $file_handle;
}if ($glob_list) {
# Here wants to process all .txt files in the directory structure.
# Convert them to full path names.  They are stored in the platform's
# relative style
my @known_files;
foreach my $object (@input_file_objects) {
    my $file = $object->file;
    next unless defined $file;
    push @known_files, File::Spec->rel2abs($file);
}
my @unknown_input_files;
foreach my $file (keys %potential_files) {  # The keys are stored in lc
    next if grep { $file eq lc($_) } @known_files;
    # Here, the file is unknown to us.  Get relative path name
    $file = File::Spec->abs2rel($file);
    push @unknown_input_files, $file;
    # What will happen is we create a data structure for it, and add it to
    # the list of input files to process.  First get the subdirectories
    # into an array
    my (undef, $directories, undef) = File::Spec->splitpath($file);
    $directories =~ s;/$;;;     # Can have extraneous trailing '/'
    my @directories = File::Spec->splitdir($directories);
    # If the file isn't extracted (meaning none of the directories is the
    # extracted one), just add it to the end of the list of inputs.
    if (! grep { $EXTRACTED_DIR eq $_ } @directories) {
        push @input_file_objects, Input_file->new($file, v0);
    }
    else {
        # Here, the file is extracted.  It needs to go ahead of most other
        # processing.  Search for the first input file that isn't a
        # special required property (that is, find one whose first_release
        # is non-0), and isn't extracted.  Also, the Age property file is
        # processed before the extracted ones, just in case
        # $compare_versions is set.
        for (my $i = 0; $i < @input_file_objects; $i++) {
            if ($input_file_objects[$i]->first_released ne v0
                && lc($input_file_objects[$i]->file) ne 'dage.txt'
                && $input_file_objects[$i]->file !~ /$EXTRACTED_DIR/i)
            {
                splice @input_file_objects, $i, 0,
                                            Input_file->new($file, v0);
                last;
            }
        }
    }
}
if (@unknown_input_files) {
    print STDERR simple_fold(join_lines(<<ENDThe following files are unknown as to how to handle. Assuming they are typical property files. You'll know by later error messages if it worked or not: END ) . " " . join(", ", @unknown_input_files) . "\n\n"); } } # End of looking through directory structure for more .txt files.
# Create the list of input files from the objects we have defined, plus # version my @input_files = qw(version Makefile); foreach my $object (@input_file_objects) { my $file = $object->file; next if ! defined $file; # Not all objects have files next if $object->optional && ! -e $file; push @input_files, $file; }
if ( $verbosity >= $VERBOSE ) { print "Expecting ".scalar( @input_files )." input files. ", "Checking ".scalar( @mktables_list_output_files )." output files.\n"; }
# We set $most_recent to be the most recently changed input file, including # this program itself (done much earlier in this file) foreach my $in (@input_files) { next unless -e $in; # Keep going even if missing a file my $mod_time = (stat $in)[9]; $most_recent = $mod_time if $mod_time > $most_recent;
# See that the input files have distinct names, to warn someone if they
# are adding a new one
if ($make_list) {
    my ($volume, $directories, $file ) = File::Spec->splitpath($in);
    $directories =~ s;/$;;;     # Can have extraneous trailing '/'
    my @directories = File::Spec->splitdir($directories);
    my $base = $file =~ s/\.txt$//;
    construct_filename($file, 'mutable', \@directories);
}
}# We use 'Makefile' just to see if it has changed since the last time we # rebuilt. Now discard it. @input_files = grep { $_ ne 'Makefile' } @input_files;
my $rebuild = $write_unchanged_files # Rebuild: if unconditional rebuild || ! scalar @mktables_list_output_files # or if no outputs known || $old_start_time < $most_recent; # or out-of-date
# Now we check to see if any output files are older than youngest, if # they are, we need to continue on, otherwise we can presumably bail. if (! $rebuild) { foreach my $out (@mktables_list_output_files) { if ( ! file_exists($out)) { print "'$out' is missing.\n" if $verbosity >= $VERBOSE; $rebuild = 1; last; } #local $to_trace = 1 if main::DEBUG; trace $most_recent, (stat $out)[9] if main::DEBUG && $to_trace; if ( (stat $out)[9] <= $most_recent ) { #trace "$out: most recent mod time: ", (stat $out)[9], ", youngest: $most_recent\n" if main::DEBUG && $to_trace; print "'$out' is too old.\n" if $verbosity >= $VERBOSE; $rebuild = 1; last; } } } if (! $rebuild) { print "Files seem to be ok, not bothering to rebuild. Add '-w' option to force build\n"; exit(0); } print "Must rebuild tables.\n" if $verbosity >= $VERBOSE;
# Ready to do the major processing. First create the perl pseudo-property. $perl = Property->new('perl', Type => $NON_STRING, Perl_Extension => 1);
# Process each input file foreach my $file (@input_file_objects) { $file->run; }
# Finish the table generation.
print "Finishing processing Unicode properties\n" if $verbosity >= $PROGRESS; finish_Unicode();
print "Compiling Perl properties\n" if $verbosity >= $PROGRESS; compile_perl();
print "Creating Perl synonyms\n" if $verbosity >= $PROGRESS; add_perl_synonyms();
print "Writing tables\n" if $verbosity >= $PROGRESS; write_all_tables();
# Write mktables.lst if ( $file_list and $make_list ) {
print "Updating '$file_list'\n" if $verbosity >= $PROGRESS;
foreach my $file (@input_files, @files_actually_output) {
    my (undef, $directories, $file) = File::Spec->splitpath($file);
    my @directories = File::Spec->splitdir($directories);
    $file = join '/', @directories, $file;
}
my $ofh;
if (! open $ofh,">",$file_list) {
    Carp::my_carp("Can't write to '$file_list'.  Skipping: $!");
    return
}
else {
    my $localtime = localtime $start_time;
    print $ofh <<"END";
#
# $file_list -- File list for $0.
#
#   Autogenerated starting on $start_time ($localtime)
#
# - First section is input files
#   ($0 itself is not listed but is automatically considered an input)
# - Section separator is /^=+\$/
# - Second section is a list of output files.
# - Lines matching /^\\s*#/ are treated as comments
#   which along with blank lines are ignored.
## Input files:
END print $ofh "$_\n" for sort(@input_files); print $ofh "\n=================================\n# Output files:\n\n"; print $ofh "$_\n" for sort @files_actually_output; print $ofh "\n# ",scalar(@input_files)," input files\n", "# ",scalar(@files_actually_output)+1," output files\n\n", "# End list\n"; close $ofh or Carp::my_carp("Failed to close $ofh: $!");
    print "Filelist has ",scalar(@input_files)," input files and ",
        scalar(@files_actually_output)+1," output files\n"
        if $verbosity >= $VERBOSE;
}
}# Output these warnings unless -q explicitly specified. if ($verbosity >= $NORMAL_VERBOSITY && ! $debug_skip) { if (@unhandled_properties) { print "\nProperties and tables that unexpectedly have no code points\n"; foreach my $property (sort @unhandled_properties) { print $property, "\n"; } }
if (%potential_files) {
    print "\nInput files that are not considered:\n";
    foreach my $file (sort keys %potential_files) {
        print File::Spec->abs2rel($file), "\n";
    }
}
print "\nAll done\n" if $verbosity >= $VERBOSE;
}
exit(0);# TRAILING CODE IS USED BY make_property_test_script() __DATA__
use strict; use warnings;
# If run outside the normal test suite on an ASCII platform, you can # just create a latin1_to_native() function that just returns its # inputs, because that's the only function used from test.pl require "test.pl";
# Test qr/\X/ and the \p{} regular expression constructs. This file is # constructed by mktables from the tables it generates, so if mktables is # buggy, this won't necessarily catch those bugs. Tests are generated for all # feasible properties; a few aren't currently feasible; see # is_code_point_usable() in mktables for details.
# Standard test packages are not used because this manipulates SIG_WARN. It # exits 0 if every non-skipped test succeeded; -1 if any failed.
my $Tests = 0; my $Fails = 0;
sub Expect($$$$) { my $expected = shift; my $ord = shift; my $regex = shift; my $warning_type = shift; # Type of warning message, like 'deprecated' # or empty if none my $line = (caller)[2]; $ord = ord(latin1_to_native(chr($ord)));
# Convert the code point to hex form
my $string = sprintf "\"\\x{%04X}\"", $ord;
my @tests = "";
# The first time through, use all warnings.  If the input should generate
# a warning, add another time through with them turned off
push @tests, "no warnings '$warning_type';" if $warning_type;
foreach my $no_warnings (@tests) {
    # Store any warning messages instead of outputting them
    local $SIG{__WARN__} = $SIG{__WARN__};
    my $warning_message;
    $SIG{__WARN__} = sub { $warning_message = $_[0] };
    $Tests++;
    # A string eval is needed because of the 'no warnings'.
    # Assumes no parens in the regular expression
    my $result = eval "$no_warnings
                        my \$RegObj = qr($regex);
                        $string =~ \$RegObj ? 1 : 0";
    if (not defined $result) {
        print "not ok $Tests - couldn't compile /$regex/; line $line: $@\n";
        $Fails++;
    }
    elsif ($result ^ $expected) {
        print "not ok $Tests - expected $expected but got $result for $string =~ qr/$regex/; line $line\n";
        $Fails++;
    }
    elsif ($warning_message) {
        if (! $warning_type || ($warning_type && $no_warnings)) {
            print "not ok $Tests - for qr/$regex/ did not expect warning message '$warning_message'; line $line\n";
            $Fails++;
        }
        else {
            print "ok $Tests - expected and got a warning message for qr/$regex/; line $line\n";
        }
    }
    elsif ($warning_type && ! $no_warnings) {
        print "not ok $Tests - for qr/$regex/ expected a $warning_type warning message, but got none; line $line\n";
        $Fails++;
    }
    else {
        print "ok $Tests - got $result for $string =~ qr/$regex/; line $line\n";
    }
}
return;
}sub Error($) { my $regex = shift; $Tests++; if (eval { 'x' =~ qr/$regex/; 1 }) { $Fails++; my $line = (caller)[2]; print "not ok $Tests - re compiled ok, but expected error for qr/$regex/; line $line: $@\n"; } else { my $line = (caller)[2]; print "ok $Tests - got and expected error for qr/$regex/; line $line\n"; } return; }
# GCBTest.txt character that separates grapheme clusters my $breakable_utf8 = my $breakable = chr(0xF7); utf8::upgrade($breakable_utf8);
# GCBTest.txt character that indicates that the adjoining code points are part # of the same grapheme cluster my $nobreak_utf8 = my $nobreak = chr(0xD7); utf8::upgrade($nobreak_utf8);
sub Test_X($) { # Test qr/\X/ matches. The input is a line from auxiliary/GCBTest.txt # Each such line is a sequence of code points given by their hex numbers, # separated by the two characters defined just before this subroutine that # indicate that either there can or cannot be a break between the adjacent # code points. If there isn't a break, that means the sequence forms an # extended grapheme cluster, which means that \X should match the whole # thing. If there is a break, \X should stop there. This is all # converted by this routine into a match: # $string =~ /(\X)/, # Each \X should match the next cluster; and that is what is checked.
my $template = shift;
my $line   = (caller)[2];
# The line contains characters above the ASCII range, but in Latin1.  It
# may or may not be in utf8, and if it is, it may or may not know it.  So,
# convert these characters to 8 bits.  If knows is in utf8, simply
# downgrade.
if (utf8::is_utf8($template)) {
    utf8::downgrade($template);
} else {
    # Otherwise, if it is in utf8, but doesn't know it, the next lines
    # convert the two problematic characters to their 8-bit equivalents.
    # If it isn't in utf8, they don't harm anything.
    use bytes;
    $template =~ s/$nobreak_utf8/$nobreak/g;
    $template =~ s/$breakable_utf8/$breakable/g;
}
# Get rid of the leading and trailing breakables
$template =~ s/^ \s* $breakable \s* //x;
$template =~ s/ \s* $breakable \s* $ //x;
# And no-breaks become just a space.
$template =~ s/ \s* $nobreak \s* / /xg;
# Split the input into segments that are breakable between them.
my @segments = split /\s*$breakable\s*/, $template;
my $string = "";
my $display_string = "";
my @should_match;
my @should_display;
# Convert the code point sequence in each segment into a Perl string of
# characters
foreach my $segment (@segments) {
    my @code_points = split /\s+/, $segment;
    my $this_string = "";
    my $this_display = "";
    foreach my $code_point (@code_points) {
        $this_string .= latin1_to_native(chr(hex $code_point));
        $this_display .= "\\x{$code_point}";
    }
    # The next cluster should match the string in this segment.
    push @should_match, $this_string;
    push @should_display, $this_display;
    $string .= $this_string;
    $display_string .= $this_display;
}
# If a string can be represented in both non-ut8 and utf8, test both cases
UPGRADE:
for my $to_upgrade (0 .. 1) {
    if ($to_upgrade) {
        # If already in utf8, would just be a repeat
        next UPGRADE if utf8::is_utf8($string);
        utf8::upgrade($string);
    }
    # Finally, do the \X match.
    my @matches = $string =~ /(\X)/g;
    # Look through each matched cluster to verify that it matches what we
    # expect.
    my $min = (@matches < @should_match) ? @matches : @should_match;
    for my $i (0 .. $min - 1) {
        $Tests++;
        if ($matches[$i] eq $should_match[$i]) {
            print "ok $Tests - ";
            if ($i == 0) {
                print "In \"$display_string\" =~ /(\\X)/g, \\X #1";
            } else {
                print "And \\X #", $i + 1,
            }
            print " correctly matched $should_display[$i]; line $line\n";
        } else {
            $matches[$i] = join("", map { sprintf "\\x{%04X}", $_ }
                                                unpack("U*", $matches[$i]));
            print "not ok $Tests - In \"$display_string\" =~ /(\\X)/g, \\X #",
                $i + 1,
                " should have matched $should_display[$i]",
                " but instead matched $matches[$i]",
                ".  Abandoning rest of line $line\n";
            next UPGRADE;
        }
    }
    # And the number of matches should equal the number of expected matches.
    $Tests++;
    if (@matches == @should_match) {
        print "ok $Tests - Nothing was left over; line $line\n";
    } else {
        print "not ok $Tests - There were ", scalar @should_match, " \\X matches expected, but got ", scalar @matches, " instead; line $line\n";
    }
}
return;
}sub Finished() { print "1..$Tests\n"; exit($Fails ? -1 : 0); }
Error('\p{Script=InGreek}'); # Bug #69018 Test_X("1100 $nobreak 1161"); # Bug #70940 Expect(0, 0x2028, '\p{Print}', ""); # Bug # 71722 Expect(0, 0x2029, '\p{Print}', ""); # Bug # 71722 Expect(1, 0xFF10, '\p{XDigit}', ""); # Bug # 71726
1 POD Error
The following errors were encountered while parsing the POD:
- Around line 15282:
- =end comment without matching =begin. (Stack: [empty])