The Perl and Raku Conference 2025: Greenville, South Carolina - June 27-29 Learn more

use strict;
# vim: set ts=8 sts=2 sw=2 tw=100 et :
# ABSTRACT: Internal utilities for JSON::Schema::Modern
our $VERSION = '0.607';
use 5.020;
use strictures 2;
use stable 0.031 'postderef';
use experimental 'signatures';
no autovivification warn => qw(fetch store exists delete);
use if "$]" >= 5.022, experimental => 're_strict';
no if "$]" >= 5.031009, feature => 'indirect';
no if "$]" >= 5.033001, feature => 'multidimensional';
no if "$]" >= 5.033006, feature => 'bareword_filehandles';
use B;
use Carp 'croak';
use Ref::Util 0.100 qw(is_ref is_plain_arrayref is_plain_hashref);
use builtin::compat qw(blessed created_as_number);
use Scalar::Util 'looks_like_number';
use Storable 'dclone';
use Exporter 'import';
our @EXPORT_OK = qw(
is_type
get_type
is_bignum
is_equal
is_elements_unique
jsonp
unjsonp
local_annotations
canonical_uri
E
A
abort
assert_keyword_exists
assert_keyword_type
assert_pattern
assert_uri_reference
assert_uri
annotate_self
sprintf_num
true
false
);
use JSON::PP ();
use constant { true => JSON::PP::true, false => JSON::PP::false };
# supports the six core types, plus integer (which is also a number)
# we do NOT check stringy_numbers here -- you must do that in the caller
# note that sometimes a value may return true for more than one type, e.g. integer+number,
# or number+string, depending on its internal flags.
# pass { legacy_ints => 1 } in $config to use draft4 integer behaviour
sub is_type ($type, $value, $config = {}) {
if ($type eq 'null') {
return !(defined $value);
}
if ($type eq 'boolean') {
return is_bool($value);
}
if ($type eq 'object') {
return is_plain_hashref($value);
}
if ($type eq 'array') {
return is_plain_arrayref($value);
}
if ($type eq 'string' or $type eq 'number' or $type eq 'integer') {
return 0 if not defined $value;
my $flags = B::svref_2object(\$value)->FLAGS;
# dualvars with the same string and (stringified) numeric value could be either a string or a
# number, and before 5.36 we can't tell the difference, so we will answer yes for both.
# in 5.36+, stringified numbers still get a PV but don't have POK set, whereas
# numified strings do have POK set, so we can tell which one came first.
if ($type eq 'string') {
# like created_as_string, but rejects dualvars with stringwise-unequal string and numeric parts
return !is_ref($value)
&& $flags & B::SVf_POK
&& (!($flags & (B::SVf_IOK | B::SVf_NOK))
|| do { no warnings 'numeric'; 0+$value eq $value });
}
if ($type eq 'number') {
# floats in json will always be parsed into Math::BigFloat, when allow_bignum is enabled
return is_bignum($value) || created_as_number($value);
}
if ($type eq 'integer') {
if ($config->{legacy_ints}) {
# in draft4, an integer is "A JSON number without a fraction or exponent part.",
# therefore 2.0 is NOT an integer
return ref($value) eq 'Math::BigInt'
|| ($flags & B::SVf_IOK) && !($flags & B::SVf_NOK) && created_as_number($value);
}
else {
# note: values that are larger than $Config{ivsize} will be represented as an NV, not IV,
# therefore they will fail this check
return is_bignum($value) && $value->is_int
# if dualvar, PV and stringified NV/IV must be identical
|| created_as_number($value) && int($value) == $value;
}
}
}
if ($type =~ /^reference to (.+)$/) {
return !blessed($value) && ref($value) eq $1;
}
return ref($value) eq $type;
}
# returns one of the six core types, plus integer
# we do NOT check stringy_numbers here -- you must do that in the caller
# pass { legacy_ints => 1 } in $config to use draft4 integer behaviour
sub get_type ($value, $config = {}) {
return 'object' if is_plain_hashref($value);
return 'boolean' if is_bool($value);
return 'null' if not defined $value;
return 'array' if is_plain_arrayref($value);
# floats in json will always be parsed into Math::BigFloat, when allow_bignum is enabled
if (is_ref($value)) {
my $ref = ref($value);
return $ref eq 'Math::BigInt' ? 'integer'
# note: this will be wrong for Math::BigFloat->new('1.0') in draft4
: $ref eq 'Math::BigFloat' ? ($value->is_int ? 'integer' : 'number')
: (defined blessed($value) ? '' : 'reference to ').$ref;
}
my $flags = B::svref_2object(\$value)->FLAGS;
# dualvars with the same string and (stringified) numeric value could be either a string or a
# number, and before 5.36 we can't tell the difference, so we choose number because it has been
# evaluated as a number already.
# in 5.36+, stringified numbers still get a PV but don't have POK set, whereas
# numified strings do have POK set, so we can tell which one came first.
# like created_as_string, but rejects dualvars with stringwise-unequal string and numeric parts
return 'string'
if $flags & B::SVf_POK
&& (!($flags & (B::SVf_IOK | B::SVf_NOK))
|| do { no warnings 'numeric'; 0+$value eq $value });
if ($config->{legacy_ints}) {
# in draft4, an integer is "A JSON number without a fraction or exponent part.",
# therefore 2.0 is NOT an integer
return ($flags & B::SVf_IOK) && !($flags & B::SVf_NOK) ? 'integer' : 'number'
if created_as_number($value);
}
else {
# note: values that are larger than $Config{ivsize} will be represented as an NV, not IV,
# therefore they will fail this check
return int($value) == $value ? 'integer' : 'number' if created_as_number($value);
}
# this might be a scalar with POK|IOK or POK|NOK set
return 'ambiguous type';
}
# lifted from JSON::MaybeXS
# note: unlike builtin::compat::is_bool on older perls, we do not accept
# dualvar(0,"") or dualvar(1,"1") because JSON::PP and Cpanel::JSON::XS
# do not encode these as booleans.
use constant HAVE_BUILTIN => "$]" >= 5.035010;
use if HAVE_BUILTIN, experimental => 'builtin';
sub is_bool ($value) {
HAVE_BUILTIN and builtin::is_bool($value)
or
!!blessed($value)
and ($value->isa('JSON::PP::Boolean')
or $value->isa('Cpanel::JSON::XS::Boolean')
or $value->isa('JSON::XS::Boolean'));
}
sub is_bignum ($value) {
ref($value) =~ /^Math::Big(?:Int|Float)$/;
}
# compares two arbitrary data payloads for equality, as per
# $state hashref supports the following fields:
# - stringy_numbers (input): strings will also be compared numerically
# - path (output): location of the first difference
# - error (output): description of the difference
sub is_equal ($x, $y, $state = {}) {
$state->{path} //= '';
my @types = map get_type($_), $x, $y;
$state->{error} = 'ambiguous type encountered', return 0
if grep $types[$_] eq 'ambiguous type', 0..1;
if ($state->{scalarref_booleans}) {
($x, $types[0]) = (0+!!$$x, 'boolean') if $types[0] eq 'reference to SCALAR';
($y, $types[1]) = (0+!!$$y, 'boolean') if $types[1] eq 'reference to SCALAR';
}
if ($state->{stringy_numbers}) {
($x, $types[0]) = (0+$x, int(0+$x) == $x ? 'integer' : 'number')
if $types[0] eq 'string' and looks_like_number($x);
($y, $types[1]) = (0+$y, int(0+$y) == $y ? 'integer' : 'number')
if $types[1] eq 'string' and looks_like_number($y);
}
$state->{error} = "wrong type: $types[0] vs $types[1]", return 0 if $types[0] ne $types[1];
return 1 if $types[0] eq 'null';
($x eq $y and return 1), $state->{error} = 'strings not equal', return 0
if $types[0] eq 'string';
($x == $y and return 1), $state->{error} = "$types[0]s not equal", return 0
if grep $types[0] eq $_, qw(boolean number integer);
my $path = $state->{path};
if ($types[0] eq 'object') {
$state->{error} = 'property count differs: '.keys(%$x).' vs '.keys(%$y), return 0
if keys %$x != keys %$y;
if (not is_equal(my $arr_x = [ sort keys %$x ], my $arr_y = [ sort keys %$y ], my $s={})) {
my $pos = substr($s->{path}, 1);
$state->{error} = 'property names differ starting at position '.$pos.' ("'.$arr_x->[$pos].'" vs "'.$arr_y->[$pos].'")';
return 0;
}
foreach my $property (sort keys %$x) {
$state->{path} = jsonp($path, $property);
return 0 if not is_equal($x->{$property}, $y->{$property}, $state);
}
return 1;
}
if ($types[0] eq 'array') {
$state->{error} = 'element count differs: '.@$x.' vs '.@$y, return 0 if @$x != @$y;
foreach my $idx (0 .. $x->$#*) {
$state->{path} = $path.'/'.$idx;
return 0 if not is_equal($x->[$idx], $y->[$idx], $state);
}
return 1;
}
$state->{error} = 'uh oh', return 0; # should never get here
}
# checks array elements for uniqueness. short-circuits on first pair of matching elements
# if second arrayref is provided, it is populated with the indices of identical items
# $state hashref supports the following fields:
# - scalarref_booleans (input): treats \0 and \1 as boolean values
# - stringy_numbers (input): strings will also be compared numerically
sub is_elements_unique ($array, $equal_indices = undef, $state = {}) {
my %s = $state->%{qw(scalarref_booleans stringy_numbers)};
foreach my $idx0 (0 .. $array->$#*-1) {
foreach my $idx1 ($idx0+1 .. $array->$#*) {
if (is_equal($array->[$idx0], $array->[$idx1], \%s)) {
push @$equal_indices, $idx0, $idx1 if defined $equal_indices;
return 0;
}
}
}
return 1;
}
# shorthand for creating and appending json pointers
# the first argument is an already-encoded json pointer; remaining arguments are path segments to be
# encoded and appended
sub jsonp {
warn q{first argument to jsonp should be '' or start with '/'} if length($_[0]) and substr($_[0],0,1) ne '/';
return join('/', shift, map s/~/~0/gr =~ s!/!~1!gr, grep defined, @_);
}
# splits a json pointer apart into its path segments
sub unjsonp ($path) {
return map s!~0!~!gr =~ s!~1!/!gr, split m!/!, $path;
}
# get all annotations produced for the current instance data location (that are visible to this
# schema location) - remember these are hashrefs, not Annotation objects
sub local_annotations ($state) {
grep $_->{instance_location} eq $state->{data_path}, $state->{annotations}->@*;
}
# shorthand for finding the canonical uri of the present schema location
# ensure that this code is kept consistent with the absolute_keyword_location builder in ResultNode
sub canonical_uri ($state, @extra_path) {
return $state->{initial_schema_uri} if not @extra_path and not length($state->{schema_path});
my $uri = $state->{initial_schema_uri}->clone;
my $fragment = ($uri->fragment//'').(@extra_path ? jsonp($state->{schema_path}, @extra_path) : $state->{schema_path});
undef $fragment if not length($fragment);
$uri->fragment($fragment);
$uri;
}
# shorthand for creating error objects
# uses these keys from $state:
# - initial_schema_uri
# - effective_base_uri (optional)
# - keyword (optional)
# - data_path
# - traversed_schema_path
# - schema_path
# - _schema_path_suffix (optional)
# - errors
# - exception (optional; set by abort())
# - recommended_response (optional)
# - depth
# - traverse (boolean, used for mode)
# returns defined-false, so callers can use 'return;' to differentiate between
# failed-with-no-error from failed-with-error.
sub E ($state, $error_string, @args) {
croak 'E called in void context' if not defined wantarray;
# sometimes the keyword shouldn't be at the very end of the schema path
my $sps = delete $state->{_schema_path_suffix};
my @schema_path_suffix = defined $sps && is_plain_arrayref($sps) ? $sps->@* : $sps//();
# we store the absolute uri in unresolved form until needed,
# and perform the rest of the calculations later.
my $uri = [ $state->{initial_schema_uri}, $state->{schema_path}, ($state->{keyword}//()), @schema_path_suffix, $state->{effective_base_uri} ];
my $keyword_location = $state->{traversed_schema_path}
.jsonp($state->{schema_path}, $state->{keyword}, @schema_path_suffix);
push $state->{errors}->@*, JSON::Schema::Modern::Error->new(
depth => $state->{depth} // 0,
keyword => $state->{keyword},
instance_location => $state->{data_path},
keyword_location => $keyword_location,
# we calculate absolute_keyword_location when instantiating the Error object for Result
_uri => $uri,
error => @args ? sprintf($error_string, @args) : $error_string,
$state->{exception} ? ( exception => $state->{exception} ) : (),
$state->{recommended_response} ? ( recommended_response => $state->{recommended_response} ) : (),
mode => $state->{traverse} ? 'traverse' : 'evaluate',
);
return 0;
}
# shorthand for creating annotations
# uses these keys from $state:
# - initial_schema_uri
# - keyword (mandatory)
# - data_path
# - traversed_schema_path
# - schema_path
# - _schema_path_suffix (optional)
# - annotations
# - collect_annotations
# - spec_version
# - _unknown (boolean)
# - depth
sub A ($state, $annotation) {
return 1 if not $state->{collect_annotations};
# we store the absolute uri in unresolved form until needed,
# and perform the rest of the calculations later.
my $uri = [ $state->{initial_schema_uri}, $state->{schema_path}, $state->{keyword}, $state->{effective_base_uri} ];
my $keyword_location = $state->{traversed_schema_path}
.jsonp($state->{schema_path}, $state->{keyword});
push $state->{annotations}->@*, {
depth => $state->{depth} // 0,
keyword => $state->{keyword},
instance_location => $state->{data_path},
keyword_location => $keyword_location,
# we calculate absolute_keyword_location when instantiating the Annotation object for Result
_uri => $uri,
annotation => $annotation,
$state->{_unknown} ? ( unknown => 1 ) : (),
};
return 1;
}
# creates an error object, but also aborts evaluation immediately
# only this error is returned, because other errors on the stack might not actually be "real"
# errors (consider if we were in the middle of evaluating a "not" or "if").
# Therefore this is only appropriate during the evaluation phase, not the traverse phase.
sub abort ($state, $error_string, @args) {
()= E({ %$state, exception => 1 }, $error_string, @args);
croak 'abort() called during traverse' if $state->{traverse};
die pop $state->{errors}->@*;
}
sub assert_keyword_exists ($state, $schema) {
croak 'assert_keyword_exists called in void context' if not defined wantarray;
return E($state, '%s keyword is required', $state->{keyword}) if not exists $schema->{$state->{keyword}};
return 1;
}
sub assert_keyword_type ($state, $schema, $type) {
croak 'assert_keyword_type called in void context' if not defined wantarray;
return 1 if is_type($type, $schema->{$state->{keyword}});
E($state, '%s value is not a%s %s', $state->{keyword}, ($type =~ /^[aeiou]/ ? 'n' : ''), $type);
}
sub assert_pattern ($state, $pattern) {
croak 'assert_pattern called in void context' if not defined wantarray;
try {
local $SIG{__WARN__} = sub { die @_ };
qr/$pattern/;
}
catch ($e) { return E($state, $e); };
return 1;
}
# this is only suitable for checking URIs within schemas themselves
sub assert_uri_reference ($state, $schema) {
croak 'assert_uri_reference called in void context' if not defined wantarray;
my $string = $schema->{$state->{keyword}};
return E($state, '%s value is not a valid URI reference', $state->{keyword})
# see also uri-reference format sub
if fc(Mojo::URL->new($string)->to_unsafe_string) ne fc($string)
or $string =~ /[^[:ascii:]]/ # ascii characters only
or $string =~ /#/ # no fragment, except...
and $string !~ m{#$} # allow empty fragment
and $string !~ m{#[A-Za-z][A-Za-z0-9_:.-]*$} # allow plain-name fragment
and $string !~ m{#/(?:[^~]|~[01])*$}; # allow json pointer fragment
return 1;
}
# this is only suitable for checking URIs within schemas themselves,
# which have fragments consisting of plain names (anchors) or json pointers
sub assert_uri ($state, $schema, $override = undef) {
croak 'assert_uri called in void context' if not defined wantarray;
my $string = $override // $schema->{$state->{keyword}};
my $uri = Mojo::URL->new($string);
return E($state, '"%s" is not a valid URI', $string)
# see also uri format sub
if fc($uri->to_unsafe_string) ne fc($string)
or $string =~ /[^[:ascii:]]/
or not $uri->is_abs
or $string =~ /#/
and $string !~ m{#$} # empty fragment
and $string !~ m{#[A-Za-z][A-Za-z0-9_:.-]*$} # plain-name fragment
and $string !~ m{#/(?:[^~]|~[01])*$}; # json pointer fragment
return 1;
}
# produces an annotation whose value is the same as that of the current schema keyword
# makes a copy as this is passed back to the user, who cannot be trusted to not mutate it
sub annotate_self ($state, $schema) {
A($state, is_ref($schema->{$state->{keyword}}) ? dclone($schema->{$state->{keyword}})
: $schema->{$state->{keyword}});
}
# use original value as stored in the NV, without losing precision
sub sprintf_num ($value) {
is_bignum($value) ? $value->bstr : sprintf('%s', $value);
}
1;
__END__
=pod
=encoding UTF-8
=head1 NAME
JSON::Schema::Modern::Utilities - Internal utilities for JSON::Schema::Modern
=head1 VERSION
version 0.607
=head1 SYNOPSIS
use JSON::Schema::Modern::Utilities qw(func1 func2..);
=head1 DESCRIPTION
This class contains internal utilities to be used by L<JSON::Schema::Modern>.
=for Pod::Coverage is_type get_type is_bignum is_bool is_equal is_elements_unique jsonp unjsonp local_annotations
canonical_uri E A abort assert_keyword_exists assert_keyword_type assert_pattern assert_uri_reference assert_uri
annotate_self sprintf_num HAVE_BUILTIN true false
=head1 SUPPORT
I am also usually active on irc, as 'ether' at C<irc.perl.org> and C<irc.libera.chat>.
=for stopwords OpenAPI
You can also find me on the L<JSON Schema Slack server|https://json-schema.slack.com> and L<OpenAPI Slack
server|https://open-api.slack.com>, which are also great resources for finding help.
=head1 AUTHOR
Karen Etheridge <ether@cpan.org>
=head1 COPYRIGHT AND LICENCE
This software is copyright (c) 2020 by Karen Etheridge.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut