use strict;
use warnings;
package JSON::Schema::Modern::Utilities;
# vim: set ts=8 sts=2 sw=2 tw=100 et :
# ABSTRACT: Internal utilities for JSON::Schema::Modern

our $VERSION = '0.607';

use 5.020;
use strictures 2;
use stable 0.031 'postderef';
use experimental 'signatures';
no autovivification warn => qw(fetch store exists delete);
use if "$]" >= 5.022, experimental => 're_strict';
no if "$]" >= 5.031009, feature => 'indirect';
no if "$]" >= 5.033001, feature => 'multidimensional';
no if "$]" >= 5.033006, feature => 'bareword_filehandles';
use B;
use Carp 'croak';
use Ref::Util 0.100 qw(is_ref is_plain_arrayref is_plain_hashref);
use builtin::compat qw(blessed created_as_number);
use Scalar::Util 'looks_like_number';
use Storable 'dclone';
use Feature::Compat::Try;
use namespace::clean;

use Exporter 'import';

our @EXPORT_OK = qw(
  is_type
  get_type
  is_bignum
  is_equal
  is_elements_unique
  jsonp
  unjsonp
  local_annotations
  canonical_uri
  E
  A
  abort
  assert_keyword_exists
  assert_keyword_type
  assert_pattern
  assert_uri_reference
  assert_uri
  annotate_self
  sprintf_num
  true
  false
);

use JSON::PP ();
use constant { true => JSON::PP::true, false => JSON::PP::false };

# supports the six core types, plus integer (which is also a number)
# we do NOT check stringy_numbers here -- you must do that in the caller
# note that sometimes a value may return true for more than one type, e.g. integer+number,
# or number+string, depending on its internal flags.
# pass { legacy_ints => 1 } in $config to use draft4 integer behaviour
sub is_type ($type, $value, $config = {}) {
  if ($type eq 'null') {
    return !(defined $value);
  }
  if ($type eq 'boolean') {
    return is_bool($value);
  }
  if ($type eq 'object') {
    return is_plain_hashref($value);
  }
  if ($type eq 'array') {
    return is_plain_arrayref($value);
  }

  if ($type eq 'string' or $type eq 'number' or $type eq 'integer') {
    return 0 if not defined $value;
    my $flags = B::svref_2object(\$value)->FLAGS;

    # dualvars with the same string and (stringified) numeric value could be either a string or a
    # number, and before 5.36 we can't tell the difference, so we will answer yes for both.
    # in 5.36+, stringified numbers still get a PV but don't have POK set, whereas
    # numified strings do have POK set, so we can tell which one came first.

    if ($type eq 'string') {
      # like created_as_string, but rejects dualvars with stringwise-unequal string and numeric parts
      return !is_ref($value)
        && $flags & B::SVf_POK
        && (!($flags & (B::SVf_IOK | B::SVf_NOK))
          || do { no warnings 'numeric'; 0+$value eq $value });
    }

    if ($type eq 'number') {
      # floats in json will always be parsed into Math::BigFloat, when allow_bignum is enabled
      return is_bignum($value) || created_as_number($value);
    }

    if ($type eq 'integer') {
      if ($config->{legacy_ints}) {
        # in draft4, an integer is "A JSON number without a fraction or exponent part.",
        # therefore 2.0 is NOT an integer
        return ref($value) eq 'Math::BigInt'
          || ($flags & B::SVf_IOK) && !($flags & B::SVf_NOK) && created_as_number($value);
      }
      else {
        # note: values that are larger than $Config{ivsize} will be represented as an NV, not IV,
        # therefore they will fail this check
        return is_bignum($value) && $value->is_int
          # if dualvar, PV and stringified NV/IV must be identical
          || created_as_number($value) && int($value) == $value;
      }
    }
  }

  if ($type =~ /^reference to (.+)$/) {
    return !blessed($value) && ref($value) eq $1;
  }

  return ref($value) eq $type;
}

# returns one of the six core types, plus integer
# we do NOT check stringy_numbers here -- you must do that in the caller
# pass { legacy_ints => 1 } in $config to use draft4 integer behaviour
sub get_type ($value, $config = {}) {
  return 'object' if is_plain_hashref($value);
  return 'boolean' if is_bool($value);
  return 'null' if not defined $value;
  return 'array' if is_plain_arrayref($value);

  # floats in json will always be parsed into Math::BigFloat, when allow_bignum is enabled
  if (is_ref($value)) {
    my $ref = ref($value);
    return $ref eq 'Math::BigInt' ? 'integer'
      # note: this will be wrong for Math::BigFloat->new('1.0') in draft4
      : $ref eq 'Math::BigFloat' ? ($value->is_int ? 'integer' : 'number')
      : (defined blessed($value) ? '' : 'reference to ').$ref;
  }

  my $flags = B::svref_2object(\$value)->FLAGS;

  # dualvars with the same string and (stringified) numeric value could be either a string or a
  # number, and before 5.36 we can't tell the difference, so we choose number because it has been
  # evaluated as a number already.
  # in 5.36+, stringified numbers still get a PV but don't have POK set, whereas
  # numified strings do have POK set, so we can tell which one came first.

  # like created_as_string, but rejects dualvars with stringwise-unequal string and numeric parts
  return 'string'
    if $flags & B::SVf_POK
      && (!($flags & (B::SVf_IOK | B::SVf_NOK))
        || do { no warnings 'numeric'; 0+$value eq $value });

  if ($config->{legacy_ints}) {
    # in draft4, an integer is "A JSON number without a fraction or exponent part.",
    # therefore 2.0 is NOT an integer
    return ($flags & B::SVf_IOK) && !($flags & B::SVf_NOK) ? 'integer' : 'number'
      if created_as_number($value);
  }
  else {
    # note: values that are larger than $Config{ivsize} will be represented as an NV, not IV,
    # therefore they will fail this check
    return int($value) == $value ? 'integer' : 'number' if created_as_number($value);
  }

  # this might be a scalar with POK|IOK or POK|NOK set
  return 'ambiguous type';
}

# lifted from JSON::MaybeXS
# note: unlike builtin::compat::is_bool on older perls, we do not accept
# dualvar(0,"") or dualvar(1,"1") because JSON::PP and Cpanel::JSON::XS
# do not encode these as booleans.
use constant HAVE_BUILTIN => "$]" >= 5.035010;
use if HAVE_BUILTIN, experimental => 'builtin';
sub is_bool ($value) {
  HAVE_BUILTIN and builtin::is_bool($value)
  or
  !!blessed($value)
    and ($value->isa('JSON::PP::Boolean')
      or $value->isa('Cpanel::JSON::XS::Boolean')
      or $value->isa('JSON::XS::Boolean'));
}

sub is_bignum ($value) {
  ref($value) =~ /^Math::Big(?:Int|Float)$/;
}

# compares two arbitrary data payloads for equality, as per
# https://json-schema.org/draft/2020-12/json-schema-core.html#rfc.section.4.2.2
# $state hashref supports the following fields:
# - stringy_numbers (input): strings will also be compared numerically
# - path (output): location of the first difference
# - error (output): description of the difference
sub is_equal ($x, $y, $state = {}) {
  $state->{path} //= '';

  my @types = map get_type($_), $x, $y;

  $state->{error} = 'ambiguous type encountered', return 0
    if grep $types[$_] eq 'ambiguous type', 0..1;

  if ($state->{scalarref_booleans}) {
    ($x, $types[0]) = (0+!!$$x, 'boolean') if $types[0] eq 'reference to SCALAR';
    ($y, $types[1]) = (0+!!$$y, 'boolean') if $types[1] eq 'reference to SCALAR';
  }

  if ($state->{stringy_numbers}) {
    ($x, $types[0]) = (0+$x, int(0+$x) == $x ? 'integer' : 'number')
      if $types[0] eq 'string' and looks_like_number($x);

    ($y, $types[1]) = (0+$y, int(0+$y) == $y ? 'integer' : 'number')
      if $types[1] eq 'string' and looks_like_number($y);
  }

  $state->{error} = "wrong type: $types[0] vs $types[1]", return 0 if $types[0] ne $types[1];
  return 1 if $types[0] eq 'null';
  ($x eq $y and return 1), $state->{error} = 'strings not equal', return 0
    if $types[0] eq 'string';
  ($x == $y and return 1), $state->{error} = "$types[0]s not equal", return 0
    if grep $types[0] eq $_, qw(boolean number integer);

  my $path = $state->{path};
  if ($types[0] eq 'object') {
    $state->{error} = 'property count differs: '.keys(%$x).' vs '.keys(%$y), return 0
      if keys %$x != keys %$y;

    if (not is_equal(my $arr_x = [ sort keys %$x ], my $arr_y = [ sort keys %$y ], my $s={})) {
      my $pos = substr($s->{path}, 1);
      $state->{error} = 'property names differ starting at position '.$pos.' ("'.$arr_x->[$pos].'" vs "'.$arr_y->[$pos].'")';
      return 0;
    }

    foreach my $property (sort keys %$x) {
      $state->{path} = jsonp($path, $property);
      return 0 if not is_equal($x->{$property}, $y->{$property}, $state);
    }

    return 1;
  }

  if ($types[0] eq 'array') {
    $state->{error} = 'element count differs: '.@$x.' vs '.@$y, return 0 if @$x != @$y;
    foreach my $idx (0 .. $x->$#*) {
      $state->{path} = $path.'/'.$idx;
      return 0 if not is_equal($x->[$idx], $y->[$idx], $state);
    }
    return 1;
  }

  $state->{error} = 'uh oh', return 0; # should never get here
}

# checks array elements for uniqueness. short-circuits on first pair of matching elements
# if second arrayref is provided, it is populated with the indices of identical items
# $state hashref supports the following fields:
# - scalarref_booleans (input): treats \0 and \1 as boolean values
# - stringy_numbers (input): strings will also be compared numerically
sub is_elements_unique ($array, $equal_indices = undef, $state = {}) {
  my %s = $state->%{qw(scalarref_booleans stringy_numbers)};
  foreach my $idx0 (0 .. $array->$#*-1) {
    foreach my $idx1 ($idx0+1 .. $array->$#*) {
      if (is_equal($array->[$idx0], $array->[$idx1], \%s)) {
        push @$equal_indices, $idx0, $idx1 if defined $equal_indices;
        return 0;
      }
    }
  }
  return 1;
}

# shorthand for creating and appending json pointers
# the first argument is an already-encoded json pointer; remaining arguments are path segments to be
# encoded and appended
sub jsonp {
  warn q{first argument to jsonp should be '' or start with '/'} if length($_[0]) and substr($_[0],0,1) ne '/';
  return join('/', shift, map s/~/~0/gr =~ s!/!~1!gr, grep defined, @_);
}

# splits a json pointer apart into its path segments
sub unjsonp ($path) {
  return map s!~0!~!gr =~ s!~1!/!gr, split m!/!, $path;
}

# get all annotations produced for the current instance data location (that are visible to this
# schema location) - remember these are hashrefs, not Annotation objects
sub local_annotations ($state) {
  grep $_->{instance_location} eq $state->{data_path}, $state->{annotations}->@*;
}

# shorthand for finding the canonical uri of the present schema location
# ensure that this code is kept consistent with the absolute_keyword_location builder in ResultNode
sub canonical_uri ($state, @extra_path) {
  return $state->{initial_schema_uri} if not @extra_path and not length($state->{schema_path});
  my $uri = $state->{initial_schema_uri}->clone;
  my $fragment = ($uri->fragment//'').(@extra_path ? jsonp($state->{schema_path}, @extra_path) : $state->{schema_path});
  undef $fragment if not length($fragment);
  $uri->fragment($fragment);
  $uri;
}

# shorthand for creating error objects
# uses these keys from $state:
# - initial_schema_uri
# - effective_base_uri (optional)
# - keyword (optional)
# - data_path
# - traversed_schema_path
# - schema_path
# - _schema_path_suffix (optional)
# - errors
# - exception (optional; set by abort())
# - recommended_response (optional)
# - depth
# - traverse (boolean, used for mode)
# returns defined-false, so callers can use 'return;' to differentiate between
# failed-with-no-error from failed-with-error.
sub E ($state, $error_string, @args) {
  croak 'E called in void context' if not defined wantarray;

  # sometimes the keyword shouldn't be at the very end of the schema path
  my $sps = delete $state->{_schema_path_suffix};
  my @schema_path_suffix = defined $sps && is_plain_arrayref($sps) ? $sps->@* : $sps//();

  # we store the absolute uri in unresolved form until needed,
  # and perform the rest of the calculations later.
  my $uri = [ $state->{initial_schema_uri}, $state->{schema_path}, ($state->{keyword}//()), @schema_path_suffix, $state->{effective_base_uri} ];

  my $keyword_location = $state->{traversed_schema_path}
    .jsonp($state->{schema_path}, $state->{keyword}, @schema_path_suffix);

  require JSON::Schema::Modern::Error;
  push $state->{errors}->@*, JSON::Schema::Modern::Error->new(
    depth => $state->{depth} // 0,
    keyword => $state->{keyword},
    instance_location => $state->{data_path},
    keyword_location => $keyword_location,
    # we calculate absolute_keyword_location when instantiating the Error object for Result
    _uri => $uri,
    error => @args ? sprintf($error_string, @args) : $error_string,
    $state->{exception} ? ( exception => $state->{exception} ) : (),
    $state->{recommended_response} ? ( recommended_response => $state->{recommended_response} ) : (),
    mode => $state->{traverse} ? 'traverse' : 'evaluate',
  );

  return 0;
}

# shorthand for creating annotations
# uses these keys from $state:
# - initial_schema_uri
# - keyword (mandatory)
# - data_path
# - traversed_schema_path
# - schema_path
# - _schema_path_suffix (optional)
# - annotations
# - collect_annotations
# - spec_version
# - _unknown (boolean)
# - depth
sub A ($state, $annotation) {
  return 1 if not $state->{collect_annotations};

  # we store the absolute uri in unresolved form until needed,
  # and perform the rest of the calculations later.
  my $uri = [ $state->{initial_schema_uri}, $state->{schema_path}, $state->{keyword}, $state->{effective_base_uri} ];

  my $keyword_location = $state->{traversed_schema_path}
    .jsonp($state->{schema_path}, $state->{keyword});

  push $state->{annotations}->@*, {
    depth => $state->{depth} // 0,
    keyword => $state->{keyword},
    instance_location => $state->{data_path},
    keyword_location => $keyword_location,
    # we calculate absolute_keyword_location when instantiating the Annotation object for Result
    _uri => $uri,
    annotation => $annotation,
    $state->{_unknown} ? ( unknown => 1 ) : (),
  };

  return 1;
}

# creates an error object, but also aborts evaluation immediately
# only this error is returned, because other errors on the stack might not actually be "real"
# errors (consider if we were in the middle of evaluating a "not" or "if").
# Therefore this is only appropriate during the evaluation phase, not the traverse phase.
sub abort ($state, $error_string, @args) {
  ()= E({ %$state, exception => 1 }, $error_string, @args);
  croak 'abort() called during traverse' if $state->{traverse};
  die pop $state->{errors}->@*;
}

sub assert_keyword_exists ($state, $schema) {
  croak 'assert_keyword_exists called in void context' if not defined wantarray;
  return E($state, '%s keyword is required', $state->{keyword}) if not exists $schema->{$state->{keyword}};
  return 1;
}

sub assert_keyword_type ($state, $schema, $type) {
  croak 'assert_keyword_type called in void context' if not defined wantarray;
  return 1 if is_type($type, $schema->{$state->{keyword}});
  E($state, '%s value is not a%s %s', $state->{keyword}, ($type =~ /^[aeiou]/ ? 'n' : ''), $type);
}

sub assert_pattern ($state, $pattern) {
  croak 'assert_pattern called in void context' if not defined wantarray;
  try {
    local $SIG{__WARN__} = sub { die @_ };
    qr/$pattern/;
  }
  catch ($e) { return E($state, $e); };
  return 1;
}

# this is only suitable for checking URIs within schemas themselves
sub assert_uri_reference ($state, $schema) {
  croak 'assert_uri_reference called in void context' if not defined wantarray;

  my $string = $schema->{$state->{keyword}};
  return E($state, '%s value is not a valid URI reference', $state->{keyword})
    # see also uri-reference format sub
    if fc(Mojo::URL->new($string)->to_unsafe_string) ne fc($string)
      or $string =~ /[^[:ascii:]]/            # ascii characters only
      or $string =~ /#/                       # no fragment, except...
        and $string !~ m{#$}                          # allow empty fragment
        and $string !~ m{#[A-Za-z][A-Za-z0-9_:.-]*$}  # allow plain-name fragment
        and $string !~ m{#/(?:[^~]|~[01])*$};         # allow json pointer fragment

  return 1;
}

# this is only suitable for checking URIs within schemas themselves,
# which have fragments consisting of plain names (anchors) or json pointers
sub assert_uri ($state, $schema, $override = undef) {
  croak 'assert_uri called in void context' if not defined wantarray;

  my $string = $override // $schema->{$state->{keyword}};
  my $uri = Mojo::URL->new($string);

  return E($state, '"%s" is not a valid URI', $string)
    # see also uri format sub
    if fc($uri->to_unsafe_string) ne fc($string)
      or $string =~ /[^[:ascii:]]/
      or not $uri->is_abs
      or $string =~ /#/
        and $string !~ m{#$}                          # empty fragment
        and $string !~ m{#[A-Za-z][A-Za-z0-9_:.-]*$}  # plain-name fragment
        and $string !~ m{#/(?:[^~]|~[01])*$};         # json pointer fragment

  return 1;
}

# produces an annotation whose value is the same as that of the current schema keyword
# makes a copy as this is passed back to the user, who cannot be trusted to not mutate it
sub annotate_self ($state, $schema) {
  A($state, is_ref($schema->{$state->{keyword}}) ? dclone($schema->{$state->{keyword}})
    : $schema->{$state->{keyword}});
}

# use original value as stored in the NV, without losing precision
sub sprintf_num ($value) {
  is_bignum($value) ? $value->bstr : sprintf('%s', $value);
}

1;

__END__

=pod

=encoding UTF-8

=head1 NAME

JSON::Schema::Modern::Utilities - Internal utilities for JSON::Schema::Modern

=head1 VERSION

version 0.607

=head1 SYNOPSIS

  use JSON::Schema::Modern::Utilities qw(func1 func2..);

=head1 DESCRIPTION

This class contains internal utilities to be used by L<JSON::Schema::Modern>.

=for Pod::Coverage is_type get_type is_bignum is_bool is_equal is_elements_unique jsonp unjsonp local_annotations
canonical_uri E A abort assert_keyword_exists assert_keyword_type assert_pattern assert_uri_reference assert_uri
annotate_self sprintf_num HAVE_BUILTIN true false

=head1 SUPPORT

Bugs may be submitted through L<https://github.com/karenetheridge/JSON-Schema-Modern/issues>.

I am also usually active on irc, as 'ether' at C<irc.perl.org> and C<irc.libera.chat>.

=for stopwords OpenAPI

You can also find me on the L<JSON Schema Slack server|https://json-schema.slack.com> and L<OpenAPI Slack
server|https://open-api.slack.com>, which are also great resources for finding help.

=head1 AUTHOR

Karen Etheridge <ether@cpan.org>

=head1 COPYRIGHT AND LICENCE

This software is copyright (c) 2020 by Karen Etheridge.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut