#!/usr/bin/perl -w

# Copyright (c) 2023-2025 Löwenfelsen UG (haftungsbeschränkt)

# licensed under Artistic License 2.0 (see LICENSE file)

# ABSTRACT: format independent identifier object


package Data::Identifier;

use v5.20;
use strict;
use warnings;

use parent qw(Data::Identifier::Interface::Known);

use Carp;
use Math::BigInt lib => 'GMP';
use URI;
use Data::Identifier::Generate;

our $VERSION = v0.09;

use constant {
    RE_UUID => qr/^[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}$/,
    RE_OID  => qr/^[0-2](?:\.(?:0|[1-9][0-9]*))+$/,
    RE_URI  => qr/^[a-zA-Z][a-zA-Z0-9\+\.\-]+/,
    RE_UINT => qr/^(?:0|[1-9][0-9]*)$/,
    RE_QID  => qr/^[QPL][1-9][0-9]*$/,
    RE_DOI  => qr/^10\.[1-9][0-9]+(?:\.[0-9]+)*\/./,
    RE_UNICODE => qr/^U\+([0-9A-F]{4,7})$/,
};

use constant {
    WK_UUID => '8be115d2-dc2f-4a98-91e1-a6e3075cbc31', # uuid
    WK_OID  => 'd08dc905-bbf6-4183-b219-67723c3c8374', # oid
    WK_URI  => 'a8d1637d-af19-49e9-9ef8-6bc1fbcf6439', # uri
    WK_SID  => 'f87a38cb-fd13-4e15-866c-e49901adbec5', # small-identifier
    WK_WD   => 'ce7aae1e-a210-4214-926a-0ebca56d77e3', # wikidata-identifier
    WK_GTIN => '82d529be-0f00-4b4f-a43f-4a22de5f5312', # gtin
    WK_IBAN => 'b1418262-6bc9-459c-b4b0-a054d77db0ea', # iban
    WK_BIC  => 'c8a3a132-f160-473c-b5f3-26a748f37e62', # bic
    WK_DOI  => '931f155e-5a24-499b-9fbb-ed4efefe27fe', # doi
    WK_FC   => 'd576b9d1-47d4-43ae-b7ec-bbea1fe009ba', # factgrid-identifier
    WK_UNICODE_CP => '5f167223-cc9c-4b2f-9928-9fe1b253b560', # unicode-code-point

    NS_WD   => '9e10aca7-4a99-43ac-9368-6cbfa43636df', # Wikidata-namespace
    NS_FC   => '6491f7a9-0b29-4ef1-992c-3681cea18182', # factgrid-namespace
    NS_INT  => '5dd8ddbb-13a8-4d6c-9264-36e6dd6f9c99', # integer-namespace
    NS_DATE => 'fc43fbba-b959-4882-b4c8-90a288b7d416', # gregorian-date-namespace
    NS_UNICODE_CP => '132aa723-a373-48bf-a88d-69f1e00f00cf', # 'unicode-character-namespace'
};

# Features:
my $enabled_oid = 1;

my %uuid_to_uriid_org = (
    WK_UUID() => 'uuid',
    WK_OID()  => 'oid',
    WK_URI()  => 'uri',
    WK_SID()  => 'sid',
    WK_GTIN() => 'gtin',
    WK_WD()   => 'wikidata-identifier',
);

my %uuid_org_to_uuid = map {$uuid_to_uriid_org{$_} => $_} keys %uuid_to_uriid_org;

my $well_known_uuid = __PACKAGE__->new(ise => WK_UUID, validate => RE_UUID);

my %well_known = (
    uuid => $well_known_uuid,
    oid  => __PACKAGE__->new($well_known_uuid => WK_OID,    validate => RE_OID),
    uri  => __PACKAGE__->new($well_known_uuid => WK_URI,    validate => RE_URI),
    sid  => __PACKAGE__->new($well_known_uuid => WK_SID,    validate => RE_UINT),
    wd   => __PACKAGE__->new($well_known_uuid => WK_WD,     validate => RE_QID, namespace => NS_WD, generate => 'id-based'),
    fc   => __PACKAGE__->new($well_known_uuid => WK_FC,     validate => RE_QID, namespace => NS_FC, generate => 'id-based'),
    gtin => __PACKAGE__->new($well_known_uuid => WK_GTIN,   validate => RE_UINT),
    iban => __PACKAGE__->new($well_known_uuid => WK_IBAN),
    bic  => __PACKAGE__->new($well_known_uuid => WK_BIC),
    doi  => __PACKAGE__->new($well_known_uuid => WK_DOI,    validate => RE_DOI),

    # Unofficial, not part of public API:
    unicodecp => __PACKAGE__->new($well_known_uuid => WK_UNICODE_CP, validate => RE_UNICODE, namespace => NS_UNICODE_CP, generate => 'id-based'),
);

my %registered;

$_->register foreach values %well_known;

# Refill with sids:
{
    my %wk_sids = (
        'ddd60c5c-2934-404f-8f2d-fcb4da88b633'  =>   1, # also-shares-identifier
        WK_UUID()                               =>   2,
        'bfae7574-3dae-425d-89b1-9c087c140c23'  =>   3, # tagname
        '7f265548-81dc-4280-9550-1bd0aa4bf748'  =>   4, # has-type
        WK_URI()                                =>   5,
        WK_OID()                                =>   6,
        # Unassigned: 7
        'd0a4c6e2-ce2f-4d4c-b079-60065ac681f1'  =>   8, # language-tag-identifier
        WK_WD()                                 =>   9,
        '923b43ae-a50e-4db3-8655-ed931d0dd6d4'  =>  10, # specialises
        'eacbf914-52cf-4192-a42c-8ecd27c85ee1'  =>  11, # unicode-string
        '928d02b0-7143-4ec9-b5ac-9554f02d3fb1'  =>  12, # integer
        'dea3782c-6bcb-4ce9-8a39-f8dab399d75d'  =>  13, # unsigned-integer
        # Unassigned: 14, 15
        '6ba648c2-3657-47c2-8541-9b73c3a9b2b4'  =>  16, # default-context
        '52a516d0-25d8-47c7-a6ba-80983e576c54'  =>  17, # proto-file
        '1cd4a6c6-0d7c-48d1-81e7-4e8d41fdb45d'  =>  18, # final-file-size
        '6085f87e-4797-4bb2-b23d-85ff7edc1da0'  =>  19, # text-fragment
        '4c9656eb-c130-42b7-9348-a1fee3f42050'  =>  20, # also-list-contains-also
        '298ef373-9731-491d-824d-b2836250e865'  =>  21, # proto-message
        '7be4d8c7-6a75-44cc-94f7-c87433307b26'  =>  22, # proto-entity
        '65bb36f2-b558-48af-8512-bca9150cca85'  =>  23, # proxy-type
        'a1c478b5-0a85-4b5b-96da-d250db14a67c'  =>  24, # flagged-as
        '59cfe520-ba32-48cc-b654-74f7a05779db'  =>  25, # marked-as
        '2bffc55d-7380-454e-bd53-c5acd525d692'  =>  26, # roaraudio-error-number
        WK_SID()                                =>  27,
        'd2750351-aed7-4ade-aa80-c32436cc6030'  =>  28, # also-has-role
        # Unassigned: 29, 30, 31
        '448c50a8-c847-4bc7-856e-0db5fea8f23b'  =>  32, # final-file-encoding
        '79385945-0963-44aa-880a-bca4a42e9002'  =>  33, # final-file-hash
        '3fde5688-6e34-45e9-8f33-68f079b152c8'  =>  34, # SEEK_SET
        'bc598c52-642e-465b-b079-e9253cd6f190'  =>  35, # SEEK_CUR
        '06aff30f-70e8-48b4-8b20-9194d22fc460'  =>  36, # SEEK_END
        '59a5691a-6a19-4051-bc26-8db82c019df3'  =>  37, # inode
        '2c7e15ed-aa2f-4e2f-9a1d-64df0c85875a'  => 112, # chat-0-word-identifier
        WK_GTIN()                               => 160,
    );

    foreach my $ise (keys %wk_sids) {
        my $identifier = __PACKAGE__->new(ise => $ise);
        $identifier->{id_cache} //= {};
        $identifier->{id_cache}->{WK_SID()} //= $wk_sids{$ise};
        $identifier->register; # re-register
    }
}

# Some extra tags such as namespaces:
foreach my $ise (NS_WD, NS_INT, NS_DATE) {
    my $identifier = __PACKAGE__->new(ise => $ise);
    $identifier->register; # re-register
}

# Refill with displaynames
{
    my %displaynames = (
        WK_UUID()                               => 'uuid',
        WK_OID()                                => 'oid',
        WK_URI()                                => 'uri',
        WK_SID()                                => 'small-identifier',
        WK_WD()                                 => 'wikidata-identifier',
        WK_GTIN()                               => 'gtin',
        WK_IBAN()                               => 'iban',
        WK_BIC()                                => 'bic',
        WK_DOI()                                => 'doi',
        WK_FC()                                 => 'factgrid-identifier',
        WK_UNICODE_CP()                         => 'unicode-code-point',
        NS_WD()                                 => 'Wikidata-namespace',
        NS_FC()                                 => 'factgrid-namespace',
        NS_INT()                                => 'integer-namespace',
        NS_DATE()                               => 'gregorian-date-namespace',
        NS_UNICODE_CP()                         => 'unicode-character-namespace',

        'ddd60c5c-2934-404f-8f2d-fcb4da88b633'  => 'also-shares-identifier',
        'bfae7574-3dae-425d-89b1-9c087c140c23'  => 'tagname',
        '7f265548-81dc-4280-9550-1bd0aa4bf748'  => 'has-type',
        'd0a4c6e2-ce2f-4d4c-b079-60065ac681f1'  => 'language-tag-identifier',
        '923b43ae-a50e-4db3-8655-ed931d0dd6d4'  => 'specialises',
        'eacbf914-52cf-4192-a42c-8ecd27c85ee1'  => 'unicode-string',
        '928d02b0-7143-4ec9-b5ac-9554f02d3fb1'  => 'integer',
        'dea3782c-6bcb-4ce9-8a39-f8dab399d75d'  => 'unsigned-integer',
        '6ba648c2-3657-47c2-8541-9b73c3a9b2b4'  => 'default-context',
        '52a516d0-25d8-47c7-a6ba-80983e576c54'  => 'proto-file',
        '1cd4a6c6-0d7c-48d1-81e7-4e8d41fdb45d'  => 'final-file-size',
        '6085f87e-4797-4bb2-b23d-85ff7edc1da0'  => 'text-fragment',
        '4c9656eb-c130-42b7-9348-a1fee3f42050'  => 'also-list-contains-also',
        '298ef373-9731-491d-824d-b2836250e865'  => 'proto-message',
        '7be4d8c7-6a75-44cc-94f7-c87433307b26'  => 'proto-entity',
        '65bb36f2-b558-48af-8512-bca9150cca85'  => 'proxy-type',
        'a1c478b5-0a85-4b5b-96da-d250db14a67c'  => 'flagged-as',
        '59cfe520-ba32-48cc-b654-74f7a05779db'  => 'marked-as',
        '2bffc55d-7380-454e-bd53-c5acd525d692'  => 'roaraudio-error-number',
        'd2750351-aed7-4ade-aa80-c32436cc6030'  => 'also-has-role',
        '448c50a8-c847-4bc7-856e-0db5fea8f23b'  => 'final-file-encoding',
        '79385945-0963-44aa-880a-bca4a42e9002'  => 'final-file-hash',
        '3fde5688-6e34-45e9-8f33-68f079b152c8'  => 'SEEK_SET',
        'bc598c52-642e-465b-b079-e9253cd6f190'  => 'SEEK_CUR',
        '06aff30f-70e8-48b4-8b20-9194d22fc460'  => 'SEEK_END',
        '59a5691a-6a19-4051-bc26-8db82c019df3'  => 'inode',
        '2c7e15ed-aa2f-4e2f-9a1d-64df0c85875a'  => 'chat-0-word-identifier',
    );

    foreach my $ise (keys %displaynames) {
        my $identifier = __PACKAGE__->new(ise => $ise);
        $identifier->{displayname} //= $displaynames{$ise};
        $identifier->register; # re-register
    }
}

{
    # ISE -> namespace
    my %namespaces_uint = (
        '2bffc55d-7380-454e-bd53-c5acd525d692' => '744eaf4e-ae93-44d8-9ab5-744105222da6', # roaraudio-error-number: roaraudio-error-namespace
        '4a7fc2e2-854b-42ec-b24f-c7fece371865' => 'ac59062c-6ba2-44de-9f54-09e28f2c0b5c', # e621-post-identifier: e621-post-namespace
        'a0a4fae2-be6f-4a51-8326-6110ba845a16' => '69b7ff38-ca78-43a8-b9ea-66cb02312eef', # e621-pool-identifier: e621-pool-namespace
    );

    foreach my $ise (keys %namespaces_uint) {
        my $identifier = __PACKAGE__->new(ise => $ise);
        $identifier->{namespace}    //= __PACKAGE__->new(ise => $namespaces_uint{$ise});
        $identifier->{validate}     //= RE_UINT;
        $identifier->{generate}     //= 'id-based';
        $identifier->register; # re-register
    }

    # validate => RE_QID, namespace => NS_FC, generate => 'id-based'
}

# Call this after after we loaded all our stuff and before anyone else will register stuff:
__PACKAGE__->_known_provider('wellknown');


sub new {
    my ($pkg, $type, $id, %opts) = @_;
    my $self = bless {};

    if (!ref($type) && $type eq 'from') {
        if (ref($id)) {
            my $from = $id;
            if ($id->isa('Data::Identifier')) {
                if (scalar(keys %opts)) {
                    $type = $id->type;
                    $id   = $id->id;
                } else {
                    return $id;
                }
            } elsif ($id->isa('URI')) {
                $type = 'uri';
            } elsif ($id->isa('Data::URIID::Result')) {
                $opts{displayname} //= sub { return $from->attribute('displayname', default => undef) };
                $type = $id->id_type;
                $id   = $id->id;
            } elsif ($id->isa('Data::URIID::Base') || $id->isa('Data::URIID::Colour') || $id->isa('Data::URIID::Service')) {
                #$opts{displayname} //= $id->name if $id->isa('Data::URIID::Service');
                $opts{displayname} //= $id->displayname(default => undef, no_defaults => 1);
                $type = 'ise';
                $id   = $id->ise;
            } elsif ($id->isa('Data::TagDB::Tag')) {
                $opts{displayname} //= sub { $from->displayname };
                $type = 'ise';
                $id   = $id->ise;
            } elsif ($id->isa('Business::ISBN')) {
                $type = $well_known{gtin};
                $id   = $id->as_isbn13->as_string([]);
            } else {
                croak 'Unsupported input data';
            }
        } else {
            # If it's not a ref, try as ise.
            $type = 'ise';
        }
    }

    if (!ref($type) && $type eq 'ise') {
        croak 'Undefined identifier but type is ISE' unless defined $id;

        if ($id =~ /^[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}$/) { # allow less normalised form than RE_UUID
            $type = $well_known_uuid;

            # For bootstrap only.
            if (!defined($type) && $id eq '8be115d2-dc2f-4a98-91e1-a6e3075cbc31') {
                $self->{type} = $well_known_uuid = $type = $self;
                $self->{id} = $id;
            }
        } elsif ($id =~ RE_OID) {
            $type = 'oid';
        } elsif ($id =~ RE_URI) {
            $type = 'uri';
        } else {
            croak 'Not a valid ISE identifier';
        }
    }

    unless (ref $type) {
        if ($type =~ /^[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}$/) { # allow less normalised form than RE_UUID
            $type = $pkg->new(uuid => $type);
            $type->register;
        } elsif ($type eq 'wellknown') {
            $self = $well_known{$id};
            croak 'Unknown well-known' unless defined $self;
            return $self;
        } else {
            $type = $well_known{$type};
        }
        croak 'Unknown type name' unless defined $type;
    }

    croak 'Not a valid type' unless $type->isa(__PACKAGE__);

    # we normalise URIs first as they may then normalised again
    if ($type == ($well_known{uri} // 0)) {
        my $uri = $id.''; # force stringification

        if ($uri =~ m#^urn:uuid:([0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})$#) {
            $id = $1;
            $type = $well_known_uuid;
        } elsif ($uri =~ m#^urn:oid:([0-2](?:\.(?:0|[1-9][0-9]*))+)$#) {
            $id = $1;
            $type = $well_known{oid};
        } elsif ($uri =~ m#^https?://www\.wikidata\.org/entity/([QPL][1-9][0-9]*)$#) {
            $id = $1;
            $type = $well_known{wd};
        } elsif ($uri =~ m#^https?://doi\.org/(10\..+)$#) {
            $id = $1;
            $type = $well_known{doi};
        } elsif ($uri =~ m#^https?://uriid\.org/([^/]+)/[^/]+#) {
            my $ptype = $1;
            if (defined($uuid_org_to_uuid{$ptype}) || $ptype =~ RE_UUID) {
                my $u = URI->new($uri);
                my @path_segments = $u->path_segments;
                if (scalar(@path_segments) == 3 && $path_segments[0] eq '') {
                    $type = $pkg->new(uuid => ($uuid_org_to_uuid{$path_segments[1]} // $path_segments[1]));
                    $id = $path_segments[2];
                }
            }
        }
    }

    if ($type == ($well_known_uuid // 0)) {
        $id = lc($id); # normalise
    } elsif ($type == ($well_known{oid} // 0)) {
        if ($id =~ /^2\.25\.([1-9][0-9]*)$/) {
            my $hex = Math::BigInt->new($1)->as_hex;
            $hex =~ s/^0x//;
            $hex = ('0' x (32 - length($hex))) . $hex;
            $hex =~ s/^(.{8})(.{4})(.{4})(.{4})(.{12})$/$1-$2-$3-$4-$5/;
            $type = $well_known_uuid;
            $id = $hex;
        }
    }

    if (defined(my $v = $registered{$type->uuid}{$id})) {
        return $v;
    }


    if (defined $type->{validate}) {
        croak 'Identifier did not validate against type' unless $id =~ $type->{validate};
    }

    $self->{type} = $type;
    $self->{id} = $id;

    foreach my $key (qw(validate namespace generate displayname)) {
        next unless defined $opts{$key};
        $self->{$key} //= $opts{$key};
    }

    foreach my $key (qw(namespace)) {
        if (defined(my $v = $self->{$key})) {
            unless (ref $v) {
                $self->{$key} = $pkg->new(ise => $v);
            }
        }
    }

    return bless $self;
}


#@returns __PACKAGE__
sub random {
    my ($pkg, %opts) = @_;
    my $type = $opts{type} // 'uuid';

    if (ref $type) {
        if ($type == $well_known_uuid) {
            $type = 'uuid';
        } else {
            croak 'Invalid/Unsupported type';
        }
    }

    if ($type ne 'ise' && $type ne 'uuid') {
        croak 'Invalid/Unsupported type';
    }

    my $uuid = Data::Identifier::Generate->_random(%opts{'sources'});
    return $pkg->new(uuid => $uuid, %opts{'displayname'});
}



sub wellknown {
    my ($pkg, @args) = @_;
    return $pkg->known('wellknown', @args);
}


#@returns __PACKAGE__
sub type {
    my ($self) = @_;
    return $self->{type};
}



sub id {
    my ($self) = @_;
    return $self->{id};
}


sub uuid {
    my ($self, %opts) = @_;

    return $self->{id_cache}{WK_UUID()} if !$opts{no_defaults} && defined($self->{id_cache}) && defined($self->{id_cache}{WK_UUID()});

    if ($self->{type} == $well_known_uuid) {
        return $self->{id};
    }

    unless ($opts{no_defaults}) {
        # Try to generate a UUID and recheck cache:
        $self->_generate;
        return $self->{id_cache}{WK_UUID()} if defined($self->{id_cache}) && defined($self->{id_cache}{WK_UUID()});
    }

    return $opts{default} if exists $opts{default};
    croak 'Identifier has no valid UUID';
}

sub oid {
    my ($self, %opts) = @_;
    my $type = $well_known{oid};

    return $self->{id_cache}{WK_OID()} if !$opts{no_defaults} && defined($self->{id_cache}) && defined($self->{id_cache}{WK_OID()});

    if ($self->{type} == $type) {
        return $self->{id};
    }

    unless ($opts{no_defaults}) {
        if (defined(my $uuid = $self->uuid(default => undef))) {
            return $self->{id_cache}{WK_OID()} = sprintf('2.25.%s', Math::BigInt->new('0x'.$uuid =~ tr/-//dr));
        }
    }

    return $opts{default} if exists $opts{default};
    croak 'Identifier has no valid OID';
}

sub uri {
    my ($self, %opts) = @_;
    my $type = $well_known{uri};

    return $self->{id_cache}{WK_URI()} if !$opts{no_defaults} && defined($self->{id_cache}) && defined($self->{id_cache}{WK_URI()});

    if ($self->{type} == $type) {
        return $self->{id};
    }

    unless ($opts{no_defaults}) {
        if ($self->{type} == $well_known{wd}) {
            return $self->{id_cache}{WK_URI()} = sprintf('http://www.wikidata.org/entity/%s', $self->{id});
        } elsif ($self->{type} == $well_known{doi}) {
            return $self->{id_cache}{WK_URI()} = sprintf('https://doi.org/%s', $self->{id});
        } elsif (defined(my $uuid = $self->uuid(default => undef))) {
            return $self->{id_cache}{WK_URI()} = sprintf('urn:uuid:%s', $uuid);
        } elsif ($enabled_oid && defined(my $oid = $self->oid(default => undef))) {
            return $self->{id_cache}{WK_URI()} = sprintf('urn:oid:%s', $oid);
        } else {
            my $u = URI->new("https://uriid.org/");
            my $type_uuid = $self->{type}->uuid;
            $u->path_segments('', $uuid_to_uriid_org{$type_uuid} // $type_uuid, $self->{id});
            return $self->{id_cache}{WK_URI()} = $u;
        }
    }

    return $opts{default} if exists $opts{default};
    croak 'Identifier has no valid URI';
}

sub sid {
    my ($self, %opts) = @_;
    my $type = $well_known{sid};
    return $self->{id_cache}{WK_SID()} if defined($self->{id_cache}) && defined($self->{id_cache}{WK_SID()});
    if ($self->{type} == $type) {
        return $self->{id};
    }

    return $opts{default} if exists $opts{default};
    croak 'Identifier has no valid SID';
}



sub ise {
    my ($self, %opts) = @_;
    my $type = $self->{type};
    my $have_default = exists $opts{default};
    my $default = delete $opts{default};
    my $value;

    if ($type == $well_known{uuid} || $type == $well_known{oid} || $type == $well_known{uri}) {
        $value = $self->{id};
    } else {
        $opts{default} = undef;
        $value = $self->uuid(%opts) // $self->oid(%opts) // $self->uri(%opts);
    }

    return $value if defined $value;
    return $default if $have_default;
    croak 'Identifier has no valid ISE';
}


sub as {
    my ($self, $as, %opts) = @_;

    $as = $opts{rawtype} if $as eq 'raw' && defined($opts{rawtype});

    return $self if ($as =~ /^[A-Z]/ || $as =~ /::/) && eval {$self->isa($as)};

    $self = __PACKAGE__->new(from => $self) unless eval {$self->isa(__PACKAGE__)};

    if ($as eq 'uuid' || $as eq 'oid' || $as eq 'uri' || $as eq 'sid' || $as eq 'ise') {
        my $func = $self->can($as);
        return $self->$func(%opts);
    } elsif ($as eq __PACKAGE__) {
        return $self;
    } elsif ($as eq 'URI') {
        my $had_default = exists $opts{default};
        my $default = delete $opts{default};
        my $val = $self->uri(%opts, default => undef);

        return URI->new($val) if defined $val;
        if ($had_default) {
            return $default if ref $default;
            return URI->new($default);
        }
        croak 'No value for URI';
    } elsif ($as eq 'Data::URIID::Result' && defined($opts{extractor})) {
        return $opts{extractor}->lookup($self->type->uuid => $self->id);
    } elsif ($as eq 'Data::URIID::Service' && defined($opts{extractor})) {
        return $opts{extractor}->service($self->uuid);
    } elsif ($as eq 'Data::TagDB::Tag' && defined($opts{db})) {
        if ($opts{autocreate}) {
            return $opts{db}->create_tag($self);
        } else {
            return $opts{db}->tag_by_id($self);
        }
    } elsif ($as eq 'Business::ISBN' && $self->type->eq('gtin')) {
        require Business::ISBN;
        my $val = Business::ISBN->new($self->id);
        return $val if defined($val) && $val->is_valid;
    }

    return $opts{default} if exists $opts{default};
    croak 'Unknown/Unsupported as: '.$as;
}


sub eq {
    my ($self, $other) = @_;

    foreach my $e ($self, $other) {
        if (defined($e) && !scalar(eval {$e->isa(__PACKAGE__)})) {
            if (defined $well_known{$e}) {
                $e = $well_known{$e}
            } else {
                $e = Data::Identifier->new(from => $e);
            }
        }
    }

    if (defined($self)) {
        return undef unless defined $other;
        return 1 if $self == $other;
        return undef unless $self->type->eq($other->type);
        return $self->id eq $other->id;
    } else {
        return !defined($other);
    }
}


sub cmp {
    my ($self, $other) = @_;

    foreach my $e ($self, $other) {
        if (defined($e) && !scalar(eval {$e->isa(__PACKAGE__)})) {
            if (defined $well_known{$e}) {
                $e = $well_known{$e}
            } else {
                $e = Data::Identifier->new(from => $e);
            }
        }
    }

    if (defined($self)) {
        return undef unless defined $other;
        return 0 if $self == $other;
        if ((my $r = $self->type->cmp($other->type)) != 0) {
            return $r;
        }

        {
            my $self_id = $self->id;
            my $other_id = $other->id;

            if ((my ($sa, $sb) = $self_id =~ /^([^0-9]*)([0-9]+)$/) && (my ($oa, $ob) = $other_id =~ /^([^0-9]*)([0-9]+)$/)) {
                my $r = $sa cmp $oa;
                return $r if $r;
                return $sb <=> $ob;
            }

            return $self_id cmp $other_id;
        }
    } else {
        return !defined($other);
    }
}


#@returns __PACKAGE__
sub namespace {
    my ($self) = @_;
    return $self->{namespace} // croak 'No namespace';
}


sub register {
    my ($self) = @_;
    $registered{$self->{type}->uuid}{$self->{id}} = $self;

    foreach my $type_name (qw(uuid oid uri sid)) {
        my $f = $self->can($type_name) || next;
        my $v = $self->$f(default => undef) // next;
        $registered{$well_known{$type_name}->uuid}{$v} = $self;
    }
}


sub userdata {
    my ($self, $package, $key, $value) = @_;
    $self->{userdata} //= {};
    $self->{userdata}{$package} //= {};
    return $self->{userdata}{$package}{$key} = $value // $self->{userdata}{$package}{$key};
}


sub displayname {
    my ($self, %opts) = @_;

    if (defined(my $displayname = $self->{displayname})) {
        $displayname = $self->$displayname() if ref $displayname;

        # recheck and return as any of the above conversions could result in $displayname becoming invalid.
        return $displayname if defined($displayname) && length($displayname);
    }

    return $self->id.'' unless $opts{no_defaults}; # force stringification.
    return $opts{default} if exists $opts{default};
    croak 'No value for displayname';
}


sub displaycolour { my ($self, %opts) = @_; return $opts{default}; }
sub icontext { my ($self, %opts) = @_; return $opts{default}; }
sub description { my ($self, %opts) = @_; return $opts{default}; }

# ---- Private helpers ----

sub import {
    my ($pkg, $opts) = @_;
    return unless defined $opts;
    croak 'Bad options' unless ref($opts) eq 'HASH';

    if (defined(my $disable = $opts->{disable})) {
        $disable = [split /\s*,\s*/, $disable] unless ref $disable;
        foreach my $to_disable (@{$disable}) {
            if ($to_disable eq 'oid') {
                $enabled_oid = undef;
                undef *oid;
            } else {
                croak 'Unknown feature: '.$to_disable;
            }
        }
    }
}

sub _generate {
    my ($self) = @_;
    unless (exists $self->{_generate}) {
        my __PACKAGE__ $type = $self->type;

        if (defined(my $generate = $type->{generate})) {
            unless (ref $generate) {
                $self->{generate} = $generate = {style => $generate};
            }

            $self->{id_cache} //= {};

            if (defined(my __PACKAGE__ $ns = eval {$type->namespace->uuid})) {
                my $style = $generate->{style};
                my $input;

                if ($style eq 'id-based') {
                    $input = lc($self->id);
                } else {
                    croak 'Unsupported generator style';
                }

                if (defined $input) {
                    $self->{id_cache}{WK_UUID()} = Data::Identifier::Generate->_uuid_v5($ns, $input);
                }
            }
        }
    }
    $self->{_generate} = undef;
}

sub _known_provider {
    my ($pkg, $class, %opts) = @_;
    croak 'Unsupported options passed' if scalar(keys %opts);

    if ($class eq 'wellknown') {
        state $wellknown = do {
            my %hash = map{$_ => $_} values(%well_known), map {values %{$_}} values(%registered);
            [values %hash];
        };

        return ($wellknown, rawtype => __PACKAGE__);
    } elsif ($class eq 'registered' || $class eq ':all') {
        my %hash = map{$_ => $_} values(%well_known), map {values %{$_}} values(%registered);
        return ([values %hash], rawtype => __PACKAGE__);
    }

    croak 'Unsupported class';
}

1;

__END__

=pod

=encoding UTF-8

=head1 NAME

Data::Identifier - format independent identifier object

=head1 VERSION

version v0.09

=head1 SYNOPSIS

    use Data::Identifier;
    # or:
    use Data::Identifier {option => value, ...};

    my Data::Identifier $id = Data::Identifier->new(uuid => 'ddd60c5c-2934-404f-8f2d-fcb4da88b633');
    my Data::Identifier $id = Data::Identifier->new(oid => '2.1.0.1.0');

    my Data::Identifier $custom_type = Data::Identifier->new(uuid => ...);
    my Data::Identifier $id = Data::Identifier->new($custom_type => '123abc');

    my Data::Identifier $type = $id->type;
    my $raw = $id->id;

    my $uuid = $id->uuid;
    my $oid  = $id->oid;
    my $uri  = $id->uri;
    my $ise  = $id->ise;

This module provides an common interface to identifiers of different types.
Each identifier stores both it's raw value (called C<id>) and it's type (C<type>).

B<Note:> Validation on the raw identifier value may or may not be performed depending on the type.
The level of validation this module can do is limited by it's knowledge about the type as well
as performance aspects. This module may therefore reject invalid values. But it is not safe to assume
that it will reject all invalid values.

B<Note:> This module performs basic deduplication and normalisation. This means that you
might not always get back exactly the identifier you passed in but an equivalent one.
Also note that deduplication is done with performance in mind. This means that there is no
guarantee for two equal identifiers to become deduplicated. See also L</register>.

This package inherits from L<Data::Identifier::Interface::Known>.

=head2 OPTIONS

The following options are supported. Some are marked as experimental.

=head3 disable

B<Note:>
This is an B<experimental> option. It may be changed, renamed, or removed without notice.

This option allows to disable a feature.
This is a global setting, and therefore should only be used at the top level code.

In order for this to be most effective this should be used in the top level code
before any other module is C<use>-ed or C<require>-ed that makes use of this module.

This setting takes an arrayref of strings or a single string that is a comma separated list.

Currently the following features can be disabled:

=over

=item C<oid>

Support for OID. This removes the L</oid> function from this package,
removes internal OID based caches, and parts of the OID detection and normalisation logic.

However this improves the speed of L</register> and some others significantly.

This feature should only be disabled if you're sure you will not use OIDs in your code.

=back

=head1 METHODS

=head2 new

    my Data::Identifier $identifier = Data::Identifier->new($type => $id, %opts);

Creates a new identifier.

C<$type> needs to be a L<Data::Identifier>, a well known name, a UUID, C<wellknown>, C<ise>, or C<from>.

If it is an UUID a type is created as needed.

If it is C<ise> it is parsed as C<uuid>, C<oid>, or C<uri> according to it's format.

If it is C<wellknown> it refers to an identifier from the well known list.

If it is C<from> then C<$id> should refer to an object of some kind that should be converted to identifier.
In this case not all options might be supported. Currently it is possible to convert from:
L<Data::Identifier>,
L<Data::URIID::Colour>, L<Data::URIID::Service>, L<Data::URIID::Result>,
L<Data::TagDB::Tag>,
and L<Business::ISBN>. If C<$id> is not a reference it is parsed as with C<ise>.

The following type names are currently well known:

=over

=item C<uuid>

An UUID.

=item C<oid>

An OID.

=item C<uri>

An URI.

=item C<wd>

An wikidata identifier (Q, P, or L).

=item C<fc>

An FactGrid identifier (Q, P, or L).

=item C<gtin>

An GTIN (or EAN).

=item C<iban>

An IBAN (International Bank Account Number).

=item C<bic>

A BIC (Business Identifier Code).

=item C<doi>

A doi (digital object identifier).

=back

The following options are supported:

=over

=item C<validate>

A regex that should be used to validate identifiers if this identifier is used as a type.

=item C<namespace>

The namespace used by a type. Must be a L<Data::Identifier> or an ISE. Must also resolve to an UUID.

=item C<displayname>

The name as to be returned by L</displayname>.
Must be a scalar string value or a code reference that returns a scalar string.
If it is a code reference the identifier object is passed as C<$_[0]>.

=item C<extractor>

An instance of L<Data::URIID>. This option is currently ignored.

=item C<db>

An instance of L<Data::TagDB>. This option is currently ignored.

=back

=head2 random

    my Data::Identifier $identifier = Data::Identifier->random([ %opts ]);

Generate a new random identifier.
This method will C<die> on error.

The following options (all optional) are supported:

=over

=item C<displayname>

The same as the option of the same name in L</new>. See also L</displayname>.

=item C<sources>

The backend data sources to use for generation of the identifier. This is an array reference
with the names of the modules in order of preference (most preferred first).

Defaults to a list of high quality sources.

Currently supported are at least:
L<Crypt::URandom>,
L<UUID4::Tiny>,
L<Math::Random::Secure>,
L<UUID::URandom>,
L<UUID::Tiny::Patch::UseMRS>,
L<UUID::Tiny> (low quality).

=item C<type>

The type to generate a random identifier in.
A L<Data::Identifier> or one of the special values C<uuid> or C<ise>.

=back

=head2 known

    my @list = Data::Identifier->known($class [, %opts ] );

This module implements L<Data::Identifier::Interface::Known/known>. See there for details.

Supported classes:

=over

=item C<wellknown>

Returns a list with all well known identifiers.

This is useful to prime a database.

=item C<registered>

Returns the list of all currently registered identifiers.

=item C<:all>

Returns the list of all currently known identifiers.

=back

=head2 wellknown

    my @wellknown = Data::Identifier->wellknown(%opts);

This is an alias for:

    my @wellknown = Data::Identifier->known('wellknown', %opts);

See also L</known>.

=head2 type

    my Data::Identifier $type = $identifier->type;

Returns the type of the identifier.

=head2 id

    my $id = $identifier->id;

Returns the raw id of the identifier.

=head2 uuid, oid, uri, sid

    my $uuid = $identifier->uuid( [ %opts ] );
    my $oid  = $identifier->oid( [ %opts ] );
    my $uri  = $identifier->uri( [ %opts ] );
    my $sid  = $identifier->sid( [ %opts ] );

Return the UUID, OID, URI, or SID (small-identifier) of the current identifier or die if no identifier of that type is known nor can be calculated.

The following options (all optional) are supported:

=over

=item C<default>

The default value to return if no other value is available.
This can be set to C<undef> to change the method from C<die>ing in failture to returning C<undef>.

=item C<no_defaults>

If set true do not try to generate a matching identifier.
Note: This does not apply to C<sid()> as small-identifiers cannot be generated. For C<sid()> the option is ignored.

=back

=head2 ise

    my $ise = $identifier->ise( [ %opts ] );

Returns the ISE (UUID, OID, or URI) for the current identifier or die if no ISE is known nor can be calculated.

Supports all options also supported by L</uuid>, L</oid>, and L</uri>.

=head2 as

    my $res = $identifier->as($as, %opts);
    # or:
    my $res = $identifier->Data::Identifier::as($as, %opts); # $identifier is an alien type

This method converts the given identifier to another type of object.

C<$as> must be a name of the package (containing C<::> or starting with an uppercase letter),
or one of the special values.

Currently the following packages are supported:
L<URI>,
L<Data::Identifier>,
L<Data::URIID::Result>,
L<Data::URIID::Service>,
L<Data::TagDB::Tag>,
L<Business::ISBN>.
Other packages might be supported. Packages need to be installed in order to be supported.
Also some packages need special options to be passed to be available.

The folliwng special values are supported:
C<uuid>, C<oid>, C<uri>, C<sid>, C<ise>, and C<raw>.
All but C<raw> are aliases to the corresponding functions.
C<raw> is an alias for the type set with the C<rawtype> option (see below).

If C<$identifier> is or may not be an L<Data::Identifier> this method can be called like
C<$identifier-E<gt>Data::Identifier::as($as...)>.
In that case C<$identifier> is parsed as with C<from> in L</new>.

If C<$identifier> is a C<$as> (see also C<rawtype> below) then C<$identifier> is returned as-is,
even if C<$as> would not be supported otherwise.

The following options (all optional) are supported:

=over

=item C<autocreate>

If the requested type refers to some permanent storage and the object does not exist for
the given identifier whether to create a new object or not.

Defaults to false.

=item C<db>

An instance of L<Data::TagDB>. This is used to create instances of related packages.

=item C<default>

Same as in L</uuid>.

=item C<no_defaults>

Same as in L</uuid>.

=item C<rawtype>

If C<$as> is given as C<raw> then this value is used for C<$as>.
This can be used to ease implementation of other methods that are required to accept C<raw>.

=item C<extractor>

An instance of L<Data::URIID>. This is used to create instances of related packages
such as L<Data::URIID::Result>.

=back

=head2 eq

    my $bool = $identifier->eq($other); # $identifier must be non-undef
    # or:
    my $bool = Data::Identifier::eq($identifier, $other); # $identifier can be undef

Compares two identifiers to be equal.

If both identifiers are C<undef> they are considered equal.

If C<$identifier> or C<$other> is not an instance of L<Data::Identifier> or C<undef>
then it is checked against the list of well known identifiers (see L</new>).
If it has still no match L</new> with the virtual type C<from> is used.

=head2 cmp

    my $val = $identifier->cmp($other); # $identifier must be non-undef
    # or:
    my $val = Data::Identifier::cmp($identifier, $other); # $identifier can be undef

Compares the identifiers similar to C<cmp>. This method can be used to order identifiers.
To check for them to be equal see L</eq>.

The parameters are parsed the same way as L</eq>.

If this method is used for sorting the exact resulting order is not defined. However:

=over

=item *

The order is stable

=item *

Identifiers are ordered by type first

=item *

If the all identifiers have the same type this method tries to be smart about ordering
(ordering numeric values correctly).

=item *

The order is the same for C<$a-E<gt>cmp($b)> as for C<- $b-E<gt>cmp($a)>.

=back

=head2 namespace

    my Data::Identifier $namespace = $identifier->namespace;

Gets the namespace for the type C<$identifier> or dies.
This call is only valid for identifiers that are types.

=head2 register

    $identifier->register;

Registers the identifier for deduplication.
This can be used to register much used identifiers and types
early in an application to increase performance.
However, note that once registered an identifier object is cached for
the life time of the process.

=head2 userdata

    my $value = $identifier->userdata(__PACKAGE__, $key);
    $identifier->userdata(__PACKAGE__, $key => $value);

Get or set user data to be used with this identifier. The data is stored using the given C<$key>.
The package of the caller is given to provide namespaces for the userdata, so two independent packages
can use the same C<$key>.

The meaning of C<$key>, and C<$value> is up to C<__PACKAGE__>.

=head2 displayname

    my $displayname = $identifier->displayname( [ %opts ] );

Returns a display name suitable to display to the user. This function always returns a string.
This is mostly for compatibility with L<Data::TagDB::Tag>.

The following options (all optional) are supported:

=over

=item C<default>

The default value to return if no other value is available.
This can be set to C<undef> to let this method return undef (not die).

=item C<no_defaults>

If set true do not try to use any identifier or other fallback as displayname.

=back

=head2 displaycolour, icontext, description

    my $displaycolour = $identifier->displaycolour( [ %opts ] );
    my $icontext      = $identifier->icontext( [ %opts ] );
    my $description   = $identifier->description( [ %opts ] );

These functions always return C<undef>. They are for compatibility with L<Data::TagDB::Tag>.

The following options (all optional) are supported:

=over

=item C<default>

The default value to return if no other value is available (which is always the case).
This is for compatibility with L</displayname> and implementations of other packages.

=item C<no_defaults>

This option is accepted but ignored.
This is for compatibility with L</displayname> and implementations of other packages.

=back

=head1 AUTHOR

Löwenfelsen UG (haftungsbeschränkt) <support@loewenfelsen.net>

=head1 COPYRIGHT AND LICENSE

This software is Copyright (c) 2023-2025 by Löwenfelsen UG (haftungsbeschränkt) <support@loewenfelsen.net>.

This is free software, licensed under:

  The Artistic License 2.0 (GPL Compatible)

=cut