The Perl Toolchain Summit 2025 Needs You: You can help 🙏 Learn more

#!/usr/bin/perl -w
# Copyright (c) 2023-2025 Löwenfelsen UG (haftungsbeschränkt)
# licensed under Artistic License 2.0 (see LICENSE file)
# ABSTRACT: format independent identifier object
use v5.20;
use strict;
use Carp;
use Math::BigInt lib => 'GMP';
use URI;
our $VERSION = v0.09;
use constant {
RE_UUID => qr/^[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}$/,
RE_OID => qr/^[0-2](?:\.(?:0|[1-9][0-9]*))+$/,
RE_URI => qr/^[a-zA-Z][a-zA-Z0-9\+\.\-]+/,
RE_UINT => qr/^(?:0|[1-9][0-9]*)$/,
RE_QID => qr/^[QPL][1-9][0-9]*$/,
RE_DOI => qr/^10\.[1-9][0-9]+(?:\.[0-9]+)*\/./,
RE_UNICODE => qr/^U\+([0-9A-F]{4,7})$/,
};
use constant {
WK_UUID => '8be115d2-dc2f-4a98-91e1-a6e3075cbc31', # uuid
WK_OID => 'd08dc905-bbf6-4183-b219-67723c3c8374', # oid
WK_URI => 'a8d1637d-af19-49e9-9ef8-6bc1fbcf6439', # uri
WK_SID => 'f87a38cb-fd13-4e15-866c-e49901adbec5', # small-identifier
WK_WD => 'ce7aae1e-a210-4214-926a-0ebca56d77e3', # wikidata-identifier
WK_GTIN => '82d529be-0f00-4b4f-a43f-4a22de5f5312', # gtin
WK_IBAN => 'b1418262-6bc9-459c-b4b0-a054d77db0ea', # iban
WK_BIC => 'c8a3a132-f160-473c-b5f3-26a748f37e62', # bic
WK_DOI => '931f155e-5a24-499b-9fbb-ed4efefe27fe', # doi
WK_FC => 'd576b9d1-47d4-43ae-b7ec-bbea1fe009ba', # factgrid-identifier
WK_UNICODE_CP => '5f167223-cc9c-4b2f-9928-9fe1b253b560', # unicode-code-point
NS_WD => '9e10aca7-4a99-43ac-9368-6cbfa43636df', # Wikidata-namespace
NS_FC => '6491f7a9-0b29-4ef1-992c-3681cea18182', # factgrid-namespace
NS_INT => '5dd8ddbb-13a8-4d6c-9264-36e6dd6f9c99', # integer-namespace
NS_DATE => 'fc43fbba-b959-4882-b4c8-90a288b7d416', # gregorian-date-namespace
NS_UNICODE_CP => '132aa723-a373-48bf-a88d-69f1e00f00cf', # 'unicode-character-namespace'
};
# Features:
my $enabled_oid = 1;
my %uuid_to_uriid_org = (
WK_UUID() => 'uuid',
WK_OID() => 'oid',
WK_URI() => 'uri',
WK_SID() => 'sid',
WK_GTIN() => 'gtin',
WK_WD() => 'wikidata-identifier',
);
my %uuid_org_to_uuid = map {$uuid_to_uriid_org{$_} => $_} keys %uuid_to_uriid_org;
my $well_known_uuid = __PACKAGE__->new(ise => WK_UUID, validate => RE_UUID);
my %well_known = (
uuid => $well_known_uuid,
oid => __PACKAGE__->new($well_known_uuid => WK_OID, validate => RE_OID),
uri => __PACKAGE__->new($well_known_uuid => WK_URI, validate => RE_URI),
sid => __PACKAGE__->new($well_known_uuid => WK_SID, validate => RE_UINT),
wd => __PACKAGE__->new($well_known_uuid => WK_WD, validate => RE_QID, namespace => NS_WD, generate => 'id-based'),
fc => __PACKAGE__->new($well_known_uuid => WK_FC, validate => RE_QID, namespace => NS_FC, generate => 'id-based'),
gtin => __PACKAGE__->new($well_known_uuid => WK_GTIN, validate => RE_UINT),
iban => __PACKAGE__->new($well_known_uuid => WK_IBAN),
bic => __PACKAGE__->new($well_known_uuid => WK_BIC),
doi => __PACKAGE__->new($well_known_uuid => WK_DOI, validate => RE_DOI),
# Unofficial, not part of public API:
unicodecp => __PACKAGE__->new($well_known_uuid => WK_UNICODE_CP, validate => RE_UNICODE, namespace => NS_UNICODE_CP, generate => 'id-based'),
);
my %registered;
$_->register foreach values %well_known;
# Refill with sids:
{
my %wk_sids = (
'ddd60c5c-2934-404f-8f2d-fcb4da88b633' => 1, # also-shares-identifier
WK_UUID() => 2,
'bfae7574-3dae-425d-89b1-9c087c140c23' => 3, # tagname
'7f265548-81dc-4280-9550-1bd0aa4bf748' => 4, # has-type
WK_URI() => 5,
WK_OID() => 6,
# Unassigned: 7
'd0a4c6e2-ce2f-4d4c-b079-60065ac681f1' => 8, # language-tag-identifier
WK_WD() => 9,
'923b43ae-a50e-4db3-8655-ed931d0dd6d4' => 10, # specialises
'eacbf914-52cf-4192-a42c-8ecd27c85ee1' => 11, # unicode-string
'928d02b0-7143-4ec9-b5ac-9554f02d3fb1' => 12, # integer
'dea3782c-6bcb-4ce9-8a39-f8dab399d75d' => 13, # unsigned-integer
# Unassigned: 14, 15
'6ba648c2-3657-47c2-8541-9b73c3a9b2b4' => 16, # default-context
'52a516d0-25d8-47c7-a6ba-80983e576c54' => 17, # proto-file
'1cd4a6c6-0d7c-48d1-81e7-4e8d41fdb45d' => 18, # final-file-size
'6085f87e-4797-4bb2-b23d-85ff7edc1da0' => 19, # text-fragment
'4c9656eb-c130-42b7-9348-a1fee3f42050' => 20, # also-list-contains-also
'298ef373-9731-491d-824d-b2836250e865' => 21, # proto-message
'7be4d8c7-6a75-44cc-94f7-c87433307b26' => 22, # proto-entity
'65bb36f2-b558-48af-8512-bca9150cca85' => 23, # proxy-type
'a1c478b5-0a85-4b5b-96da-d250db14a67c' => 24, # flagged-as
'59cfe520-ba32-48cc-b654-74f7a05779db' => 25, # marked-as
'2bffc55d-7380-454e-bd53-c5acd525d692' => 26, # roaraudio-error-number
WK_SID() => 27,
'd2750351-aed7-4ade-aa80-c32436cc6030' => 28, # also-has-role
# Unassigned: 29, 30, 31
'448c50a8-c847-4bc7-856e-0db5fea8f23b' => 32, # final-file-encoding
'79385945-0963-44aa-880a-bca4a42e9002' => 33, # final-file-hash
'3fde5688-6e34-45e9-8f33-68f079b152c8' => 34, # SEEK_SET
'bc598c52-642e-465b-b079-e9253cd6f190' => 35, # SEEK_CUR
'06aff30f-70e8-48b4-8b20-9194d22fc460' => 36, # SEEK_END
'59a5691a-6a19-4051-bc26-8db82c019df3' => 37, # inode
'2c7e15ed-aa2f-4e2f-9a1d-64df0c85875a' => 112, # chat-0-word-identifier
WK_GTIN() => 160,
);
foreach my $ise (keys %wk_sids) {
my $identifier = __PACKAGE__->new(ise => $ise);
$identifier->{id_cache} //= {};
$identifier->{id_cache}->{WK_SID()} //= $wk_sids{$ise};
$identifier->register; # re-register
}
}
# Some extra tags such as namespaces:
foreach my $ise (NS_WD, NS_INT, NS_DATE) {
my $identifier = __PACKAGE__->new(ise => $ise);
$identifier->register; # re-register
}
# Refill with displaynames
{
my %displaynames = (
WK_UUID() => 'uuid',
WK_OID() => 'oid',
WK_URI() => 'uri',
WK_SID() => 'small-identifier',
WK_WD() => 'wikidata-identifier',
WK_GTIN() => 'gtin',
WK_IBAN() => 'iban',
WK_BIC() => 'bic',
WK_DOI() => 'doi',
WK_FC() => 'factgrid-identifier',
WK_UNICODE_CP() => 'unicode-code-point',
NS_WD() => 'Wikidata-namespace',
NS_FC() => 'factgrid-namespace',
NS_INT() => 'integer-namespace',
NS_DATE() => 'gregorian-date-namespace',
NS_UNICODE_CP() => 'unicode-character-namespace',
'ddd60c5c-2934-404f-8f2d-fcb4da88b633' => 'also-shares-identifier',
'bfae7574-3dae-425d-89b1-9c087c140c23' => 'tagname',
'7f265548-81dc-4280-9550-1bd0aa4bf748' => 'has-type',
'd0a4c6e2-ce2f-4d4c-b079-60065ac681f1' => 'language-tag-identifier',
'923b43ae-a50e-4db3-8655-ed931d0dd6d4' => 'specialises',
'eacbf914-52cf-4192-a42c-8ecd27c85ee1' => 'unicode-string',
'928d02b0-7143-4ec9-b5ac-9554f02d3fb1' => 'integer',
'dea3782c-6bcb-4ce9-8a39-f8dab399d75d' => 'unsigned-integer',
'6ba648c2-3657-47c2-8541-9b73c3a9b2b4' => 'default-context',
'52a516d0-25d8-47c7-a6ba-80983e576c54' => 'proto-file',
'1cd4a6c6-0d7c-48d1-81e7-4e8d41fdb45d' => 'final-file-size',
'6085f87e-4797-4bb2-b23d-85ff7edc1da0' => 'text-fragment',
'4c9656eb-c130-42b7-9348-a1fee3f42050' => 'also-list-contains-also',
'298ef373-9731-491d-824d-b2836250e865' => 'proto-message',
'7be4d8c7-6a75-44cc-94f7-c87433307b26' => 'proto-entity',
'65bb36f2-b558-48af-8512-bca9150cca85' => 'proxy-type',
'a1c478b5-0a85-4b5b-96da-d250db14a67c' => 'flagged-as',
'59cfe520-ba32-48cc-b654-74f7a05779db' => 'marked-as',
'2bffc55d-7380-454e-bd53-c5acd525d692' => 'roaraudio-error-number',
'd2750351-aed7-4ade-aa80-c32436cc6030' => 'also-has-role',
'448c50a8-c847-4bc7-856e-0db5fea8f23b' => 'final-file-encoding',
'79385945-0963-44aa-880a-bca4a42e9002' => 'final-file-hash',
'3fde5688-6e34-45e9-8f33-68f079b152c8' => 'SEEK_SET',
'bc598c52-642e-465b-b079-e9253cd6f190' => 'SEEK_CUR',
'06aff30f-70e8-48b4-8b20-9194d22fc460' => 'SEEK_END',
'59a5691a-6a19-4051-bc26-8db82c019df3' => 'inode',
'2c7e15ed-aa2f-4e2f-9a1d-64df0c85875a' => 'chat-0-word-identifier',
);
foreach my $ise (keys %displaynames) {
my $identifier = __PACKAGE__->new(ise => $ise);
$identifier->{displayname} //= $displaynames{$ise};
$identifier->register; # re-register
}
}
{
# ISE -> namespace
my %namespaces_uint = (
'2bffc55d-7380-454e-bd53-c5acd525d692' => '744eaf4e-ae93-44d8-9ab5-744105222da6', # roaraudio-error-number: roaraudio-error-namespace
'4a7fc2e2-854b-42ec-b24f-c7fece371865' => 'ac59062c-6ba2-44de-9f54-09e28f2c0b5c', # e621-post-identifier: e621-post-namespace
'a0a4fae2-be6f-4a51-8326-6110ba845a16' => '69b7ff38-ca78-43a8-b9ea-66cb02312eef', # e621-pool-identifier: e621-pool-namespace
);
foreach my $ise (keys %namespaces_uint) {
my $identifier = __PACKAGE__->new(ise => $ise);
$identifier->{namespace} //= __PACKAGE__->new(ise => $namespaces_uint{$ise});
$identifier->{validate} //= RE_UINT;
$identifier->{generate} //= 'id-based';
$identifier->register; # re-register
}
# validate => RE_QID, namespace => NS_FC, generate => 'id-based'
}
# Call this after after we loaded all our stuff and before anyone else will register stuff:
__PACKAGE__->_known_provider('wellknown');
sub new {
my ($pkg, $type, $id, %opts) = @_;
my $self = bless {};
if (!ref($type) && $type eq 'from') {
if (ref($id)) {
my $from = $id;
if ($id->isa('Data::Identifier')) {
if (scalar(keys %opts)) {
$type = $id->type;
$id = $id->id;
} else {
return $id;
}
} elsif ($id->isa('URI')) {
$type = 'uri';
} elsif ($id->isa('Data::URIID::Result')) {
$opts{displayname} //= sub { return $from->attribute('displayname', default => undef) };
$type = $id->id_type;
$id = $id->id;
} elsif ($id->isa('Data::URIID::Base') || $id->isa('Data::URIID::Colour') || $id->isa('Data::URIID::Service')) {
#$opts{displayname} //= $id->name if $id->isa('Data::URIID::Service');
$opts{displayname} //= $id->displayname(default => undef, no_defaults => 1);
$type = 'ise';
$id = $id->ise;
} elsif ($id->isa('Data::TagDB::Tag')) {
$opts{displayname} //= sub { $from->displayname };
$type = 'ise';
$id = $id->ise;
} elsif ($id->isa('Business::ISBN')) {
$type = $well_known{gtin};
$id = $id->as_isbn13->as_string([]);
} else {
croak 'Unsupported input data';
}
} else {
# If it's not a ref, try as ise.
$type = 'ise';
}
}
if (!ref($type) && $type eq 'ise') {
croak 'Undefined identifier but type is ISE' unless defined $id;
if ($id =~ /^[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}$/) { # allow less normalised form than RE_UUID
$type = $well_known_uuid;
# For bootstrap only.
if (!defined($type) && $id eq '8be115d2-dc2f-4a98-91e1-a6e3075cbc31') {
$self->{type} = $well_known_uuid = $type = $self;
$self->{id} = $id;
}
} elsif ($id =~ RE_OID) {
$type = 'oid';
} elsif ($id =~ RE_URI) {
$type = 'uri';
} else {
croak 'Not a valid ISE identifier';
}
}
unless (ref $type) {
if ($type =~ /^[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}$/) { # allow less normalised form than RE_UUID
$type = $pkg->new(uuid => $type);
$type->register;
} elsif ($type eq 'wellknown') {
$self = $well_known{$id};
croak 'Unknown well-known' unless defined $self;
return $self;
} else {
$type = $well_known{$type};
}
croak 'Unknown type name' unless defined $type;
}
croak 'Not a valid type' unless $type->isa(__PACKAGE__);
# we normalise URIs first as they may then normalised again
if ($type == ($well_known{uri} // 0)) {
my $uri = $id.''; # force stringification
if ($uri =~ m#^urn:uuid:([0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})$#) {
$id = $1;
$type = $well_known_uuid;
} elsif ($uri =~ m#^urn:oid:([0-2](?:\.(?:0|[1-9][0-9]*))+)$#) {
$id = $1;
$type = $well_known{oid};
} elsif ($uri =~ m#^https?://www\.wikidata\.org/entity/([QPL][1-9][0-9]*)$#) {
$id = $1;
$type = $well_known{wd};
} elsif ($uri =~ m#^https?://doi\.org/(10\..+)$#) {
$id = $1;
$type = $well_known{doi};
} elsif ($uri =~ m#^https?://uriid\.org/([^/]+)/[^/]+#) {
my $ptype = $1;
if (defined($uuid_org_to_uuid{$ptype}) || $ptype =~ RE_UUID) {
my $u = URI->new($uri);
my @path_segments = $u->path_segments;
if (scalar(@path_segments) == 3 && $path_segments[0] eq '') {
$type = $pkg->new(uuid => ($uuid_org_to_uuid{$path_segments[1]} // $path_segments[1]));
$id = $path_segments[2];
}
}
}
}
if ($type == ($well_known_uuid // 0)) {
$id = lc($id); # normalise
} elsif ($type == ($well_known{oid} // 0)) {
if ($id =~ /^2\.25\.([1-9][0-9]*)$/) {
my $hex = Math::BigInt->new($1)->as_hex;
$hex =~ s/^0x//;
$hex = ('0' x (32 - length($hex))) . $hex;
$hex =~ s/^(.{8})(.{4})(.{4})(.{4})(.{12})$/$1-$2-$3-$4-$5/;
$type = $well_known_uuid;
$id = $hex;
}
}
if (defined(my $v = $registered{$type->uuid}{$id})) {
return $v;
}
if (defined $type->{validate}) {
croak 'Identifier did not validate against type' unless $id =~ $type->{validate};
}
$self->{type} = $type;
$self->{id} = $id;
foreach my $key (qw(validate namespace generate displayname)) {
next unless defined $opts{$key};
$self->{$key} //= $opts{$key};
}
foreach my $key (qw(namespace)) {
if (defined(my $v = $self->{$key})) {
unless (ref $v) {
$self->{$key} = $pkg->new(ise => $v);
}
}
}
return bless $self;
}
#@returns __PACKAGE__
sub random {
my ($pkg, %opts) = @_;
my $type = $opts{type} // 'uuid';
if (ref $type) {
if ($type == $well_known_uuid) {
$type = 'uuid';
} else {
croak 'Invalid/Unsupported type';
}
}
if ($type ne 'ise' && $type ne 'uuid') {
croak 'Invalid/Unsupported type';
}
my $uuid = Data::Identifier::Generate->_random(%opts{'sources'});
return $pkg->new(uuid => $uuid, %opts{'displayname'});
}
sub wellknown {
my ($pkg, @args) = @_;
return $pkg->known('wellknown', @args);
}
#@returns __PACKAGE__
sub type {
my ($self) = @_;
return $self->{type};
}
sub id {
my ($self) = @_;
return $self->{id};
}
sub uuid {
my ($self, %opts) = @_;
return $self->{id_cache}{WK_UUID()} if !$opts{no_defaults} && defined($self->{id_cache}) && defined($self->{id_cache}{WK_UUID()});
if ($self->{type} == $well_known_uuid) {
return $self->{id};
}
unless ($opts{no_defaults}) {
# Try to generate a UUID and recheck cache:
$self->_generate;
return $self->{id_cache}{WK_UUID()} if defined($self->{id_cache}) && defined($self->{id_cache}{WK_UUID()});
}
return $opts{default} if exists $opts{default};
croak 'Identifier has no valid UUID';
}
sub oid {
my ($self, %opts) = @_;
my $type = $well_known{oid};
return $self->{id_cache}{WK_OID()} if !$opts{no_defaults} && defined($self->{id_cache}) && defined($self->{id_cache}{WK_OID()});
if ($self->{type} == $type) {
return $self->{id};
}
unless ($opts{no_defaults}) {
if (defined(my $uuid = $self->uuid(default => undef))) {
return $self->{id_cache}{WK_OID()} = sprintf('2.25.%s', Math::BigInt->new('0x'.$uuid =~ tr/-//dr));
}
}
return $opts{default} if exists $opts{default};
croak 'Identifier has no valid OID';
}
sub uri {
my ($self, %opts) = @_;
my $type = $well_known{uri};
return $self->{id_cache}{WK_URI()} if !$opts{no_defaults} && defined($self->{id_cache}) && defined($self->{id_cache}{WK_URI()});
if ($self->{type} == $type) {
return $self->{id};
}
unless ($opts{no_defaults}) {
if ($self->{type} == $well_known{wd}) {
return $self->{id_cache}{WK_URI()} = sprintf('http://www.wikidata.org/entity/%s', $self->{id});
} elsif ($self->{type} == $well_known{doi}) {
return $self->{id_cache}{WK_URI()} = sprintf('https://doi.org/%s', $self->{id});
} elsif (defined(my $uuid = $self->uuid(default => undef))) {
return $self->{id_cache}{WK_URI()} = sprintf('urn:uuid:%s', $uuid);
} elsif ($enabled_oid && defined(my $oid = $self->oid(default => undef))) {
return $self->{id_cache}{WK_URI()} = sprintf('urn:oid:%s', $oid);
} else {
my $u = URI->new("https://uriid.org/");
my $type_uuid = $self->{type}->uuid;
$u->path_segments('', $uuid_to_uriid_org{$type_uuid} // $type_uuid, $self->{id});
return $self->{id_cache}{WK_URI()} = $u;
}
}
return $opts{default} if exists $opts{default};
croak 'Identifier has no valid URI';
}
sub sid {
my ($self, %opts) = @_;
my $type = $well_known{sid};
return $self->{id_cache}{WK_SID()} if defined($self->{id_cache}) && defined($self->{id_cache}{WK_SID()});
if ($self->{type} == $type) {
return $self->{id};
}
return $opts{default} if exists $opts{default};
croak 'Identifier has no valid SID';
}
sub ise {
my ($self, %opts) = @_;
my $type = $self->{type};
my $have_default = exists $opts{default};
my $default = delete $opts{default};
my $value;
if ($type == $well_known{uuid} || $type == $well_known{oid} || $type == $well_known{uri}) {
$value = $self->{id};
} else {
$opts{default} = undef;
$value = $self->uuid(%opts) // $self->oid(%opts) // $self->uri(%opts);
}
return $value if defined $value;
return $default if $have_default;
croak 'Identifier has no valid ISE';
}
sub as {
my ($self, $as, %opts) = @_;
$as = $opts{rawtype} if $as eq 'raw' && defined($opts{rawtype});
return $self if ($as =~ /^[A-Z]/ || $as =~ /::/) && eval {$self->isa($as)};
$self = __PACKAGE__->new(from => $self) unless eval {$self->isa(__PACKAGE__)};
if ($as eq 'uuid' || $as eq 'oid' || $as eq 'uri' || $as eq 'sid' || $as eq 'ise') {
my $func = $self->can($as);
return $self->$func(%opts);
} elsif ($as eq __PACKAGE__) {
return $self;
} elsif ($as eq 'URI') {
my $had_default = exists $opts{default};
my $default = delete $opts{default};
my $val = $self->uri(%opts, default => undef);
return URI->new($val) if defined $val;
if ($had_default) {
return $default if ref $default;
return URI->new($default);
}
croak 'No value for URI';
} elsif ($as eq 'Data::URIID::Result' && defined($opts{extractor})) {
return $opts{extractor}->lookup($self->type->uuid => $self->id);
} elsif ($as eq 'Data::URIID::Service' && defined($opts{extractor})) {
return $opts{extractor}->service($self->uuid);
} elsif ($as eq 'Data::TagDB::Tag' && defined($opts{db})) {
if ($opts{autocreate}) {
return $opts{db}->create_tag($self);
} else {
return $opts{db}->tag_by_id($self);
}
} elsif ($as eq 'Business::ISBN' && $self->type->eq('gtin')) {
require Business::ISBN;
my $val = Business::ISBN->new($self->id);
return $val if defined($val) && $val->is_valid;
}
return $opts{default} if exists $opts{default};
croak 'Unknown/Unsupported as: '.$as;
}
sub eq {
my ($self, $other) = @_;
foreach my $e ($self, $other) {
if (defined($e) && !scalar(eval {$e->isa(__PACKAGE__)})) {
if (defined $well_known{$e}) {
$e = $well_known{$e}
} else {
$e = Data::Identifier->new(from => $e);
}
}
}
if (defined($self)) {
return undef unless defined $other;
return 1 if $self == $other;
return undef unless $self->type->eq($other->type);
return $self->id eq $other->id;
} else {
return !defined($other);
}
}
sub cmp {
my ($self, $other) = @_;
foreach my $e ($self, $other) {
if (defined($e) && !scalar(eval {$e->isa(__PACKAGE__)})) {
if (defined $well_known{$e}) {
$e = $well_known{$e}
} else {
$e = Data::Identifier->new(from => $e);
}
}
}
if (defined($self)) {
return undef unless defined $other;
return 0 if $self == $other;
if ((my $r = $self->type->cmp($other->type)) != 0) {
return $r;
}
{
my $self_id = $self->id;
my $other_id = $other->id;
if ((my ($sa, $sb) = $self_id =~ /^([^0-9]*)([0-9]+)$/) && (my ($oa, $ob) = $other_id =~ /^([^0-9]*)([0-9]+)$/)) {
my $r = $sa cmp $oa;
return $r if $r;
return $sb <=> $ob;
}
return $self_id cmp $other_id;
}
} else {
return !defined($other);
}
}
#@returns __PACKAGE__
sub namespace {
my ($self) = @_;
return $self->{namespace} // croak 'No namespace';
}
sub register {
my ($self) = @_;
$registered{$self->{type}->uuid}{$self->{id}} = $self;
foreach my $type_name (qw(uuid oid uri sid)) {
my $f = $self->can($type_name) || next;
my $v = $self->$f(default => undef) // next;
$registered{$well_known{$type_name}->uuid}{$v} = $self;
}
}
sub userdata {
my ($self, $package, $key, $value) = @_;
$self->{userdata} //= {};
$self->{userdata}{$package} //= {};
return $self->{userdata}{$package}{$key} = $value // $self->{userdata}{$package}{$key};
}
sub displayname {
my ($self, %opts) = @_;
if (defined(my $displayname = $self->{displayname})) {
$displayname = $self->$displayname() if ref $displayname;
# recheck and return as any of the above conversions could result in $displayname becoming invalid.
return $displayname if defined($displayname) && length($displayname);
}
return $self->id.'' unless $opts{no_defaults}; # force stringification.
return $opts{default} if exists $opts{default};
croak 'No value for displayname';
}
sub displaycolour { my ($self, %opts) = @_; return $opts{default}; }
sub icontext { my ($self, %opts) = @_; return $opts{default}; }
sub description { my ($self, %opts) = @_; return $opts{default}; }
# ---- Private helpers ----
sub import {
my ($pkg, $opts) = @_;
return unless defined $opts;
croak 'Bad options' unless ref($opts) eq 'HASH';
if (defined(my $disable = $opts->{disable})) {
$disable = [split /\s*,\s*/, $disable] unless ref $disable;
foreach my $to_disable (@{$disable}) {
if ($to_disable eq 'oid') {
$enabled_oid = undef;
undef *oid;
} else {
croak 'Unknown feature: '.$to_disable;
}
}
}
}
sub _generate {
my ($self) = @_;
unless (exists $self->{_generate}) {
my __PACKAGE__ $type = $self->type;
if (defined(my $generate = $type->{generate})) {
unless (ref $generate) {
$self->{generate} = $generate = {style => $generate};
}
$self->{id_cache} //= {};
if (defined(my __PACKAGE__ $ns = eval {$type->namespace->uuid})) {
my $style = $generate->{style};
my $input;
if ($style eq 'id-based') {
$input = lc($self->id);
} else {
croak 'Unsupported generator style';
}
if (defined $input) {
$self->{id_cache}{WK_UUID()} = Data::Identifier::Generate->_uuid_v5($ns, $input);
}
}
}
}
$self->{_generate} = undef;
}
sub _known_provider {
my ($pkg, $class, %opts) = @_;
croak 'Unsupported options passed' if scalar(keys %opts);
if ($class eq 'wellknown') {
state $wellknown = do {
my %hash = map{$_ => $_} values(%well_known), map {values %{$_}} values(%registered);
[values %hash];
};
return ($wellknown, rawtype => __PACKAGE__);
} elsif ($class eq 'registered' || $class eq ':all') {
my %hash = map{$_ => $_} values(%well_known), map {values %{$_}} values(%registered);
return ([values %hash], rawtype => __PACKAGE__);
}
croak 'Unsupported class';
}
1;
__END__
=pod
=encoding UTF-8
=head1 NAME
Data::Identifier - format independent identifier object
=head1 VERSION
version v0.09
=head1 SYNOPSIS
use Data::Identifier;
# or:
use Data::Identifier {option => value, ...};
my Data::Identifier $id = Data::Identifier->new(uuid => 'ddd60c5c-2934-404f-8f2d-fcb4da88b633');
my Data::Identifier $id = Data::Identifier->new(oid => '2.1.0.1.0');
my Data::Identifier $custom_type = Data::Identifier->new(uuid => ...);
my Data::Identifier $id = Data::Identifier->new($custom_type => '123abc');
my Data::Identifier $type = $id->type;
my $raw = $id->id;
my $uuid = $id->uuid;
my $oid = $id->oid;
my $uri = $id->uri;
my $ise = $id->ise;
This module provides an common interface to identifiers of different types.
Each identifier stores both it's raw value (called C<id>) and it's type (C<type>).
B<Note:> Validation on the raw identifier value may or may not be performed depending on the type.
The level of validation this module can do is limited by it's knowledge about the type as well
as performance aspects. This module may therefore reject invalid values. But it is not safe to assume
that it will reject all invalid values.
B<Note:> This module performs basic deduplication and normalisation. This means that you
might not always get back exactly the identifier you passed in but an equivalent one.
Also note that deduplication is done with performance in mind. This means that there is no
guarantee for two equal identifiers to become deduplicated. See also L</register>.
This package inherits from L<Data::Identifier::Interface::Known>.
=head2 OPTIONS
The following options are supported. Some are marked as experimental.
=head3 disable
B<Note:>
This is an B<experimental> option. It may be changed, renamed, or removed without notice.
This option allows to disable a feature.
This is a global setting, and therefore should only be used at the top level code.
In order for this to be most effective this should be used in the top level code
before any other module is C<use>-ed or C<require>-ed that makes use of this module.
This setting takes an arrayref of strings or a single string that is a comma separated list.
Currently the following features can be disabled:
=over
=item C<oid>
Support for OID. This removes the L</oid> function from this package,
removes internal OID based caches, and parts of the OID detection and normalisation logic.
However this improves the speed of L</register> and some others significantly.
This feature should only be disabled if you're sure you will not use OIDs in your code.
=back
=head1 METHODS
=head2 new
my Data::Identifier $identifier = Data::Identifier->new($type => $id, %opts);
Creates a new identifier.
C<$type> needs to be a L<Data::Identifier>, a well known name, a UUID, C<wellknown>, C<ise>, or C<from>.
If it is an UUID a type is created as needed.
If it is C<ise> it is parsed as C<uuid>, C<oid>, or C<uri> according to it's format.
If it is C<wellknown> it refers to an identifier from the well known list.
If it is C<from> then C<$id> should refer to an object of some kind that should be converted to identifier.
In this case not all options might be supported. Currently it is possible to convert from:
L<Data::Identifier>,
L<Data::URIID::Colour>, L<Data::URIID::Service>, L<Data::URIID::Result>,
L<Data::TagDB::Tag>,
and L<Business::ISBN>. If C<$id> is not a reference it is parsed as with C<ise>.
The following type names are currently well known:
=over
=item C<uuid>
An UUID.
=item C<oid>
An OID.
=item C<uri>
An URI.
=item C<wd>
An wikidata identifier (Q, P, or L).
=item C<fc>
An FactGrid identifier (Q, P, or L).
=item C<gtin>
An GTIN (or EAN).
=item C<iban>
An IBAN (International Bank Account Number).
=item C<bic>
A BIC (Business Identifier Code).
=item C<doi>
A doi (digital object identifier).
=back
The following options are supported:
=over
=item C<validate>
A regex that should be used to validate identifiers if this identifier is used as a type.
=item C<namespace>
The namespace used by a type. Must be a L<Data::Identifier> or an ISE. Must also resolve to an UUID.
=item C<displayname>
The name as to be returned by L</displayname>.
Must be a scalar string value or a code reference that returns a scalar string.
If it is a code reference the identifier object is passed as C<$_[0]>.
=item C<extractor>
An instance of L<Data::URIID>. This option is currently ignored.
=item C<db>
An instance of L<Data::TagDB>. This option is currently ignored.
=back
=head2 random
my Data::Identifier $identifier = Data::Identifier->random([ %opts ]);
Generate a new random identifier.
This method will C<die> on error.
The following options (all optional) are supported:
=over
=item C<displayname>
The same as the option of the same name in L</new>. See also L</displayname>.
=item C<sources>
The backend data sources to use for generation of the identifier. This is an array reference
with the names of the modules in order of preference (most preferred first).
Defaults to a list of high quality sources.
Currently supported are at least:
L<Crypt::URandom>,
L<UUID4::Tiny>,
L<Math::Random::Secure>,
L<UUID::URandom>,
L<UUID::Tiny::Patch::UseMRS>,
L<UUID::Tiny> (low quality).
=item C<type>
The type to generate a random identifier in.
A L<Data::Identifier> or one of the special values C<uuid> or C<ise>.
=back
=head2 known
my @list = Data::Identifier->known($class [, %opts ] );
This module implements L<Data::Identifier::Interface::Known/known>. See there for details.
Supported classes:
=over
=item C<wellknown>
Returns a list with all well known identifiers.
This is useful to prime a database.
=item C<registered>
Returns the list of all currently registered identifiers.
=item C<:all>
Returns the list of all currently known identifiers.
=back
=head2 wellknown
my @wellknown = Data::Identifier->wellknown(%opts);
This is an alias for:
my @wellknown = Data::Identifier->known('wellknown', %opts);
See also L</known>.
=head2 type
my Data::Identifier $type = $identifier->type;
Returns the type of the identifier.
=head2 id
my $id = $identifier->id;
Returns the raw id of the identifier.
=head2 uuid, oid, uri, sid
my $uuid = $identifier->uuid( [ %opts ] );
my $oid = $identifier->oid( [ %opts ] );
my $uri = $identifier->uri( [ %opts ] );
my $sid = $identifier->sid( [ %opts ] );
Return the UUID, OID, URI, or SID (small-identifier) of the current identifier or die if no identifier of that type is known nor can be calculated.
The following options (all optional) are supported:
=over
=item C<default>
The default value to return if no other value is available.
This can be set to C<undef> to change the method from C<die>ing in failture to returning C<undef>.
=item C<no_defaults>
If set true do not try to generate a matching identifier.
Note: This does not apply to C<sid()> as small-identifiers cannot be generated. For C<sid()> the option is ignored.
=back
=head2 ise
my $ise = $identifier->ise( [ %opts ] );
Returns the ISE (UUID, OID, or URI) for the current identifier or die if no ISE is known nor can be calculated.
Supports all options also supported by L</uuid>, L</oid>, and L</uri>.
=head2 as
my $res = $identifier->as($as, %opts);
# or:
my $res = $identifier->Data::Identifier::as($as, %opts); # $identifier is an alien type
This method converts the given identifier to another type of object.
C<$as> must be a name of the package (containing C<::> or starting with an uppercase letter),
or one of the special values.
Currently the following packages are supported:
L<URI>,
L<Data::Identifier>,
L<Data::URIID::Result>,
L<Data::URIID::Service>,
L<Data::TagDB::Tag>,
L<Business::ISBN>.
Other packages might be supported. Packages need to be installed in order to be supported.
Also some packages need special options to be passed to be available.
The folliwng special values are supported:
C<uuid>, C<oid>, C<uri>, C<sid>, C<ise>, and C<raw>.
All but C<raw> are aliases to the corresponding functions.
C<raw> is an alias for the type set with the C<rawtype> option (see below).
If C<$identifier> is or may not be an L<Data::Identifier> this method can be called like
C<$identifier-E<gt>Data::Identifier::as($as...)>.
In that case C<$identifier> is parsed as with C<from> in L</new>.
If C<$identifier> is a C<$as> (see also C<rawtype> below) then C<$identifier> is returned as-is,
even if C<$as> would not be supported otherwise.
The following options (all optional) are supported:
=over
=item C<autocreate>
If the requested type refers to some permanent storage and the object does not exist for
the given identifier whether to create a new object or not.
Defaults to false.
=item C<db>
An instance of L<Data::TagDB>. This is used to create instances of related packages.
=item C<default>
Same as in L</uuid>.
=item C<no_defaults>
Same as in L</uuid>.
=item C<rawtype>
If C<$as> is given as C<raw> then this value is used for C<$as>.
This can be used to ease implementation of other methods that are required to accept C<raw>.
=item C<extractor>
An instance of L<Data::URIID>. This is used to create instances of related packages
such as L<Data::URIID::Result>.
=back
=head2 eq
my $bool = $identifier->eq($other); # $identifier must be non-undef
# or:
my $bool = Data::Identifier::eq($identifier, $other); # $identifier can be undef
Compares two identifiers to be equal.
If both identifiers are C<undef> they are considered equal.
If C<$identifier> or C<$other> is not an instance of L<Data::Identifier> or C<undef>
then it is checked against the list of well known identifiers (see L</new>).
If it has still no match L</new> with the virtual type C<from> is used.
=head2 cmp
my $val = $identifier->cmp($other); # $identifier must be non-undef
# or:
my $val = Data::Identifier::cmp($identifier, $other); # $identifier can be undef
Compares the identifiers similar to C<cmp>. This method can be used to order identifiers.
To check for them to be equal see L</eq>.
The parameters are parsed the same way as L</eq>.
If this method is used for sorting the exact resulting order is not defined. However:
=over
=item *
The order is stable
=item *
Identifiers are ordered by type first
=item *
If the all identifiers have the same type this method tries to be smart about ordering
(ordering numeric values correctly).
=item *
The order is the same for C<$a-E<gt>cmp($b)> as for C<- $b-E<gt>cmp($a)>.
=back
=head2 namespace
my Data::Identifier $namespace = $identifier->namespace;
Gets the namespace for the type C<$identifier> or dies.
This call is only valid for identifiers that are types.
=head2 register
$identifier->register;
Registers the identifier for deduplication.
This can be used to register much used identifiers and types
early in an application to increase performance.
However, note that once registered an identifier object is cached for
the life time of the process.
=head2 userdata
my $value = $identifier->userdata(__PACKAGE__, $key);
$identifier->userdata(__PACKAGE__, $key => $value);
Get or set user data to be used with this identifier. The data is stored using the given C<$key>.
The package of the caller is given to provide namespaces for the userdata, so two independent packages
can use the same C<$key>.
The meaning of C<$key>, and C<$value> is up to C<__PACKAGE__>.
=head2 displayname
my $displayname = $identifier->displayname( [ %opts ] );
Returns a display name suitable to display to the user. This function always returns a string.
This is mostly for compatibility with L<Data::TagDB::Tag>.
The following options (all optional) are supported:
=over
=item C<default>
The default value to return if no other value is available.
This can be set to C<undef> to let this method return undef (not die).
=item C<no_defaults>
If set true do not try to use any identifier or other fallback as displayname.
=back
=head2 displaycolour, icontext, description
my $displaycolour = $identifier->displaycolour( [ %opts ] );
my $icontext = $identifier->icontext( [ %opts ] );
my $description = $identifier->description( [ %opts ] );
These functions always return C<undef>. They are for compatibility with L<Data::TagDB::Tag>.
The following options (all optional) are supported:
=over
=item C<default>
The default value to return if no other value is available (which is always the case).
This is for compatibility with L</displayname> and implementations of other packages.
=item C<no_defaults>
This option is accepted but ignored.
This is for compatibility with L</displayname> and implementations of other packages.
=back
=head1 AUTHOR
Löwenfelsen UG (haftungsbeschränkt) <support@loewenfelsen.net>
=head1 COPYRIGHT AND LICENSE
This software is Copyright (c) 2023-2025 by Löwenfelsen UG (haftungsbeschränkt) <support@loewenfelsen.net>.
This is free software, licensed under:
The Artistic License 2.0 (GPL Compatible)
=cut