Sponsoring The Perl Toolchain Summit 2025: Help make this important event another success Learn more

use strict;
require UR;
our $VERSION = "0.47"; # UR $VERSION;
{
no warnings 'once';
*namespace = \&get_namespace;
}
UR::Object::Type->define(
class_name => 'UR::DataSource',
is_abstract => 1,
doc => 'A logical database, independent of prod/dev/testing considerations or login details.',
has => [
namespace => { calculate_from => ['id'] },
is_connected => { is => 'Boolean', default_value => 0, is_optional => 1, is_transient => 1 },
get_default_handle => {
is_calculated => 1,
is_constant => 1,
doc => 'Underlying handle for this datasource',
calculate => '$self->create_default_handle_wrapper',
},
],
valid_signals => ['precreate_handle', 'create_handle', 'predisconnect_handle', 'disconnect_handle' ],
);
our @CARP_NOT = qw(UR::Context UR::DataSource::QueryPlan);
sub define { shift->__define__(@_) }
sub get_namespace {
my $class = shift->class;
return substr($class,0,index($class,"::DataSource"));
}
sub get_name {
my $class = shift->class;
return lc(substr($class,index($class,"::DataSource")+14));
}
# The default used to be to force table/column/constraint/etc names to
# upper case when storing them in the MetaDB, and in the column_name
# metadata for properties. The new behavior is to just use whatever the
# database supplies us when interrogating the data dictionary.
# For datasources/clases that still need the old behavior, override this
# to make the column_name metadata for properties forced to upper-case
sub table_and_column_names_are_upper_case { 0; }
# Basic, dumb data sources do not support joins within a single
# query. Instead the Context logic can perform a cross datasource
# join within irs own code
sub does_support_joins { 0; }
# Many data sources do not support limit and offset.
sub does_support_limit_offset {
#my($self, $query_plan) = @_;
0
}
# Most datasources do not support recursive queries
# Oracle and Postgres do, but in different ways
# For data sources without support, it'll have to do multiple queries
# to get all the data
sub does_support_recursive_queries { ''; }
{
no warnings 'once';
*create_dbh = \&create_default_handle_wrapper;
}
sub create_default_handle_wrapper {
my $self = UR::Util::object(shift);
$self->__signal_observers__('precreate_handle');
my $h = $self->create_default_handle;
$self->__signal_observers__('create_handle', $h);
# Hack - This is to avoid infinite recursion in the case where the
# handle initializers below try to get the hadle by calling $ds->get_default_handle.
# The cached/calculated accessor code will look in this hash key and
# return the handle instead of recursing back into the handle creation, and
# back to here
$self->{get_default_handle} = $h;
# Backward compatability for older code that still uses _init_created_dbh
if ($self->can('_init_created_dbh')) {
$self->_init_created_dbh($h);
} else {
$self->init_created_handle($h);
}
return $h;
}
# basic, dumb datasources do not have a handle
sub create_default_handle { undef }
sub disconnect { }
# derived classes can implement this to do extra initialization after the
# handle is created
sub init_created_handle { 1; }
# Peek into the object and see if there's anything in 'get_default_handle' without actually
# creating a handle
*has_default_dbh = \&has_default_handle;
sub has_default_handle {
my $self = UR::Util::object(shift);
return exists($self->{get_default_handle});
}
*disconnect_default_dbh = \&disconnect_default_handle;
sub disconnect_default_handle {
my $self = shift;
if ($self->has_default_handle) {
$self->__signal_observers__('predisconnect_handle');
$self->disconnect();
$self->__signal_observers__('disconnect_handle');
}
1;
}
our $use_dummy_autogenerated_ids;
*use_dummy_autogenerated_ids = \$ENV{UR_USE_DUMMY_AUTOGENERATED_IDS};
sub use_dummy_autogenerated_ids {
# This allows the saved SQL from sync database to be comparable across executions.
# It also
my $class = shift;
if (@_) {
($use_dummy_autogenerated_ids) = @_;
}
$use_dummy_autogenerated_ids ||= 0; # Replace undef with 0
return $use_dummy_autogenerated_ids;
}
our $last_dummy_autogenerated_id;
sub next_dummy_autogenerated_id {
unless($last_dummy_autogenerated_id) {
my $hostname = hostname();
$hostname =~ /(\d+)/;
my $id = $1 ? $1 : 1;
$last_dummy_autogenerated_id = ($id * -10_000_000) - ($$ * 1_000);
}
#limit id to fit within 11 characters
($last_dummy_autogenerated_id) = $last_dummy_autogenerated_id =~ m/(-\d{1,10})/;
return --$last_dummy_autogenerated_id;
}
sub autogenerate_new_object_id_for_class_name_and_rule {
my $ds = shift;
if (ref $ds) {
$ds = ref($ds) . " ID " . $ds->id;
}
# Maybe we could use next_dummy_autogenerated_id instead?
die "Data source $ds did not implement autogenerate_new_object_id_for_class_name_and_rule()";
}
# UR::Context needs to know if a data source supports savepoints
sub can_savepoint {
my $class = ref($_[0]);
die "Class $class didn't supply can_savepoint()";
}
sub set_savepoint {
my $class = ref($_[0]);
die "Class $class didn't supply set_savepoint, but can_savepoint is true";
}
sub rollback_to_savepoint {
my $class = ref($_[0]);
die "Class $class didn't supply rollback_to_savepoint, but can_savepoint is true";
}
sub _get_class_data_for_loading {
my ($self, $class_meta) = @_;
my $class_data = $class_meta->{loading_data_cache};
unless ($class_data) {
$class_data = $self->_generate_class_data_for_loading($class_meta);
}
return $class_data;
}
sub _resolve_query_plan {
my ($self, $rule_template) = @_;
my $qp = UR::DataSource::QueryPlan->get(
rule_template => $rule_template,
data_source => $self,
);
$qp->_init() unless $qp->_is_initialized;
return $qp;
}
# Child classes can override this to return a different datasource
# depending on the rule passed in
sub resolve_data_sources_for_rule {
return $_[0];
}
sub _generate_class_data_for_loading {
my ($self, $class_meta) = @_;
my $class_name = $class_meta->class_name;
my $ghost_class = $class_name->ghost_class;
my @all_id_property_names = $class_meta->all_id_property_names();
my @id_properties = $class_meta->id_property_names;
my $id_property_sorter = $class_meta->id_property_sorter;
my @class_hierarchy = ($class_meta->class_name,$class_meta->ancestry_class_names);
my @parent_class_objects = $class_meta->ancestry_class_metas;
my $sub_classification_method_name;
my ($sub_classification_meta_class_name, $subclassify_by);
my @all_properties;
my $first_table_name;
my %seen;
for my $co ( $class_meta, @parent_class_objects ) {
next if ($seen{ $co->id })++;
my $table_name = $co->table_name || '__default__';
$first_table_name ||= $table_name;
$sub_classification_method_name ||= $co->sub_classification_method_name;
$sub_classification_meta_class_name ||= $co->sub_classification_meta_class_name;
$subclassify_by ||= $co->subclassify_by;
my $sort_sub = sub ($$) { return $_[0]->property_name cmp $_[1]->property_name };
push @all_properties, map { [$co, $_, $table_name, 0]} sort $sort_sub UR::Object::Property->get(class_name => $co->class_name);
}
my $sub_typing_property = $class_meta->subclassify_by;
my $class_table_name = $class_meta->table_name;
my $class_data = {
class_name => $class_name,
ghost_class => $class_name->ghost_class,
parent_class_objects => [$class_meta->ancestry_class_metas], ##
sub_classification_method_name => $sub_classification_method_name,
sub_classification_meta_class_name => $sub_classification_meta_class_name,
subclassify_by => $subclassify_by,
all_properties => \@all_properties,
all_id_property_names => [$class_meta->all_id_property_names()],
id_properties => [$class_meta->id_property_names],
id_property_sorter => $class_meta->id_property_sorter,
sub_typing_property => $sub_typing_property,
# these seem like they go in the RDBMS subclass, but for now the
# "table" concept is stretched to mean any valid structure identifier
# within the datasource.
first_table_name => $first_table_name,
class_table_name => $class_table_name,
};
return $class_data;
}
sub _generate_loading_templates_arrayref {
# Each entry represents a table alias in the query.
# This accounts for different tables, or multiple occurrances
# of the same table in a join, by grouping by alias instead of
# table.
my $class = shift;
my $db_cols = shift;
my $obj_joins = shift;
my $bxt = shift;
use strict;
use warnings;
my %obj_joins_by_source_alias;
if (0) { # ($obj_joins) {
my @obj_joins = @$obj_joins;
while (@obj_joins) {
my $foreign_alias = shift @obj_joins;
my $data = shift @obj_joins;
for my $foreign_property_name (sort keys %$data) {
next if $foreign_property_name eq '-is_required';
my $source_alias = $data->{$foreign_property_name}{'link_alias'};
my $detail = $obj_joins_by_source_alias{$source_alias}{$foreign_alias} ||= {};
# warnings come from the above because we don't have 'link_alias' in filters.
my $source_property_name = $data->{$foreign_property_name}{'link_property_name'};
if ($source_property_name) {
# join
my $links = $detail->{links} ||= [];
push @$links, $foreign_property_name, $source_property_name;
}
if (exists $data->{value}) {
# filter
my $operator = $data->{operator};
my $value = $data->{value};
my $filter = $detail->{filter} ||= [];
my $key = $foreign_property_name;
$key .= ' ' . $operator if $operator;
push @$filter, $key, $value;
}
}
}
}
else {
#Carp::cluck("no obj joins???");
}
my %templates;
my $pos = 0;
my @templates;
my %alias_object_num;
for my $col_data (@$db_cols) {
my ($class_obj, $prop, $table_alias, $object_num) = @$col_data;
unless (defined $object_num) {
die "No object num for loading template data?!";
}
#Carp::confess() unless $table_alias;
my $template = $templates[$object_num];
unless ($template) {
$template = {
object_num => $object_num,
table_alias => $table_alias,
data_class_name => $class_obj->class_name,
final_class_name => $class_obj->class_name,
property_names => [],
column_positions => [],
id_property_names => undef,
id_column_positions => [],
id_resolver => undef, # subref
};
$templates[$object_num] = $template;
$alias_object_num{$table_alias} = $object_num;
}
push @{ $template->{property_names} }, $prop->property_name;
push @{ $template->{column_positions} }, $pos;
$pos++;
}
# remove joins that resulted in no template, such as when it was to a table-less class
@templates = grep { $_ } @templates;
# Post-process the template objects a bit to get the exact id positions.
for my $template (@templates) {
my @id_property_names;
for my $id_class_name ($template->{data_class_name}, $template->{data_class_name}->inheritance) {
my $id_class_obj = UR::Object::Type->get(class_name => $id_class_name);
last if @id_property_names = $id_class_obj->id_property_names;
}
$template->{id_property_names} = \@id_property_names;
my @id_column_positions;
for my $id_property_name (@id_property_names) {
for my $n (0..$#{ $template->{property_names} }) {
if ($template->{property_names}[$n] eq $id_property_name) {
push @id_column_positions, $template->{column_positions}[$n];
last;
}
}
}
$template->{id_column_positions} = \@id_column_positions;
if (@id_column_positions == 1) {
$template->{id_resolver} = sub {
return $_[0][$id_column_positions[0]];
}
}
elsif (@id_column_positions > 1) {
my $class_name = $template->{data_class_name};
$template->{id_resolver} = sub {
my $self = shift;
return $class_name->__meta__->resolve_composite_id_from_ordered_values(@$self[@id_column_positions]);
}
}
else {
Carp::croak("Can't determine which columns will hold the ID property data for class "
. $template->{data_class_name} . ". It's ID properties are (" . join(', ', @id_property_names)
. ") which do not appear in the class' property list (" . join(', ', @{$template->{'property_names'}}).")");
}
my $source_alias = $template->{table_alias};
if (0 and my $join_data_for_source_table = $obj_joins_by_source_alias{$source_alias}) {
# there are joins which come from this entity to other entities
# as these entities are loaded, remember the individual queries covered by this object returning
# NOTE: when we join a <> b, we remember that we've loaded all of the b for a when _a_ loads, not b,
# since it's possible that there ar zero of b, and we don't want to perform the query for b
my $source_object_num = $template->{object_num};
my $source_class_name = $template->{data_class_name};
my $next_joins = $template->{next_joins} ||= [];
for my $foreign_alias (keys %$join_data_for_source_table) {
my $foreign_object_num = $alias_object_num{$foreign_alias};
Carp::confess("no alias for $foreign_alias?") if not defined $foreign_object_num;
my $foreign_template = $templates[$foreign_object_num];
my $foreign_class_name = $foreign_template->{data_class_name};
my $join_data = $join_data_for_source_table->{$foreign_alias};
my %links = map { $_ ? @$_ : () } $join_data->{links};
my %filters = map { $_ ? @$_ : () } $join_data->{filters};
my @keys = sort (keys %links, keys %filters);
my @value_position_source_property;
for (my $n = 0; $n < @keys; $n++) {
my $key = $keys[$n];
if ($links{$key} and $filters{$key}) {
Carp::confess("unexpected same key $key in filters and joins");
}
my $source_property_name = $links{$key};
next unless $source_property_name;
push @value_position_source_property, $n, $source_property_name;
}
my $bx = $foreign_class_name->define_boolexpr(map { $_ => $filters{$_} } @keys);
my ($bxt, @values) = $bx->template_and_values();
push @$next_joins, [ $bxt->id, \@values, \@value_position_source_property ];
}
}
}
return \@templates;
}
sub create_iterator_closure_for_rule_template_and_values {
my ($self, $rule_template, @values) = @_;
my $rule = $rule_template->get_rule_for_values(@values);
return $self->create_iterator_closure_for_rule($rule);
}
sub _reclassify_object_loading_info_for_new_class {
my $self = shift;
my $loading_info = shift;
my $new_class = shift;
my $new_info;
%$new_info = %$loading_info;
foreach my $template_id (keys %$loading_info) {
my $target_class_rules = $loading_info->{$template_id};
foreach my $rule_id (keys %$target_class_rules) {
my $pos = index($rule_id,'/');
$new_info->{$template_id}->{$new_class . "/" . substr($rule_id,$pos+1)} = 1;
}
}
return $new_info;
}
sub _get_object_loading_info {
my $self = shift;
my $obj = shift;
my %param_load_hash;
if ($obj->{'__load'}) {
while( my($template_id, $rules) = each %{ $obj->{'__load'} } ) {
foreach my $rule_id ( keys %$rules ) {
$param_load_hash{$template_id}->{$rule_id} = $UR::Context::all_params_loaded->{$template_id}->{$rule_id};
}
}
}
return \%param_load_hash;
}
sub _add_object_loading_info {
my $self = shift;
my $obj = shift;
my $param_load_hash = shift;
while( my($template_id, $rules) = each %$param_load_hash) {
foreach my $rule_id ( keys %$rules ) {
$obj->{'__load'}->{$template_id}->{$rule_id} = $rules->{$rule_id};
}
}
}
# same as add_object_loading_info, but manipulates the data in $UR::Context::all_params_loaded
sub _record_that_loading_has_occurred {
my $self = shift;
my $param_load_hash = shift;
while( my($template_id, $rules) = each %$param_load_hash) {
foreach my $rule_id ( keys %$rules ) {
$UR::Context::all_params_loaded->{$template_id}->{$rule_id} ||=
$rules->{$rule_id};
}
}
}
sub _first_class_in_inheritance_with_a_table {
# This is called once per subclass and cached in the subclass from then on.
my $self = shift;
my $class = shift;
$class = ref($class) if ref($class);
unless ($class) {
Carp::confess("No class?");
}
my $class_object = $class->__meta__;
my $found = "";
for ($class_object, $class_object->ancestry_class_metas)
{
if ($_->has_direct_table)
{
$found = $_->class_name;
last;
}
}
#eval qq/
# package $class;
# sub _first_class_in_inheritance_with_a_table {
# return '$found' if \$_[0] eq '$class';
# shift->SUPER::_first_class_in_inheritance_with_a_table(\@_);
# }
#/;
#die "Error setting data in subclass: $@" if $@;
return $found;
}
sub _class_is_safe_to_rebless_from_parent_class {
my ($self, $class, $was_loaded_as_this_parent_class) = @_;
my $fcwt = $self->_first_class_in_inheritance_with_a_table($class);
unless ($fcwt) {
Carp::croak("Can't call _class_is_safe_to_rebless_from_parent_class(): Class $class has no parent classes with a table");
}
return ($was_loaded_as_this_parent_class->isa($fcwt));
}
sub ur_datasource_class_for_dbi_connect_string {
my($class, $dsn) = @_;
my(undef, $driver) = DBI->parse_dsn($dsn);
$driver
|| Carp::croak("Could not parse DBI driver out of connect string $dsn");
return 'UR::DataSource::'.$driver;
}
sub _get_current_entities {
my $self = shift;
my @class_meta = UR::Object::Type->is_loaded(
data_source_id => $self->id
);
my @objects;
for my $class_meta (@class_meta) {
next unless $class_meta->generated(); # Ungenerated classes won't have any instances
my $class_name = $class_meta->class_name;
push @objects, $UR::Context::current->all_objects_loaded($class_name);
}
return @objects;
}
sub _prepare_for_lob { };
sub _set_specified_objects_saved_uncommitted {
my ($self,$objects_arrayref) = @_;
# Sets an objects as though the has been saved but tha changes have not been committed.
# This is called automatically by _sync_databases.
my %objects_by_class;
my $class_name;
for my $object (@$objects_arrayref) {
$class_name = ref($object);
$objects_by_class{$class_name} ||= [];
push @{ $objects_by_class{$class_name} }, $object;
}
for my $class_name (sort keys %objects_by_class) {
my $class_object = $class_name->__meta__;
my @property_names =
map { $_->property_name }
grep { $_->column_name }
$class_object->all_property_metas;
for my $object (@{ $objects_by_class{$class_name} }) {
$object->{db_saved_uncommitted} ||= {};
my $db_saved_uncommitted = $object->{db_saved_uncommitted};
for my $property ( @property_names ) {
$db_saved_uncommitted->{$property} = $object->$property;
}
}
}
return 1;
}
sub _set_all_objects_saved_committed {
# called by UR::DBI on commit
my $self = shift;
return $self->_set_specified_objects_saved_committed([ $self->_get_current_entities ]);
}
sub _set_all_specified_objects_saved_committed {
my $self = shift;
my($pkg, $file, $line) = caller;
Carp::carp("Deprecated method _set_all_specified_objects_saved_committed called at file $file line $line. The new name for this method is _set_specified_objects_saved_committed");
my @changed_objects = @_;
$self->_set_specified_objects_saved_committed(\@changed_objects);
}
sub _set_specified_objects_saved_committed {
my $self = shift;
my $objects = shift;
# Two step process... set saved and committed, then fire commit observers.
# Doing so prevents problems should any of the observers themselves commit.
my @saved_objects;
for my $obj (@$objects) {
my $saved = $self->_set_object_saved_committed($obj);
push @saved_objects, $saved if $saved;
}
for my $obj (@saved_objects) {
next if $obj->isa('UR::DeletedRef');
$obj->__signal_change__('commit');
if ($obj->isa('UR::Object::Ghost')) {
$UR::Context::current->_abandon_object($obj);
}
}
return scalar(@$objects) || "0 but true";
}
sub _set_object_saved_committed {
# called by the above, and some test cases
my ($self, $object) = @_;
if ($object->{db_saved_uncommitted}) {
unless ($object->isa('UR::Object::Ghost')) {
%{ $object->{db_committed} } = (
($object->{db_committed} ? %{ $object->{db_committed} } : ()),
%{ $object->{db_saved_uncommitted} }
);
delete $object->{db_saved_uncommitted};
}
return $object;
}
else {
return;
}
}
sub _set_all_objects_saved_rolled_back {
# called by UR::DBI on commit
my $self = shift;
my @objects = $self->_get_current_entities;
for my $obj (@objects) {
unless ($self->_set_object_saved_rolled_back($obj)) {
die "An error occurred setting " . $obj->__display_name__
. " to match the rolled-back database state. Exiting...";
}
}
}
sub _set_specified_objects_saved_rolled_back {
my $self = shift;
my $objects = shift;
for my $obj (@$objects) {
unless ($self->_set_object_saved_rolled_back($obj)) {
die "An error occurred setting " . $obj->__display_name__
. " to match the rolled-back database state. Exiting...";
}
}
}
sub _set_object_saved_rolled_back {
# called by the above, and some test cases
my ($self,$object) = @_;
delete $object->{db_saved_uncommitted};
return $object;
}
# These are part of the basic DataSource API. Subclasses will want to override these
sub _sync_database {
my $class = shift;
my %args = @_;
$class = ref($class) || $class;
$class->warning_message("Data source $class does not support saving objects to storage. " .
scalar(@{$args{'changed_objects'}}) . " objects will not be saved");
return 1;
}
sub commit {
my $class = shift;
my %args = @_;
$class = ref($class) || $class;
#$class->warning_message("commit() ignored for data source $class");
return 1;
}
sub rollback {
my $class = shift;
my %args = @_;
$class = ref($class) || $class;
$class->warning_message("rollback() ignored for data source $class");
return 1;
}
# When the class initializer is create property objects, it will
# auto-fill-in column_name if the class definition has a table_name.
# File-based data sources do not have tables (and so classes using them
# do not have table_names), but the properties still need column_names
# so loading works properly.
# For now, only UR::DataSource::File and ::FileMux set this.
# FIXME this method's existence is ugly. Find a better way to fill in
# column_name for those properties, or fix the data sources to not
# require column_names to be set by the initializer
sub initializer_should_create_column_name_for_class_properties {
return 0;
}
# Subclasses should override this.
# It's called by the class initializer when the data_source property in a class
# definition contains a hashref with an 'is' key. The method should accept this
# hashref, create a data_source instance (if appropriate) and return the class_name
# of this new datasource.
sub create_from_inline_class_data {
my ($class,$class_data,$ds_data) = @_;
my %ds_data = %$ds_data;
my $ds_class_name = delete $ds_data{is};
unless (my $ds_class_meta = UR::Object::Type->get($ds_class_name)) {
die "No class $ds_class_name found!";
}
my $ds = $ds_class_name->__define__(%ds_data);
unless ($ds) {
die "Failed to construct $ds_class_name: " . $ds_class_name->error_message();
}
return $ds;
}
sub ur_data_type_for_data_source_data_type {
my($class,$type) = @_;
return [undef,undef]; # The default that should give reasonable behavior
}
# prepare_for_fork, do_after_fork_in_child, and finish_up_after_fork are no-op
# here in the UR::DataSource base class and should be implented in subclasses
# as needed.
sub prepare_for_fork { return 1 }
sub do_after_fork_in_child { return 1 }
sub finish_up_after_fork { return 1 }
sub _resolve_owner_and_table_from_table_name {
my($self, $table_name) = @_;
# Basic data sources don't know about owners/schemas
return (undef, $table_name);
}
sub _resolve_table_and_column_from_column_name {
my($self, $column_name) = @_;
# Basic data sources don't know about tables
return (undef,$column_name);
}
1;