package UR::DataSource; use strict; use warnings; require UR; our $VERSION = "0.47"; # UR $VERSION; use Sys::Hostname; { no warnings 'once'; *namespace = \&get_namespace; } UR::Object::Type->define( class_name => 'UR::DataSource', is_abstract => 1, doc => 'A logical database, independent of prod/dev/testing considerations or login details.', has => [ namespace => { calculate_from => ['id'] }, is_connected => { is => 'Boolean', default_value => 0, is_optional => 1, is_transient => 1 }, get_default_handle => { is_calculated => 1, is_constant => 1, doc => 'Underlying handle for this datasource', calculate => '$self->create_default_handle_wrapper', }, ], valid_signals => ['precreate_handle', 'create_handle', 'predisconnect_handle', 'disconnect_handle' ], ); our @CARP_NOT = qw(UR::Context UR::DataSource::QueryPlan); sub define { shift->__define__(@_) } sub get_namespace { my $class = shift->class; return substr($class,0,index($class,"::DataSource")); } sub get_name { my $class = shift->class; return lc(substr($class,index($class,"::DataSource")+14)); } # The default used to be to force table/column/constraint/etc names to # upper case when storing them in the MetaDB, and in the column_name # metadata for properties. The new behavior is to just use whatever the # database supplies us when interrogating the data dictionary. # For datasources/clases that still need the old behavior, override this # to make the column_name metadata for properties forced to upper-case sub table_and_column_names_are_upper_case { 0; } # Basic, dumb data sources do not support joins within a single # query. Instead the Context logic can perform a cross datasource # join within irs own code sub does_support_joins { 0; } # Many data sources do not support limit and offset. sub does_support_limit_offset { #my($self, $query_plan) = @_; 0 } # Most datasources do not support recursive queries # Oracle and Postgres do, but in different ways # For data sources without support, it'll have to do multiple queries # to get all the data sub does_support_recursive_queries { ''; } { no warnings 'once'; *create_dbh = \&create_default_handle_wrapper; } sub create_default_handle_wrapper { my $self = UR::Util::object(shift); $self->__signal_observers__('precreate_handle'); my $h = $self->create_default_handle; $self->__signal_observers__('create_handle', $h); # Hack - This is to avoid infinite recursion in the case where the # handle initializers below try to get the hadle by calling $ds->get_default_handle. # The cached/calculated accessor code will look in this hash key and # return the handle instead of recursing back into the handle creation, and # back to here $self->{get_default_handle} = $h; # Backward compatability for older code that still uses _init_created_dbh if ($self->can('_init_created_dbh')) { $self->_init_created_dbh($h); } else { $self->init_created_handle($h); } return $h; } # basic, dumb datasources do not have a handle sub create_default_handle { undef } sub disconnect { } # derived classes can implement this to do extra initialization after the # handle is created sub init_created_handle { 1; } # Peek into the object and see if there's anything in 'get_default_handle' without actually # creating a handle *has_default_dbh = \&has_default_handle; sub has_default_handle { my $self = UR::Util::object(shift); return exists($self->{get_default_handle}); } *disconnect_default_dbh = \&disconnect_default_handle; sub disconnect_default_handle { my $self = shift; if ($self->has_default_handle) { $self->__signal_observers__('predisconnect_handle'); $self->disconnect(); $self->__signal_observers__('disconnect_handle'); } 1; } our $use_dummy_autogenerated_ids; *use_dummy_autogenerated_ids = \$ENV{UR_USE_DUMMY_AUTOGENERATED_IDS}; sub use_dummy_autogenerated_ids { # This allows the saved SQL from sync database to be comparable across executions. # It also my $class = shift; if (@_) { ($use_dummy_autogenerated_ids) = @_; } $use_dummy_autogenerated_ids ||= 0; # Replace undef with 0 return $use_dummy_autogenerated_ids; } our $last_dummy_autogenerated_id; sub next_dummy_autogenerated_id { unless($last_dummy_autogenerated_id) { my $hostname = hostname(); $hostname =~ /(\d+)/; my $id = $1 ? $1 : 1; $last_dummy_autogenerated_id = ($id * -10_000_000) - ($$ * 1_000); } #limit id to fit within 11 characters ($last_dummy_autogenerated_id) = $last_dummy_autogenerated_id =~ m/(-\d{1,10})/; return --$last_dummy_autogenerated_id; } sub autogenerate_new_object_id_for_class_name_and_rule { my $ds = shift; if (ref $ds) { $ds = ref($ds) . " ID " . $ds->id; } # Maybe we could use next_dummy_autogenerated_id instead? die "Data source $ds did not implement autogenerate_new_object_id_for_class_name_and_rule()"; } # UR::Context needs to know if a data source supports savepoints sub can_savepoint { my $class = ref($_[0]); die "Class $class didn't supply can_savepoint()"; } sub set_savepoint { my $class = ref($_[0]); die "Class $class didn't supply set_savepoint, but can_savepoint is true"; } sub rollback_to_savepoint { my $class = ref($_[0]); die "Class $class didn't supply rollback_to_savepoint, but can_savepoint is true"; } sub _get_class_data_for_loading { my ($self, $class_meta) = @_; my $class_data = $class_meta->{loading_data_cache}; unless ($class_data) { $class_data = $self->_generate_class_data_for_loading($class_meta); } return $class_data; } sub _resolve_query_plan { my ($self, $rule_template) = @_; my $qp = UR::DataSource::QueryPlan->get( rule_template => $rule_template, data_source => $self, ); $qp->_init() unless $qp->_is_initialized; return $qp; } # Child classes can override this to return a different datasource # depending on the rule passed in sub resolve_data_sources_for_rule { return $_[0]; } sub _generate_class_data_for_loading { my ($self, $class_meta) = @_; my $class_name = $class_meta->class_name; my $ghost_class = $class_name->ghost_class; my @all_id_property_names = $class_meta->all_id_property_names(); my @id_properties = $class_meta->id_property_names; my $id_property_sorter = $class_meta->id_property_sorter; my @class_hierarchy = ($class_meta->class_name,$class_meta->ancestry_class_names); my @parent_class_objects = $class_meta->ancestry_class_metas; my $sub_classification_method_name; my ($sub_classification_meta_class_name, $subclassify_by); my @all_properties; my $first_table_name; my %seen; for my $co ( $class_meta, @parent_class_objects ) { next if ($seen{ $co->id })++; my $table_name = $co->table_name || '__default__'; $first_table_name ||= $table_name; $sub_classification_method_name ||= $co->sub_classification_method_name; $sub_classification_meta_class_name ||= $co->sub_classification_meta_class_name; $subclassify_by ||= $co->subclassify_by; my $sort_sub = sub ($$) { return $_[0]->property_name cmp $_[1]->property_name }; push @all_properties, map { [$co, $_, $table_name, 0]} sort $sort_sub UR::Object::Property->get(class_name => $co->class_name); } my $sub_typing_property = $class_meta->subclassify_by; my $class_table_name = $class_meta->table_name; my $class_data = { class_name => $class_name, ghost_class => $class_name->ghost_class, parent_class_objects => [$class_meta->ancestry_class_metas], ## sub_classification_method_name => $sub_classification_method_name, sub_classification_meta_class_name => $sub_classification_meta_class_name, subclassify_by => $subclassify_by, all_properties => \@all_properties, all_id_property_names => [$class_meta->all_id_property_names()], id_properties => [$class_meta->id_property_names], id_property_sorter => $class_meta->id_property_sorter, sub_typing_property => $sub_typing_property, # these seem like they go in the RDBMS subclass, but for now the # "table" concept is stretched to mean any valid structure identifier # within the datasource. first_table_name => $first_table_name, class_table_name => $class_table_name, }; return $class_data; } sub _generate_loading_templates_arrayref { # Each entry represents a table alias in the query. # This accounts for different tables, or multiple occurrances # of the same table in a join, by grouping by alias instead of # table. my $class = shift; my $db_cols = shift; my $obj_joins = shift; my $bxt = shift; use strict; use warnings; my %obj_joins_by_source_alias; if (0) { # ($obj_joins) { my @obj_joins = @$obj_joins; while (@obj_joins) { my $foreign_alias = shift @obj_joins; my $data = shift @obj_joins; for my $foreign_property_name (sort keys %$data) { next if $foreign_property_name eq '-is_required'; my $source_alias = $data->{$foreign_property_name}{'link_alias'}; my $detail = $obj_joins_by_source_alias{$source_alias}{$foreign_alias} ||= {}; # warnings come from the above because we don't have 'link_alias' in filters. my $source_property_name = $data->{$foreign_property_name}{'link_property_name'}; if ($source_property_name) { # join my $links = $detail->{links} ||= []; push @$links, $foreign_property_name, $source_property_name; } if (exists $data->{value}) { # filter my $operator = $data->{operator}; my $value = $data->{value}; my $filter = $detail->{filter} ||= []; my $key = $foreign_property_name; $key .= ' ' . $operator if $operator; push @$filter, $key, $value; } } } } else { #Carp::cluck("no obj joins???"); } my %templates; my $pos = 0; my @templates; my %alias_object_num; for my $col_data (@$db_cols) { my ($class_obj, $prop, $table_alias, $object_num) = @$col_data; unless (defined $object_num) { die "No object num for loading template data?!"; } #Carp::confess() unless $table_alias; my $template = $templates[$object_num]; unless ($template) { $template = { object_num => $object_num, table_alias => $table_alias, data_class_name => $class_obj->class_name, final_class_name => $class_obj->class_name, property_names => [], column_positions => [], id_property_names => undef, id_column_positions => [], id_resolver => undef, # subref }; $templates[$object_num] = $template; $alias_object_num{$table_alias} = $object_num; } push @{ $template->{property_names} }, $prop->property_name; push @{ $template->{column_positions} }, $pos; $pos++; } # remove joins that resulted in no template, such as when it was to a table-less class @templates = grep { $_ } @templates; # Post-process the template objects a bit to get the exact id positions. for my $template (@templates) { my @id_property_names; for my $id_class_name ($template->{data_class_name}, $template->{data_class_name}->inheritance) { my $id_class_obj = UR::Object::Type->get(class_name => $id_class_name); last if @id_property_names = $id_class_obj->id_property_names; } $template->{id_property_names} = \@id_property_names; my @id_column_positions; for my $id_property_name (@id_property_names) { for my $n (0..$#{ $template->{property_names} }) { if ($template->{property_names}[$n] eq $id_property_name) { push @id_column_positions, $template->{column_positions}[$n]; last; } } } $template->{id_column_positions} = \@id_column_positions; if (@id_column_positions == 1) { $template->{id_resolver} = sub { return $_[0][$id_column_positions[0]]; } } elsif (@id_column_positions > 1) { my $class_name = $template->{data_class_name}; $template->{id_resolver} = sub { my $self = shift; return $class_name->__meta__->resolve_composite_id_from_ordered_values(@$self[@id_column_positions]); } } else { Carp::croak("Can't determine which columns will hold the ID property data for class " . $template->{data_class_name} . ". It's ID properties are (" . join(', ', @id_property_names) . ") which do not appear in the class' property list (" . join(', ', @{$template->{'property_names'}}).")"); } my $source_alias = $template->{table_alias}; if (0 and my $join_data_for_source_table = $obj_joins_by_source_alias{$source_alias}) { # there are joins which come from this entity to other entities # as these entities are loaded, remember the individual queries covered by this object returning # NOTE: when we join a <> b, we remember that we've loaded all of the b for a when _a_ loads, not b, # since it's possible that there ar zero of b, and we don't want to perform the query for b my $source_object_num = $template->{object_num}; my $source_class_name = $template->{data_class_name}; my $next_joins = $template->{next_joins} ||= []; for my $foreign_alias (keys %$join_data_for_source_table) { my $foreign_object_num = $alias_object_num{$foreign_alias}; Carp::confess("no alias for $foreign_alias?") if not defined $foreign_object_num; my $foreign_template = $templates[$foreign_object_num]; my $foreign_class_name = $foreign_template->{data_class_name}; my $join_data = $join_data_for_source_table->{$foreign_alias}; my %links = map { $_ ? @$_ : () } $join_data->{links}; my %filters = map { $_ ? @$_ : () } $join_data->{filters}; my @keys = sort (keys %links, keys %filters); my @value_position_source_property; for (my $n = 0; $n < @keys; $n++) { my $key = $keys[$n]; if ($links{$key} and $filters{$key}) { Carp::confess("unexpected same key $key in filters and joins"); } my $source_property_name = $links{$key}; next unless $source_property_name; push @value_position_source_property, $n, $source_property_name; } my $bx = $foreign_class_name->define_boolexpr(map { $_ => $filters{$_} } @keys); my ($bxt, @values) = $bx->template_and_values(); push @$next_joins, [ $bxt->id, \@values, \@value_position_source_property ]; } } } return \@templates; } sub create_iterator_closure_for_rule_template_and_values { my ($self, $rule_template, @values) = @_; my $rule = $rule_template->get_rule_for_values(@values); return $self->create_iterator_closure_for_rule($rule); } sub _reclassify_object_loading_info_for_new_class { my $self = shift; my $loading_info = shift; my $new_class = shift; my $new_info; %$new_info = %$loading_info; foreach my $template_id (keys %$loading_info) { my $target_class_rules = $loading_info->{$template_id}; foreach my $rule_id (keys %$target_class_rules) { my $pos = index($rule_id,'/'); $new_info->{$template_id}->{$new_class . "/" . substr($rule_id,$pos+1)} = 1; } } return $new_info; } sub _get_object_loading_info { my $self = shift; my $obj = shift; my %param_load_hash; if ($obj->{'__load'}) { while( my($template_id, $rules) = each %{ $obj->{'__load'} } ) { foreach my $rule_id ( keys %$rules ) { $param_load_hash{$template_id}->{$rule_id} = $UR::Context::all_params_loaded->{$template_id}->{$rule_id}; } } } return \%param_load_hash; } sub _add_object_loading_info { my $self = shift; my $obj = shift; my $param_load_hash = shift; while( my($template_id, $rules) = each %$param_load_hash) { foreach my $rule_id ( keys %$rules ) { $obj->{'__load'}->{$template_id}->{$rule_id} = $rules->{$rule_id}; } } } # same as add_object_loading_info, but manipulates the data in $UR::Context::all_params_loaded sub _record_that_loading_has_occurred { my $self = shift; my $param_load_hash = shift; while( my($template_id, $rules) = each %$param_load_hash) { foreach my $rule_id ( keys %$rules ) { $UR::Context::all_params_loaded->{$template_id}->{$rule_id} ||= $rules->{$rule_id}; } } } sub _first_class_in_inheritance_with_a_table { # This is called once per subclass and cached in the subclass from then on. my $self = shift; my $class = shift; $class = ref($class) if ref($class); unless ($class) { Carp::confess("No class?"); } my $class_object = $class->__meta__; my $found = ""; for ($class_object, $class_object->ancestry_class_metas) { if ($_->has_direct_table) { $found = $_->class_name; last; } } #eval qq/ # package $class; # sub _first_class_in_inheritance_with_a_table { # return '$found' if \$_[0] eq '$class'; # shift->SUPER::_first_class_in_inheritance_with_a_table(\@_); # } #/; #die "Error setting data in subclass: $@" if $@; return $found; } sub _class_is_safe_to_rebless_from_parent_class { my ($self, $class, $was_loaded_as_this_parent_class) = @_; my $fcwt = $self->_first_class_in_inheritance_with_a_table($class); unless ($fcwt) { Carp::croak("Can't call _class_is_safe_to_rebless_from_parent_class(): Class $class has no parent classes with a table"); } return ($was_loaded_as_this_parent_class->isa($fcwt)); } sub ur_datasource_class_for_dbi_connect_string { my($class, $dsn) = @_; my(undef, $driver) = DBI->parse_dsn($dsn); $driver || Carp::croak("Could not parse DBI driver out of connect string $dsn"); return 'UR::DataSource::'.$driver; } sub _get_current_entities { my $self = shift; my @class_meta = UR::Object::Type->is_loaded( data_source_id => $self->id ); my @objects; for my $class_meta (@class_meta) { next unless $class_meta->generated(); # Ungenerated classes won't have any instances my $class_name = $class_meta->class_name; push @objects, $UR::Context::current->all_objects_loaded($class_name); } return @objects; } sub _prepare_for_lob { }; sub _set_specified_objects_saved_uncommitted { my ($self,$objects_arrayref) = @_; # Sets an objects as though the has been saved but tha changes have not been committed. # This is called automatically by _sync_databases. my %objects_by_class; my $class_name; for my $object (@$objects_arrayref) { $class_name = ref($object); $objects_by_class{$class_name} ||= []; push @{ $objects_by_class{$class_name} }, $object; } for my $class_name (sort keys %objects_by_class) { my $class_object = $class_name->__meta__; my @property_names = map { $_->property_name } grep { $_->column_name } $class_object->all_property_metas; for my $object (@{ $objects_by_class{$class_name} }) { $object->{db_saved_uncommitted} ||= {}; my $db_saved_uncommitted = $object->{db_saved_uncommitted}; for my $property ( @property_names ) { $db_saved_uncommitted->{$property} = $object->$property; } } } return 1; } sub _set_all_objects_saved_committed { # called by UR::DBI on commit my $self = shift; return $self->_set_specified_objects_saved_committed([ $self->_get_current_entities ]); } sub _set_all_specified_objects_saved_committed { my $self = shift; my($pkg, $file, $line) = caller; Carp::carp("Deprecated method _set_all_specified_objects_saved_committed called at file $file line $line. The new name for this method is _set_specified_objects_saved_committed"); my @changed_objects = @_; $self->_set_specified_objects_saved_committed(\@changed_objects); } sub _set_specified_objects_saved_committed { my $self = shift; my $objects = shift; # Two step process... set saved and committed, then fire commit observers. # Doing so prevents problems should any of the observers themselves commit. my @saved_objects; for my $obj (@$objects) { my $saved = $self->_set_object_saved_committed($obj); push @saved_objects, $saved if $saved; } for my $obj (@saved_objects) { next if $obj->isa('UR::DeletedRef'); $obj->__signal_change__('commit'); if ($obj->isa('UR::Object::Ghost')) { $UR::Context::current->_abandon_object($obj); } } return scalar(@$objects) || "0 but true"; } sub _set_object_saved_committed { # called by the above, and some test cases my ($self, $object) = @_; if ($object->{db_saved_uncommitted}) { unless ($object->isa('UR::Object::Ghost')) { %{ $object->{db_committed} } = ( ($object->{db_committed} ? %{ $object->{db_committed} } : ()), %{ $object->{db_saved_uncommitted} } ); delete $object->{db_saved_uncommitted}; } return $object; } else { return; } } sub _set_all_objects_saved_rolled_back { # called by UR::DBI on commit my $self = shift; my @objects = $self->_get_current_entities; for my $obj (@objects) { unless ($self->_set_object_saved_rolled_back($obj)) { die "An error occurred setting " . $obj->__display_name__ . " to match the rolled-back database state. Exiting..."; } } } sub _set_specified_objects_saved_rolled_back { my $self = shift; my $objects = shift; for my $obj (@$objects) { unless ($self->_set_object_saved_rolled_back($obj)) { die "An error occurred setting " . $obj->__display_name__ . " to match the rolled-back database state. Exiting..."; } } } sub _set_object_saved_rolled_back { # called by the above, and some test cases my ($self,$object) = @_; delete $object->{db_saved_uncommitted}; return $object; } # These are part of the basic DataSource API. Subclasses will want to override these sub _sync_database { my $class = shift; my %args = @_; $class = ref($class) || $class; $class->warning_message("Data source $class does not support saving objects to storage. " . scalar(@{$args{'changed_objects'}}) . " objects will not be saved"); return 1; } sub commit { my $class = shift; my %args = @_; $class = ref($class) || $class; #$class->warning_message("commit() ignored for data source $class"); return 1; } sub rollback { my $class = shift; my %args = @_; $class = ref($class) || $class; $class->warning_message("rollback() ignored for data source $class"); return 1; } # When the class initializer is create property objects, it will # auto-fill-in column_name if the class definition has a table_name. # File-based data sources do not have tables (and so classes using them # do not have table_names), but the properties still need column_names # so loading works properly. # For now, only UR::DataSource::File and ::FileMux set this. # FIXME this method's existence is ugly. Find a better way to fill in # column_name for those properties, or fix the data sources to not # require column_names to be set by the initializer sub initializer_should_create_column_name_for_class_properties { return 0; } # Subclasses should override this. # It's called by the class initializer when the data_source property in a class # definition contains a hashref with an 'is' key. The method should accept this # hashref, create a data_source instance (if appropriate) and return the class_name # of this new datasource. sub create_from_inline_class_data { my ($class,$class_data,$ds_data) = @_; my %ds_data = %$ds_data; my $ds_class_name = delete $ds_data{is}; unless (my $ds_class_meta = UR::Object::Type->get($ds_class_name)) { die "No class $ds_class_name found!"; } my $ds = $ds_class_name->__define__(%ds_data); unless ($ds) { die "Failed to construct $ds_class_name: " . $ds_class_name->error_message(); } return $ds; } sub ur_data_type_for_data_source_data_type { my($class,$type) = @_; return [undef,undef]; # The default that should give reasonable behavior } # prepare_for_fork, do_after_fork_in_child, and finish_up_after_fork are no-op # here in the UR::DataSource base class and should be implented in subclasses # as needed. sub prepare_for_fork { return 1 } sub do_after_fork_in_child { return 1 } sub finish_up_after_fork { return 1 } sub _resolve_owner_and_table_from_table_name { my($self, $table_name) = @_; # Basic data sources don't know about owners/schemas return (undef, $table_name); } sub _resolve_table_and_column_from_column_name { my($self, $column_name) = @_; # Basic data sources don't know about tables return (undef,$column_name); } 1;