lib/Gedcom.pm - metacpan.org


            
              1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
—
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
              # Copyright 1998-2019, Paul Johnson (paul@pjcj.net)
# This software is free.  It is licensed under the same terms as Perl itself.
# The latest version of this software should be available from my homepage:
# http://www.pjcj.net
# documentation at __END__
use strict;
require 5.005;
package Gedcom;
use Carp;
use Data::Dumper;
use FileHandle;
BEGIN { eval "use Text::Soundex" }           # We'll use this if it is available
use vars qw($VERSION $AUTOLOAD %Funcs);
my $Tags;
my %Top_tag_order;
BEGIN {
    $VERSION = "1.22";
    $Tags = {
        ABBR => "Abbreviation",
        ADDR => "Address",
        ADOP => "Adoption",
        ADR1 => "Address1",
        ADR2 => "Address2",
        AFN  => "Afn",
        AGE  => "Age",
        AGNC => "Agency",
        ALIA => "Alias",
        ANCE => "Ancestors",
        ANCI => "Ances Interest",
        ANUL => "Annulment",
        ASSO => "Associates",
        AUTH => "Author",
        BAPL => "Baptism-LDS",
        BAPM => "Baptism",
        BARM => "Bar Mitzvah",
        BASM => "Bas Mitzvah",
        BIRT => "Birth",
        BLES => "Blessing",
        BLOB => "Binary Object",
        BURI => "Burial",
        CALN => "Call Number",
        CAST => "Caste",
        CAUS => "Cause",
        CENS => "Census",
        CHAN => "Change",
        CHAR => "Character",
        CHIL => "Child",
        CHR  => "Christening",
        CHRA => "Adult Christening",
        CITY => "City",
        CONC => "Concatenation",
        CONF => "Confirmation",
        CONL => "Confirmation L",
        CONT => "Continued",
        COPR => "Copyright",
        CORP => "Corporate",
        CREM => "Cremation",
        CTRY => "Country",
        DATA => "Data",
        DATE => "Date",
        DEAT => "Death",
        DESC => "Descendants",
        DESI => "Descendant Int",
        DEST => "Destination",
        DIV  => "Divorce",
        DIVF => "Divorce Filed",
        DSCR => "Phy Description",
        EDUC => "Education",
        EMIG => "Emigration",
        ENDL => "Endowment",
        ENGA => "Engagement",
        EVEN => "Event",
        FAM  => "Family",
        FAMC => "Family Child",
        FAMF => "Family File",
        FAMS => "Family Spouse",
        FCOM => "First Communion",
        FILE => "File",
        FORM => "Format",
        GEDC => "Gedcom",
        GIVN => "Given Name",
        GRAD => "Graduation",
        HEAD => "Header",
        HUSB => "Husband",
        IDNO => "Ident Number",
        IMMI => "Immigration",
        INDI => "Individual",
        LANG => "Language",
        LEGA => "Legatee",
        MARB => "Marriage Bann",
        MARC => "Marr Contract",
        MARL => "Marr License",
        MARR => "Marriage",
        MARS => "Marr Settlement",
        MEDI => "Media",
        NAME => "Name",
        NATI => "Nationality",
        NATU => "Naturalization",
        NCHI => "Children_count",
        NICK => "Nickname",
        NMR  => "Marriage_count",
        NOTE => "Note",
        NPFX => "Name_prefix",
        NSFX => "Name_suffix",
        OBJE => "Object",
        OCCU => "Occupation",
        ORDI => "Ordinance",
        ORDN => "Ordination",
        PAGE => "Page",
        PEDI => "Pedigree",
        PHON => "Phone",
        PLAC => "Place",
        POST => "Postal_code",
        PROB => "Probate",
        PROP => "Property",
        PUBL => "Publication",
        QUAY => "Quality Of Data",
        REFN => "Reference",
        RELA => "Relationship",
        RELI => "Religion",
        REPO => "Repository",
        RESI => "Residence",
        RESN => "Restriction",
        RETI => "Retirement",
        RFN  => "Rec File Number",
        RIN  => "Rec Id Number",
        ROLE => "Role",
        SEX  => "Sex",
        SLGC => "Sealing Child",
        SLGS => "Sealing Spouse",
        SOUR => "Source",
        SPFX => "Surn Prefix",
        SSN  => "Soc Sec Number",
        STAE => "State",
        STAT => "Status",
        SUBM => "Submitter",
        SUBN => "Submission",
        SURN => "Surname",
        TEMP => "Temple",
        TEXT => "Text",
        TIME => "Time",
        TITL => "Title",
        TRLR => "Trailer",
        TYPE => "Type",
        VERS => "Version",
        WIFE => "Wife",
        WILL => "Will",
    };
    %Top_tag_order = (
        HEAD => 1,
        SUBM => 2,
        INDI => 3,
        FAM  => 4,
        NOTE => 5,
        REPO => 6,
        SOUR => 7,
        TRLR => 8,
    );
    while (my ($tag, $name) = each (%$Tags)) {
        $Funcs{$tag} = $Funcs{lc $tag} = $tag;
        if ($name) {
            $name =~ s/ /_/g;
            $Funcs{lc $name} = $tag;
        }
    }
}
sub DESTROY {}
sub AUTOLOAD {
    my ($self) = @_;  # don't change @_ because of the goto
    my $func = $AUTOLOAD;
    # print "autoloading $func\n";
    $func =~ s/^.*:://;
    my $tag;
    croak "Undefined subroutine $func called"
    if $func !~ /^(add|get)_(.*)$/ ||
    !($tag = $Funcs{lc $2}) ||
    !exists $Top_tag_order{$tag};
    no strict "refs";
    if ($1 eq "add") {
        *$func = sub {
            my $self = shift;
            my ($arg, $val) = @_;
            my $xref;
            if (ref $arg) {
                $xref = $arg->{xref};
            } else {
                $val = $arg;
            }
            my $record = $self->add_record(tag => $tag, val => $val);
            if (defined $val && $tag eq "NOTE") {
                $record->{value} = $val;
            }
            $xref = $tag eq "SUBM" ? "SUBM" : substr $tag, 0, 1
            unless defined $xref;
            unless ($tag =~ /^(HEAD|TRLR)$/) {
                croak "Invalid xref $xref requested in $func"
                unless $xref =~ /^[^\W\d_]+(\d*)$/;
                $xref = $self->next_xref($xref) unless length $1;
                $record->{xref} = $xref;
                $self->{xrefs}{$xref} = $record;
            }
            $record
        };
    } else {
        *$func = sub {
            my $self   = shift;
            my ($xref) = @_;
            my $nxr    = !defined $xref;
            my @a = grep { $_->{tag} eq $tag && ($nxr || $_->{xref} eq $xref) }
                         @{$self->{record}->_items};
            wantarray ? @a : $a[0]
        };
    }
    goto &$func
}
use Gedcom::Grammar    1.22;
use Gedcom::Individual 1.22;
use Gedcom::Family     1.22;
use Gedcom::Event      1.22;
sub new {
    my $proto = shift;
    my $class = ref($proto) || $proto;
    @_ = (gedcom_file => @_) if @_ == 1;
    my $self = {
        records   => [],
        tags      => $Tags,
        read_only => 0,
        types     => {},
        xrefs     => {},
        encoding  => "ansel",
        @_
    };
    # TODO - find a way to do this nicely for different grammars
    $self->{types}{INDI} = "Individual";
    $self->{types}{FAM}  = "Family";
    $self->{types}{$_}   = "Event" for qw(
        ADOP ANUL BAPM BARM BASM BIRT BLES BURI CAST CENS CENS CHR CHRA CONF
        CREM DEAT DIV DIVF DSCR EDUC EMIG ENGA EVEN EVEN FCOM GRAD IDNO IMMI
        MARB MARC MARL MARR MARS NATI NATU NCHI NMR OCCU ORDN PROB PROP RELI
        RESI RETI SSN WILL
    );
    bless $self, $class;
    # first read in the grammar
    my $grammar;
    if (defined $self->{grammar_file}) {
        my $version;
        if (defined $self->{grammar_version}) {
            $version = $self->{grammar_version};
        } else {
            ($version) = $self->{grammar_file} =~ /(\d+(\.\d+)*)/;
        }
        die "version must be a GEDCOM version number\n" unless $version;
        return undef unless
        $grammar = Gedcom::Grammar->new(
            file     => $self->{grammar_file},
            version  => $version,
            callback => $self->{callback}
        );
    } else {
        $self->{grammar_version} = 5.5 unless defined $self->{grammar_version};
        (my $v = $self->{grammar_version}) =~ tr/./_/;
        my $g = "Gedcom::Grammar_$v";
        eval "use $g $VERSION";
        die $@ if $@;
        no strict "refs";
        return undef unless $grammar = ${$g . "::grammar"};
    }
    my @c = ($self->{grammar} = $grammar);
    while (@c) {
        @c = map { $_->{top} = $grammar; @{$_->{items}} } @c;
    }
    # now read in or create the GEDCOM file
    return undef unless
    my $r = $self->{record} = Gedcom::Record->new(
        defined $self->{gedcom_file} ? (file => $self->{gedcom_file}) : (),
        line     => 0,
        tag      => "GEDCOM",
        grammar  => $grammar->structure("GEDCOM"),
        gedcom   => $self,
        callback => $self->{callback},
    );
    unless (defined $self->{gedcom_file}) {
    # Add the required elements, unless they are already there.
        unless ($r->get_record("head")) {
            my $me = "Unknown user";
            my $login = $me;
            if ($login = getlogin || (getpwuid($<))[0] ||
                         $ENV{USER} || $ENV{LOGIN}) {
                my $name;
                eval { $name = (getpwnam($login))[6] };
                $me = $name || $login;
            }
            my $date = localtime;
            my ($l0, $l1, $l2, $l3);
            $l0 = $self->add_header;
                $l1 = $l0->add("SOUR", "Gedcom.pm");
                $l1->add("NAME", "Gedcom.pm");
                $l1->add("VERS", $VERSION);
                    $l2 = $l1->add("CORP", "Paul Johnson");
                    $l2->add("ADDR", "http://www.pjcj.net");
                    $l2 = $l1->add("DATA");
                        $l3 = $l2->add(
                            "COPR",
                            'Copyright 1998-2019, Paul Johnson (paul@pjcj.net)'
                        );
                $l1 = $l0->add("NOTE", "");
                for (split /\n/, <<'EOH')
This output was generated by Gedcom.pm.
Gedcom.pm is Copyright 1998-2019, Paul Johnson (paul@pjcj.net)
Version 1.22 - 15th November 2019
Gedcom.pm is free.  It is licensed under the same terms as Perl itself.
The latest version of Gedcom.pm should be available from my homepage:
http://www.pjcj.net
EOH
                {
                    $l1->add("CONT", $_);
                };
                $l1 = $l0->add("GEDC");
                $l1->add("VERS", $self->{grammar}{version});
                $l1->add("FORM", "LINEAGE-LINKED");
            $l0->add("DATE", $date);
            $l0->add("CHAR", uc ($self->{encoding} || "ansel"));
            my $s = $r->get_record("subm");
            unless ($s) {
                $s = $self->add_submitter;
                $s->add("NAME", $me);
            }
            $l0->add("SUBM", $s->xref);
        }
        $self->add_trailer unless $r->get_record("trlr");
    }
    $self->collect_xrefs;
    $self
}
sub set_encoding {
    my $self = shift;
    ($self->{encoding}) = @_;
}
sub write {
    my $self  = shift;
    my $file  = shift or die "No filename specified";
    my $flush = shift;
    $self->{fh} = FileHandle->new($file, "w") or die "Can't open $file: $!";
    binmode $self->{fh}, ":encoding(UTF-8)"
        if $self->{encoding} eq "utf-8" && $] >= 5.8;
    $self->{record}->write($self->{fh}, -1, $flush);
    $self->{fh}->close or die "Can't close $file: $!";
}
sub write_xml {
    my $self = shift;
    my $file = shift or die "No filename specified";
    $self->{fh} = FileHandle->new($file, "w") or die "Can't open $file: $!";
    binmode $self->{fh}, ":encoding(UTF-8)"
        if $self->{encoding} eq "utf-8" && $] >= 5.8;
    $self->{fh}->print(<<'EOH');
<?xml version="1.0" encoding="utf-8"?>
<!--
This output was generated by Gedcom.pm.
Gedcom.pm is Copyright 1998-2019, Paul Johnson (paul@pjcj.net)
Version 1.22 - 15th November 2019
Gedcom.pm is free.  It is licensed under the same terms as Perl itself.
The latest version of Gedcom.pm should be available from my homepage:
http://www.pjcj.net
EOH
    $self->{fh}->print("Generated on " . localtime() . "\n\n-->\n\n");
    $self->{record}->write_xml($self->{fh});
    $self->{fh}->close or die "Can't close $file: $!";
}
sub add_record {
    my $self = shift;
    $self->{record}->add_record(@_);
}
sub collect_xrefs {
    my $self = shift;
    my ($callback) = @_;
    $self->{xrefs} = {};
    $self->{record}->collect_xrefs($callback);
}
sub resolve_xref {
    my $self = shift;;
    my ($x) = @_;
    my $xref;
    $xref = $self->{xrefs}{$x =~ /^\@(.+)\@$/ ? $1 : $x} if defined $x;
    $xref
}
sub resolve_xrefs {
    my $self = shift;
    my ($callback) = @_;
    $self->{record}->resolve_xrefs($callback);
}
sub unresolve_xrefs {
    my $self = shift;
    my ($callback) = @_;
    $self->{record}->unresolve_xrefs($callback);
}
sub validate {
    my $self = shift;
    my ($callback) = @_;
    $self->{validate_callback} = $callback;
    my $ok = $self->{record}->validate_syntax;
    for my $item (@{$self->{record}->_items}) {
        $ok = 0 unless $item->validate_semantics;
    }
    $ok
}
sub normalise_dates {
    my $self = shift;
    $self->{record}->normalise_dates(@_);
}
sub renumber {
    my $self = shift;
    my (%args) = @_;
    $self->resolve_xrefs;
    # initially, renumber any records passed in
    for my $xref (@{$args{xrefs}}) {
        $self->{xrefs}{$xref}->renumber(\%args, 1)
            if exists $self->{xrefs}{$xref};
    }
    # now, renumber any records left over
    $_->renumber(\%args, 1) for @{$self->{record}->_items};
    # actually change the xref
    for my $record (@{$self->{record}->_items}) {
        $record->{xref} = delete $record->{new_xref};
        delete $record->{recursed}
    }
    # and update the xrefs
    $self->collect_xrefs;
    %args
}
sub sort_sub {
    # subroutine to sort on tag order first, and then on xref
    my $t = sub {
        my ($r) = @_;
        return -2 unless defined $r->{tag};
        exists $Top_tag_order{$r->{tag}} ? $Top_tag_order{$r->{tag}} : -1
    };
    my $x = sub {
        my ($r) = @_;
        return -2 unless defined $r->{xref};
        $r->{xref} =~ /(\d+)/;
        defined $1 ? $1 : -1
    };
    sub {
        $t->($a) <=> $t->($b)
        ||
        $x->($a) <=> $x->($b)
    }
}
sub order {
    my $self     = shift;
    my $sort_sub = shift || sort_sub;   # use default sort unless one passed in
    @{$self->{record}{items}} = sort $sort_sub @{$self->{record}->_items}
}
sub items {
    my $self = shift;
    @{$self->{record}->_items}
}
sub heads        { grep $_->tag eq "HEAD",           shift->items }
sub submitters   { grep $_->tag eq "SUBM",           shift->items }
sub individuals  { grep ref eq "Gedcom::Individual", shift->items }
sub families     { grep ref eq "Gedcom::Family",     shift->items }
sub notes        { grep $_->tag eq "NOTE",           shift->items }
sub repositories { grep $_->tag eq "REPO",           shift->items }
sub sources      { grep $_->tag eq "SOUR",           shift->items }
sub trailers     { grep $_->tag eq "TRLR",           shift->items }
sub get_individual {
    my $self = shift;
    my $name = "@_";
    my $all  = wantarray;
    my @i;
    my $i = $self->resolve_xref($name) || $self->resolve_xref(uc $name);
    if ($i) {
        return $i unless $all;
        push @i, $i;
    }
    # search for the name in the specified order
    my $ordered = sub {
        my ($n, @ind) = @_;
        map { $_->[1] } grep { $_ && $_->[0] =~ $n } @ind
    };
    # search for the name in any order
    my $unordered = sub {
        my ($names, $t, @ind) = @_;
        map { $_->[1] } grep {
            my $i = $_->[0];
            my $r = 1;
            for my $n (@$names) {
                # remove matches as they are found
                # we don't want to match the same name twice
                last unless $r = $i =~ s/$n->[$t]//;
            }
            $r
        }
        @ind;
    };
    # look for various matches in decreasing order of exactitude
    my @individuals = $self->individuals;
    # Store the name with the individual to avoid continually recalculating it.
    # This is a bit like a Schwartzian transform, with a grep instead of a sort.
    my @ind =
        map [do { my $n = $_->tag_value("NAME"); defined $n ? $n : "" } => $_],
        @individuals;
    for my $n (map { qr/^$_$/, qr/\b$_\b/, $_ } map { $_, qr/$_/i } qr/\Q$name/)
    {
        push @i, $ordered->($n, @ind);
        return $i[0] if !$all && @i;
    }
    # create an array with one element per name
    # each element is an array of REs in decreasing order of exactitude
    my @names = map [ map { qr/\b$_\b/, $_ } map { qr/$_/, qr/$_/i } "\Q$_" ],
                split / /, $name;
    for my $t (0 .. $#{$names[0]}) {
        push @i, $unordered->(\@names, $t, @ind);
        return $i[0] if !$all && @i;
    }
    # check soundex
    my @sdx = map { my $s = $_->soundex; $s ? [ $s => $_ ] : () } @individuals;
    my $soundex = soundex($name);
    for my $n ( map { qr/$_/ } $name, ($soundex || ()) ) {
        push @i, $ordered->($n, @sdx);
        return $i[0] if !$all && @i;
    }
    return undef unless $all;
    my @s;
    my %s;
    for (@i) {
        unless (exists $s{$_->{xref}}) {
            push @s, $_;
            $s{$_->{xref}}++;
        }
    }
    @s
}
sub next_xref {
    my $self = shift;
    my ($type) = @_;
    my $re = qr/^$type(\d+)$/;
    my $last = 0;
    for my $c (@{$self->{record}->_items}) {
        $last = $1 if defined $c->{xref} and $c->{xref} =~ /$re/ and $1 > $last;
    }
    $type . ++$last
}
sub top_tag {
    my $self = shift;
    my ($tag) = @_;
    $Top_tag_order{$tag}
}
"
But take your time, think a lot
Think of everything you've got
For you will still be here tomorrow
But your dreams may not
"
__END__
=head1 NAME
Gedcom - a module to manipulate GEDCOM genealogy files
Version 1.22 - 15th November 2019
=head1 SYNOPSIS
  use Gedcom;
  my $ged = Gedcom->new;
  my $ged = Gedcom->new($gedcom_file);
  my $ged = Gedcom->new(grammar_version => "5.5.1",
                        gedcom_file     => $gedcom_file,
                        read_only       => 1,
                        callback        => $cb);
  my $ged = Gedcom->new(grammar_file => "gedcom-5.5.grammar",
                        gedcom_file  => $gedcom_file);
  return unless $ged->validate;
  my $xref = $self->resolve_xref($value);
  $ged->resolve_xrefs;
  $ged->unresolve_xrefs;
  $ged->normalise_dates;
  my %xrefs = $ged->renumber;
  $ged->order;
  $ged->set_encoding("utf-8");
  $ged->write($new_gedcom_file, $flush);
  $ged->write_xml($new_xml_file);
  my @individuals = $ged->individuals;
  my @families = $ged->families;
  my $me = $ged->get_individual("Paul Johnson");
  my $xref = $ged->next_xref("I");
  my $record = $ged->add_header;
                     add_submitter
                     add_individual
                     add_family
                     add_note
                     add_repository
                     add_source
                     add_trailer
  my $source = $ged->get_source("S1");
=head1 DESCRIPTION
This module provides for manipulation of GEDCOM files.  GEDCOM is a format for
storing genealogical information designed by The Church of Jesus Christ of
Latter-Day Saints (http://www.lds.org).  Information about GEDCOM used to be
available as a zip file at ftp://gedcom.org/pub/genealogy/gedcom/gedcom55.zip.
That may still be the case, but it seems to be password protected now.
However, the document in that archive seems to be available in a somewhat more
accessible format at
https://chronoplexsoftware.com/gedcomvalidator/gedcom/gedcom-5.5.pdf.
Requirements:
  Perl 5.005 or later
  ActivePerl5 Build Number 520 or later has been reported to work
Optional Modules:
  Date::Manip.pm       to work with dates
  Text::Soundex.pm     to use soundex
  Parse::RecDescent.pm to use lines2perl
  Roman.pm             to use the LifeLines function roman from lines2perl
The GEDCOM format is specified in a grammar file (gedcom-5.5.grammar).
Gedcom.pm parses the grammar which is then used to validate and allow
manipulation of the GEDCOM file.  I have only used Gedcom.pm with versions 5.5
and 5.5.1 of the GEDCOM grammar, which I had to modify slightly to correct a
few errors.  The advantage of this approach is that Gedcom.pm should be useful
if the GEDCOM grammar is ever updated.  It also made the software easier to
write, and probably more dependable too.  I suppose this is the virtue of
laziness shining through.
The vice of laziness is also shining brightly - I need to document how to use
this module in much greater detail.  This is happening - this release has more
documentation than the previous ones - but if you would like information feel
free to send me mail or better still, ask on the mailing list.
This module provides some functions which work over the entire GEDCOM file,
such as reformatting dates, renumbering entries and ordering the entries.  It
also allows access to individuals, and then to relations of individuals, for
example sons, siblings, spouse, parents and so forth.
The distribution includes a lines2perl program to convert LifeLines programs to
Perl.  The program works, but it has a few rough edges, and some missing
functionality.  I'll be working on it when it hits the top of my TODO list.
There is now an option for read only access to the GEDCOM file.  Actually, this
doesn't stop you changing or writing the file, but it does parse the GEDCOM
file lazily, meaning that only those portions of the GEDCOM file which are
needed will be read.  This can provide a substantial saving of time and memory
providing that not too much of the GEDCOM file is read.  If you are going to
read the whole GEDCOM file, this mode is less efficient unless you do some
manual housekeeping.
Should you find this software useful, or if you make changes to it, or if you
would like me to make changes to it, please send me mail.  I would like to have
some sort of an idea of the use this software is getting.  Apart from being of
interest to me, this will guide my decisions when I feel the need to make
changes to the interface.
There is a low volume mailing list available for discussing the use of Perl in
conjunction with genealogical work.  This is an appropriate forum for
discussing Gedcom.pm and if you use or are interested in this module I would
encourage you to join the list.  To subscribe send an empty message to
S<perl-gedcom-subscribe@perl.org>.
To store my genealogy I wrote a syntax file (gedcom.vim) and used vim
(http://www.vim.org) to enter the data, and Gedcom.pm to validate and
manipulate it.  I find this to be a nice solution.
=head1 GETTING STARTED
This space is reserved for something of a tutorial.  If you learn best by
looking at examples, take a look at the test directory, I<t>.  The most simple
test is I<birthdates.t>.
The first thing to do is to read in the GEDCOM file.  At its most simple, this
will involve a statement such as
  my $ged = Gedcom->new($gedcom_file);
It is now possible to access the records within the GEDCOM file.  Each
individual and family is a record.  Records can contain other records.  For
example, an individual is a record.  The birth information is a sub-record of
the individual, and the date of birth is a sub-record of the birth record.
Some records, such as the birth record, are simply containers for other
records.  Some records have a value, such as the date record, whose value is a
date.  This is all defined in the GEDCOM standard.
To access an individual use a statement such as
  my $i = $ged->get_individual("Paul Johnson");
To access information about the individual, use a function of the same name as
the GEDCOM tag, or its description.  Tags and descriptions are listed at the
head of Gedcom.pm.  For example
  for my $b ($i->birth) {
  }
will loop through all the birth records in the individual.  Usually there will
only be one such record, but there may be zero, one or more.  Calling the
function in scalar context will return only the first record.
  my $b = $i->birth;
But the second record may be returned with
  my $b = $i->birth(2);
If the record required has a value, for example
  my $n = $i->name;
then the value is returned, in this case the name of the individual.  If there
is no value, as is the case for the birth record, then the record itself is
returned.  If there is a value, but the record itself is required, then the
get_record() function can be used.
Information must be accessed through the GEDCOM structure so, for example, the
birthdate is accessed via the date record from the birth record within an
individual.
  my $d = $b->date;
Be aware that if you access a record in scalar context, but there is no such
record, then undef is returned.  In this case, $d would be undef if $b had no
date record.  This is another reason why looping through records is a nice
solution, all else being equal.
Access to values can also be gained through the get_value() function.  This is
a preferable solution where it is necessary to work down the GEDCOM structure.
For example
  my $bd = $i->get_value("birth date");
  my $bd = $i->get_value(qw(birth date));
will both return an individual's birth date or undef if there is none.  And
  my @bd = $i->get_value("birth date");
will return all the birth dates.  The second birth date, if there is one, is
  my $bd2 = $i->get_value(["birth", 2], "date");
Using the get_record() function in place of the get_value() function, in all
cases will return the record rather than the value.
All records are of a type derived from Gedcom::Item.  Individuals are of type
Gedcom::Individual.  Families are of type Gedcom::Family.  Events are of type
Gedcom::Event.  Other records are of type Gedcom::Record which is the base type
of Gedcom::Individual, Gedcom::Family and Gedcom::Event.
As individuals are of type Gedcom::Individual, the functions in
Gedcom::Individual.pm are available.  These allow access to relations and other
information specific to individuals, for example
  my @sons = $i->sons;
It is possible to get all the individuals in the GEDCOM file as
  my @individuals = $ged->individuals;
So putting everything together, here is a little program which will print out
the names and birthdates of everyone in a GEDCOM file specified on the command
line.
  #!/bin/perl -w
  use strict;
  use Gedcom;
  my $ged = Gedcom->new(shift);
  for my $i ($ged->individuals) {
      for my $bd ($i->get_value("birth date")) {
          print $i->name, " was born on $bd\n";
      }
  }
=head1 HASH MEMBERS
I have not gone the whole hog with data encapsulation and such within this
module.  Maybe I should have done.  Maybe I will.  For now though, the data is
accessible though hash members.  This is partly because having functions to do
this is a little slow, especially on my old DECstation, and partly because of
laziness again.  I'm not too sure whether this is good or bad laziness yet.
Time will tell no doubt.
As of version 1.05, you should be able to access all the data through
functions, and as of version 1.10 write access is available.  I have a faster
machine now.
Some of the more important hash members are:
=head2 $ged->{grammar}
This contains the GEDCOM grammar.
See Gedcom::Grammar.pm for more details.
=head2 $ged->{record}
This contains the top level gedcom record.  A record contains a number of
items.  Each of those items are themselves records.  This is the way in which
the hierarchies are modelled.
If you want to get at the data in the gedcom object, this is where you start.
See Gedcom::Record.pm for more details.
=head1 METHODS
=head2 new
  my $ged = Gedcom->new;
  my $ged = Gedcom->new($gedcom_file);
  my $ged = Gedcom->new(grammar_version => "5.5.1",
                        gedcom_file     => $gedcom_file,
                        read_only       => 1,
                        callback        => $cb);
  my $ged = Gedcom->new(grammar_file => "gedcom-5.5.grammar",
                        gedcom_file  => $gedcom_file);
Create a new gedcom object.
gedcom_file is the name of the GEDCOM file to parse.  If you do not supply a
gedcom_file parameter then you will get an empty Gedcom object, empty that is
apart from a few mandatory records.
You may optionally pass grammar_version as the version number of the GEDCOM
grammar you want to use.  There are two versions available, 5.5 and 5.5.1.  If
you do not specify a grammar version, you may specify a grammar file as
grammar_file.  Usually, you will do neither of these, and in this case the
grammar version will default to the latest full available version, currently
5.5.  5.5.1 is only a draft, but it is available if you specify it.
The read_only parameter indicates that the Gedcom data structure will be used
primarily for read_only operations.  In this mode the GEDCOM file is read
lazily, such that whenever possible the Gedcom records are not read until they
are needed.  This can save on both memory and CPU usage, provided that not too
much of the GEDCOM file is needed.  If the whole of the GEDCOM file needs to be
read, for example to validate it, or to write it out in a different format,
then this option should not be used.
When using the read_only option an index file is kept which can also speed up
operations.  It's usage should be transparent, but will require write access to
the directory containing the GEDCOM file.  If you access individuals only by
their xref (eg I20) then the index file will allow only the relevant parts of
the GEDCOM file to be read.
With or without the read_only option, the GEDCOM file is accessed in the same
fashion and the data structures can be changed.  In this respect, the name
read_only is not particularly accurate, but since changing the Gedcom data will
generally mean that the data will be written which means that the data will
first be read, the read_only option is generally useful when the data will not
be written and when not all the data will be read.  You may find it useful to
experiment with this option and check the amount of CPU time and memory that
your application uses.  You may also need to read this paragraph a few times to
understand it.  Sorry.
callback is an optional reference to a subroutine which will be called at
various times while the GEDCOM file (and the grammar file, if applicable) is
being read.  Its purpose is to provide feedback during potentially long
operations.  The subroutine is called with five arguments:
  my ($title, $txt1, $txt2, $current, $total) = @_;
  $title is a brief description of the current operation
  $txt1 and $txt2 provide more information on the current operation
  $current is the number of operations performed
  $total is the number of operations that need to be performed
If the subroutine returns false, the operation is aborted.
=head2 set_encoding
  $ged->set_encoding("utf-8");
Valid arguments are "ansel" and "utf-8".  Defaults to "ansel" but is set to
"utf-8" if the GEDCOM data was read from a file which was deemed to contain
UTF-8, either due to the presence of a BOM or as specified by a CHAR item.
Set the encoding for the GEDCOM file.  Calling this directly doesn't alter the
CHAR item, but does affect the way in which files are written.
=head2 write
  $ged->write($new_gedcom_file, $flush);
Write out the GEDCOM file.
Takes the name of the new GEDCOM file, and whether or not to indent the output
according to the level of the record.  $flush defaults to false, but the new
file name must be specified.
=head2 write_xml
  $ged->write_xml($new_xml_file);
Write the GEDCOM file as XML.
Takes the name of the new GEDCOM file.
Note that this function is experimental.  The XML output doesn't conform to any
standard; it's just me trying to turn the GEDCOM format into sensible XML.
=head2 collect_xrefs
  $ged->collect_xrefs($callback);
Collect all the xrefs into a data structure ($ged->{xrefs}) for easy location.
$callback is not used yet.
Called by new().
=head2 resolve_xref
  my $xref = $self->resolve_xref($value);
Return the record $value points to, or undef.
=head2 resolve_xrefs
  $ged->resolve_xrefs($callback);
Changes all xrefs to reference the record they are pointing to.  Like changing
a soft link to a hard link on a Unix filesystem.  $callback is not used yet.
=head2 unresolve_xrefs
  $ged->unresolve_xrefs($callback);
Changes all xrefs to name the record they contained.  Like changing a hard link
to a soft link on a Unix filesystem.  $callback is not used yet.
=head2 validate
  return unless $ged->validate($callback);
Validate the Gedcom object.  This performs a number of consistency checks, but
could do even more.  $callback is not properly used yet.
Any errors found are given out as warnings.  If this is unwanted, use
$SIG{__WARN__} to catch the warnings.
Returns true iff the Gedcom object is valid.
=head2 normalise_dates
  $ged->normalise_dates;
  $ged->normalise_dates("%A, %E %B %Y");
Change all recognised dates into a consistent format.  This routine uses
Date::Manip to do the work, so you can look at its documentation regarding
formats that are recognised and % sequences for the output.
Optionally takes a format to use for the output.  The default is currently
"%A, %E %B %Y", but I may change this, as it seems that some programs don't
like that format.
=head2 renumber
  $ged->renumber;
  my %xrefs = $ged->renumber(INDI => 34, FAM => 12, xrefs => [$xref1, $xref2]);
Renumber all the records.
Optional parameters are:
  tag name => last used number (defaults to 0)
  xrefs    => list of xrefs to renumber first
As a record is renumbered, it is assigned the next available number.  The
husband, wife, children, parents and siblings are then renumbered in that
order.  This helps to ensure that families are numerically close together.
The hash returned is the updated hash that was passed in.
=head2 sort_sub
  $ged->order($ged->sort_sub);
Default ordering subroutine.
The sort is by record type in the following order: HEAD, SUBM, INDI, FAM, NOTE,
TRLR, and then by xref within the type.
=head2 order
  $ged->order;
  $ged->order($order_sub);
Order all the records.  Optionally provide a sort subroutine.
This orders the entries within the Gedcom object, which will affect the order
in which they are written out.  The default sort function is Gedcom::sort_sub.
You will need to ensure that the HEAD record is first and that the TRLR record
is last.
=head2 individuals
  my @individuals = $ged->individuals;
Return a list of all the individuals.
=head2 families
  my @families = $ged->families;
Return a list of all the families.
=head2 get_individual
  my $me = $ged->get_individual("Paul Johnson");
Return a list of all individuals matching the specified name.
There are thirteen matches performed, in decreasing order of exactitude.  This
means that the more likely matches are at the head of the list.
In scalar context return the first match found.
The matches are:
   1 - Xref
   2 - Exact
   3 - On word boundaries
   4 - Anywhere
   5 - Exact, case insensitive
   6 - On word boundaries, case insensitive
   7 - Anywhere, case insensitive
   8 - Names in any order, on word boundaries
   9 - Names in any order, anywhere
  10 - Names in any order, on word boundaries, case insensitive
  11 - Names in any order, anywhere, case insensitive
  12 - Soundex code
  13 - Soundex of name
=head2 next_xref
  my $xref = $ged->next_xref("I");
Return the next available xref with the specified prefix.
=head2 add_record
       add_header
       add_submitter
       add_individual
       add_family
       add_note
       add_repository
       add_source
       add_trailer
Create and return a new record of the specified type.
Normally you will not want to pass any arguments to the function.  Those
functions which have an xref (ie not header or trailer) accept an optional
first argument { xref => $x } which will use $x as the xref rather than letting
the module automatically choose the xref.
add_note also accepts an optional second argument which is the text to be used
on the first line of the note.
=head2 get_record
       get_header
       get_submitter
       get_family
       get_note
       get_repository
       get_source
       get_trailer
Return all records of the specified type.  In scalar context just return the
first record.  If a parameter is passed in, just return records of that xref.
=head1 LICENCE
Copyright 1998-2019, Paul Johnson (paul@pjcj.net)
This software is free.  It is licensed under the same terms as Perl itself.
The latest version of this software should be available from my homepage:
http://www.pjcj.net
=cut
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)