#!/usr/bin/env perl
# PODNAME: tab2mapper.pl
# ABSTRACT: Build an id mapper from a tabular file giving annotation strings.
# CONTRIBUTOR: Loic MEUNIER <loic.meunier@doct.uliege.be>

use Modern::Perl '2011';
use autodie;

use Getopt::Euclid qw(:vars);
use Smart::Comments;

use Bio::MUST::Core;
use Bio::MUST::Core::Utils qw(change_suffix);
use aliased 'Bio::MUST::Core::IdMapper';
use aliased 'Bio::MUST::Core::Taxonomy';

# Command-line values are exposed by Getopt::Euclid as $ARGV_* variables,
# following the interface declared in the POD below.

# --gi2taxid is documented as requiring a local NCBI Taxonomy mirror
die <<'EOT' if !$ARGV_taxdir && $ARGV_gi2taxid;
Missing required arguments:
    --taxdir=<dir>
EOT

# The mapper is built by calling tab_mapper on the taxonomy object, so a
# taxonomy is needed even when --gi2taxid is not used. Fail early with an
# explicit message rather than with a cryptic
# "Can't call method "tab_mapper" on an undefined value" inside the loop.
die <<'EOT' if !$ARGV_taxdir;
Cannot build an id mapper without a taxonomy object; please specify:
    --taxdir=<dir>
EOT

# build taxonomy object from the local mirror
my $tax = Taxonomy->new_from_cache( tax_dir => $ARGV_taxdir );

# each input file yields one mapper stored next to it with a .idm suffix
for my $infile (@ARGV_infiles) {

    ### Processing: $infile
    my $mapper = $tax->tab_mapper( $infile, {
            column   => $ARGV_column,
            gi2taxid => $ARGV_gi2taxid,
        }
    );

    my $outfile = change_suffix($infile, '.idm');
    $mapper->store($outfile);
}

__END__

=pod

=head1 NAME

tab2mapper.pl - Build an id mapper from a tabular file giving annotation strings.

=head1 VERSION

version 0.191300

=head1 USAGE

    tab2mapper.pl <infiles> [optional arguments]

=head1 REQUIRED ARGUMENTS

=over

=item <infiles>

Path to input tabular (TSV) files [repeatable argument].

=for Euclid: infiles.type: readable
    repeatable

=back

=head1 OPTIONAL ARGUMENTS

=over

=item --col[umn]=<n>

Column number providing the string to be used as the family [default: 1]. The
first column (at index 0) is the id and thus cannot be used as the family.

=for Euclid: n.type: +int
    n.default: 1

=item --gi2taxid=<file>

Optional GI-to-taxid IDM to be used to expand GIs to modern MUST ids [default:
none]. This option requires a local mirror of the NCBI Taxonomy database.

=for Euclid: file.type: readable

=item --taxdir=<dir>

Path to local mirror of the NCBI Taxonomy database. The mapper is built through
the taxonomy object, so this option must be specified in practice.

=for Euclid: dir.type: string

=item --version

=item --usage

=item --help

=item --man

Print the usual program information

=back

=head1 AUTHOR

Denis BAURAIN <denis.baurain@uliege.be>

=head1 CONTRIBUTOR

=for stopwords Loic MEUNIER

Loic MEUNIER <loic.meunier@doct.uliege.be>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2013 by University of Liege / Unit of Eukaryotic Phylogenomics / Denis BAURAIN.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut