#!/usr/bin/env perl # PODNAME: clust2mapper.pl # ABSTRACT: Build id mapper from UCLUST/CD-HIT clusters for tree formatting use Modern::Perl '2011'; use autodie; use Getopt::Euclid qw(:vars); use Smart::Comments; use Bio::FastParsers; use Bio::MUST::Core; use Bio::MUST::Core::Utils qw(change_suffix); my %class_for = ( 'cd-hit' => 'Bio::FastParsers::CdHit', 'uclust' => 'Bio::FastParsers::Uclust', ); my $class = $class_for{ lc $ARGV_engine }; for my $infile (@ARGV_infiles) { ### Processing: $infile my $report = $class->new( file => $infile ); my $mapper = $report->clust_mapper($ARGV_separator); my $outfile = change_suffix($infile, '.idm'); $mapper->store($outfile); } __END__ =pod =head1 NAME clust2mapper.pl - Build id mapper from UCLUST/CD-HIT clusters for tree formatting =head1 VERSION version 0.190900 =head1 USAGE clust2mapper.pl <infiles> --engine=<pgm> [optional arguments] =head1 REQUIRED ARGUMENTS =over =item <infiles> Path to input UCLUST/CD-HIT cluster files [repeatable argument]. =for Euclid: infiles.type: readable repeatable =item --engine=<pgm> Engine used to generate the clusters. The following programs are available: cd-hit and uclust. =for Euclid: pgm.type: /cd-hit|uclust/ pgm.type.error: <pgm> must be one of cd-hit or uclust (not pgm) =back =head1 OPTIONAL ARGUMENTS =over =item --sep[arator]=<str> Separator used to join members ids for each cluster [default: '/']. =for Euclid: str.type: string str.default: '/' =item --version =item --usage =item --help =item --man Print the usual program information =back =head1 AUTHOR Denis BAURAIN <denis.baurain@uliege.be> =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2013 by University of Liege / Unit of Eukaryotic Phylogenomics / Denis BAURAIN. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. =cut