#!/usr/bin/perl
use 5.010;    # the defined-or operator (//=) used below needs Perl 5.10+
use strict;
use warnings;
use Getopt::Std;
use Text::CSV_XS;    # Not a core module.
use FindBin qw( $Script $Bin );
use Term::ANSIColor qw( :constants color );
use Encode;          # Encode was first released with perl v5.7.3

$Term::ANSIColor::AUTORESET = 1;

# Command-line options.  "--help" / "--version" are handled inside getopts
# through HELP_MESSAGE and the BEGIN block defined further down.
my %o;
getopts( '~2e:n:t:vQ', \%o );

# Option values arrive as bytes; turn them into character strings so that
# they compare correctly against decoded cell contents.
for my $key (qw( e t n )) {
    $o{$key} = decode_utf8( $o{$key} ) if defined $o{$key};
}
$o{e} //= '\\';    # string placed in front of text that must be escaped

rev() if $o{'~'};    # rev() never returns; it exits by itself
main();
exit 0;

# Reverse operation: TSV -> CSV.
#
# Reads tab-separated lines from the files named in @ARGV (or STDIN) and
# prints one CSV record per input line on STDOUT.  Occurrences of the -t / -n
# strings that are not preceded by the -e string are turned back into a real
# tab / newline; occurrences that are preceded by the -e string are turned
# back into the bare -t / -n string.  Exits the program with status 0.
sub rev {

    # Cells are handled as character strings, so STDOUT must encode them.
    binmode STDOUT, ':utf8';

    my $esc_re = quotemeta $o{e};

    # One entry per restorable character.  The regex-quoted form is used
    # only for matching; the literal token is kept for the replacement
    # side, so tokens containing regex metacharacters are restored verbatim.
    my @restore;
    for my $pair ( [ $o{t}, "\t" ], [ $o{n}, "\n" ] ) {
        my ( $token, $char ) = @$pair;

        # An empty token would match at every position; nothing to restore.
        next if !defined $token || $token eq '';
        my $tok_re = quotemeta $token;
        if ( $o{e} eq '' ) {

            # With an empty escape string nothing was escaped on the way in,
            # and an empty look-behind would never match: plain replacement.
            push @restore,
              { bare => qr/$tok_re/, escaped => undef,
                token => $token, char => $char };
        }
        else {
            # (?<!pattern) is a negative look-behind.
            push @restore,
              { bare    => qr/(?<!$esc_re)$tok_re/,
                escaped => qr/$esc_re$tok_re/,
                token   => $token,
                char    => $char };
        }
    }

    # binary => 1 is required, otherwise UTF-8 characters cause trouble.
    my $csv = Text::CSV_XS->new( { binary => 1 } );

    while ( my $line = <> ) {
        chomp $line;
        $line =~ s/\r$//;

        # Decode so that non-ASCII -t / -n / -e strings can match the data.
        $line = decode_utf8($line);

        my @fields = split /\t/, $line, -1;
        for my $field (@fields) {
            for my $r (@restore) {
                $field =~ s/$r->{bare}/$r->{char}/g;
                $field =~ s/$r->{escaped}/$r->{token}/g
                  if defined $r->{escaped};
            }
        }

        # Print the record once, after every field has been restored.
        my $status = $csv->print( *STDOUT, \@fields );
        print STDERR BRIGHT_RED "Something wrong at line $.\n" unless $status ;
        print "\n";
    }
    exit 0;
}

# Forward operation entry point: CSV -> TSV.
sub main {

    # Necessary because Text::CSV_XS decodes UTF-8 input.
    binmode STDOUT, ':utf8';
    core();
    return;
}

# Reads CSV records from the files named in @ARGV (or STDIN), prints each
# record as one tab-separated line on STDOUT, and reports on STDERR:
#   * a warning for every cell that contains a tab, a newline or one of the
#     -t / -n strings (suppressed by -Q),
#   * a summary "columns x rows" line at the end (suppressed by -Q).
sub core {

    # A CSV record can span several physical lines, so $. may grow by two or
    # more per record.  The record just read covers lines $lines .. $. .
    my $lines = 1;

    # Number of records seen for each column count (e.g. 3 => 120 means that
    # 120 records had three columns).
    my %cols;

    # Package variable because the END block below inspects it.
    # binary => 1 is required, otherwise "\n" inside a cell causes trouble.
    our $csv = Text::CSV_XS->new( { binary => 1 } );

    # If STDIN is a terminal and no input arrives within a second, show a
    # notice, then one dot per further second.
    my $alarm_pending = 0;
    if ( -t STDIN ) {
        $alarm_pending = 1;
        $SIG{ALRM} = sub {
            print STDERR GREEN "Waiting CSV-formatted input from STDIN.. ($Script)\n" ;
            $SIG{ALRM} = sub { print STDERR GREEN "." ; alarm 1 };
            alarm 1;
        };
        alarm 1;
    }

    # [ character found in a cell, string that replaces it ]
    my @replace;
    push @replace, [ "\t", $o{t} ] if defined $o{t};
    push @replace, [ "\n", $o{n} ] if defined $o{n};

    my @warnstr;    # strings whose presence in a cell triggers a warning
    my @escape;     # literal strings that get the -e prefix inside a cell
    unless ( $o{Q} ) {
        @escape  = grep { defined $_ && $_ ne '' } @o{qw( t n )};
        @warnstr = @escape;
        push @warnstr, "\t" if $o{v} || !defined $o{t};
        push @warnstr, "\n" if $o{v} || !defined $o{n};
    }

    my $posV = 0;    # vertical position (record number) in the output

    # Passing *ARGV is listed as "Old(er) support" in perldoc Text::CSV_XS;
    # it may stop being supported some day.
    while ( my $row = $csv->getline(*ARGV) ) {
        if ($alarm_pending) {
            $alarm_pending = 0;
            alarm 0;
        }
        $posV++;
        $cols{ scalar @$row }++;

        # Report cells that contain one of the watched strings.
        my $posH = 0;    # horizontal position (cell number) in the output
        for my $cell (@$row) {
            $posH++;
            for my $seek (@warnstr) {
                next if index( $cell, $seek, 0 ) < 0;

                my $tgt = $seek;
                $tgt =~ s/\n/\\n/g;
                $tgt =~ s/\t/\\t/g;
                my $lstr = $lines == $. ? $lines : "$lines-$.";

                # Show the cell with tabs / newlines spelled out and
                # highlighted.
                my $shown = $cell;
                $shown =~ s/\n/\\n/gs;
                $shown =~ s/\\n/\e[44m\\n\e[40m/g;
                $shown =~ s/\t/\\t/gs;
                $shown =~ s/\\t/\e[44m\\t\e[40m/g;

                my $sout = qq{[$Script] Warning: "$tgt" detected at "$ARGV":};
                $sout .= qq{ input line $lstr; output cell ($posV,$posH): \e[0m\e[4m$shown\n};
                print STDERR BRIGHT_RED $sout ;
            }
        }

        # Escape, then replace.
        for my $cell (@$row) {

            # Prefix literal occurrences of the -t / -n strings with the
            # escape string.  The matched text itself ($1) is re-inserted,
            # never its regex-quoted form.
            for my $literal (@escape) {
                $cell =~ s/(\Q$literal\E)/$o{e}$1/g;
            }

            # s///g does not rescan what it inserted, so a replacement that
            # itself contains a tab / newline cannot loop forever.
            for my $map (@replace) {
                my ( $from, $to ) = @$map;
                $cell =~ s/\Q$from\E/$to/g;
            }
        }

        print join( "\t", @$row ) . "\n";
        print "\n" if $o{2};    # optional blank line between records
        $lines = $. + 1;        # first physical line of the next record
    }
    $csv->eof;                  # NOTE(review): result unused - confirm whether needed

    return if $o{Q};

    my $out = qq{[$Script] "$ARGV": $. lines =>};
    my $tmp = join " + ", map { "${_}x$cols{$_}" } sort { $a <=> $b } keys %cols;
    print STDERR CYAN qq[$out $tmp\n] ;
    return;
}

# Error reporting for Text::CSV_XS.  Runs at program exit; does nothing in
# reverse mode or when no parser was ever created (e.g. --help).  Code 2012
# is the normal end-of-file condition (see perldoc Text::CSV_XS); anything
# else is printed and turns the exit status into 1.  $? is assigned instead
# of calling exit so that an earlier failure status is not overwritten.
END {
    our $csv;
    if ( !$o{'~'} && defined $csv ) {

        # ($cde, $str, $pos, $rec, $fld) = $csv->error_diag ();
        my @diag = $csv->error_diag();
        if ( $diag[0] != 2012 ) {
            print STDERR BRIGHT_RED join (":",@diag),"\n" ;
            $? = 1;
        }
    }
}

## Help and version information
BEGIN {
    our $VERSION = 0.52;
    $Getopt::Std::STANDARD_HELP_VERSION = 1;

    # Do not print the version banner in front of the --help output.
    *VERSION_MESSAGE = sub { } if grep { m/--help/ } @ARGV;

    # Versioning plan:
    #   0.21 was the initial target.
    #   1.00 or above requires a proper English help.
    #   2.00 or above requires test code to be included.
    #   0.22 : English manual converted to POD.
    #   0.23 : more of the English manual moved into POD.
}

# Called by Getopt::Std for "--help".  Extra words in @ARGV select the
# output: words starting with "en" or "ja"/"jp" pick the language, "opt"
# (or a longer prefix of "options") restricts the output to option lines.
# Without a language word, Japanese is chosen when $ENV{LANG} starts with
# ja_JP.  English help is delegated to perldoc; everything else is printed
# from the POD section of this file.  Always exits with status 0.
sub HELP_MESSAGE {
    my $lang     = $ENV{LANG} // '';    # LANG may be unset
    my $env_ja   = $lang =~ m/^ja_JP/ ? 1 : 0;    # e.g. ja_JP.UTF-8
    my $want_en  = grep { /^en(g(i(sh?)?)?)?/i } @ARGV;
    my $want_ja  = grep { /^jp$|^ja(p(a(n?)?)?)?/i } @ARGV;
    my $want_opt = grep { /^opt(i(o(ns?)?)?)?$/i } @ARGV;

    my $jd    = "JapaneseManual";
    my $flagE = !$want_ja && ( $want_en || !$env_ja );    # English output?

    if ( $flagE && !$want_opt ) {

        # List form: no shell is involved, so odd characters in the script
        # path are harmless.  If perldoc cannot be started, fall through
        # and print the POD source below.
        { exec 'perldoc', $0 }
    }

    open my $FH, '<', $0 or die "[$Script] cannot open $0: $!\n";
    while (<$FH>) {
        s/\Q'=script='\E/$Script/gi;
        s/\Q'=bin='\E/$Bin/gi;

        # Outer range: the POD part of the file.  Inner range: the Japanese
        # manual; "xor $flagE" selects the part outside it for English.
        if ( s/^=head1\b\s*// .. s/^=cut\b\s*// ) {
            if ( s/^=begin\s+$jd\b\s*// .. s/^=end\s+$jd\b\s*// xor $flagE ) {
                print $_ if !$want_opt || m/^\s+\-/;
            }
        }
    }
    close $FH;
    exit 0;
}

=encoding utf8

=head1 NAME

csv2tsv

=head1 VERSION

0.52

=head1 SYNOPSIS

csv2tsv [B<-e> str] [B<-t> str] [B<-n> str] [-v] [-Q] [-2] [B<-~>] file

=head1 DESCRIPTION

Transforms CSV formatted data (cf. RFC4180) into TSV formatted data. Input is assumed to be UTF-8.
(The input line ends can be either CRLF or LF. The output line ends are LF.)
Warnings/errors would be properly printed on STDERR (as far as the author of this program experienced).

=head1 EXAMPLE

csv2tsv file.csv > file.tsv

csv2tsv B<-n> '[\n]' file.csv > file.tsv # "\n" in the CSV cell will be transformed to [\n].

csv2tsv B<-t> TAB file.csv > file.tsv # "\t" in the CSV cell will be transformed to "TAB". UTF-8 characters can be specified.

B<for> i B<in> *.csv ; B<do> csv2tsv -n'"\n"' -t'"\t"' $i > ${i/csv/tsv} ; B<done> # BASH or ZSH is required to use this "for" statement. Useful for multiple CSV files.

For safety, when '-t' or '-n' is given a string, a B<warning> is displayed every time a value in the input cells contains that string, unless B<-Q> is set.

csv2tsv < file.csv > file.tsv # file name information cannot be passed to "csv2tsv".
So the warning messages may lack some information.

=head1 OPTION

=over 4

=item B<-e> str

Escape string (default: a backslash). It is placed immediately before any text in an input cell that is identical to the string given by -t or -n, so that such text can be told apart from a replaced TAB or line end.

=item B<-t> str

Specifies the string that each TAB character in an input cell is replaced with.

=item B<-n> str

Specifies the string that each "\n" character in an input CSV cell is replaced with.

=item -v

Always report the existence of "\t" or "\n" even if "-t str" or "-n str" is specified.

=item -Q

No warning even if "\t" or "\n" is included in a cell of the input.

=item -2

Double-spaced output, to make "\n" abnormalities easy to find by eye.
(An expedient from the time the author was first writing this program.)

=item B<-~>

The opposite conversion of csv2tsv, i.e. B<TSV to CSV> conversion. TABs and LINEENDs will be
recovered if the input was generated by this program "csv2tsv" with the same specification of "-t", "-n" and "-e".

=item --help

Shows this help.

=item --help ja

Shows Japanese help.

=item --version

Shows the version information of this program.

=back

=head1 AUTHOR

Toshiyuki Shimono
  bin4tsv@gmail.com

=head1 HISTORY

  2015-09-28 : First created on a whim.
  2016-07-06 : Some options are added such as -2.
  2016-08-03 : Response to tab and enter characters.
  2018-06-24 : Once released on CPAN for the sake of Table::Hack.
  2018-07-04 : Refinements to options. English manual is added.

=head1 LICENSE AND COPYRIGHT

Copyright 2018 "Toshiyuki Shimono".

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see L<http://www.gnu.org/licenses/>.

=begin JapaneseManual

  csv2tsv file.csv > file.tsv
  csv2tsv < file.csv > file.tsv

  CSV 形式(RFC 4180)のファイルを TSV形式(タブ文字区切り) に変換する。
  出力については、文字コード UTF-8 で改行コードは "\n" となる。

 オプション:

  -e str  : -t または -n で指定された文字列に一致する文字列の直前にエスケープを目的に入れる文字列。
  -t str  : 入力のタブ文字を何に置き換えるかを文字列で指定する。空文字列が指定されない限り、エスケープも考慮される。
  -n str  : 入力の改行文字を何に置き換えるかを文字列で指定する。空文字列が指定されない限り、エスケープも考慮される。
  -v      : タブ文字と改行文字の存在を必ず指摘する。(-t や -n の指定があれば，通常、何も指摘の表示はしない。)
  -Q      : 入力のレコード内に、タブ文字または改行文字があっても、警告を出さない。付けることで高速化はする。(no check)
  -2      : レコードの区切りを単一の \n ではなくて、2個続けた \n\n にする。CSVのセル内に改行文字がある場合に使うかもしれない。
  -~      : TSV形式からCSV形式に変換。 -t と -n と -e の指定でこのプログラムで変換済みと仮定して、タブも改行も復元。

  --help     : この $0 のヘルプメッセージを出す。 perldoc -t $0 | cat でもほぼ同じ。
  --help opt : オプションのみのヘルプを出す。opt以外でも options と先頭が3文字以上一致すれば良い。
  --help en  : 英文マニュアルを表示する
  --version  : このプログラムのバージョン情報を表示する。

=end JapaneseManual

=cut