#!/usr/bin/perl
use 5.010 ; use strict ; use warnings ;  # 5.010 is the real minimum: the defined-or operator (//=) is used in this file.
use Getopt::Std ; getopts '~2e:n:t:vQ', \my %o ;
use Text::CSV_XS ;  #  Not a core module.
use FindBin qw [ $Script ] ; 
use Term::ANSIColor qw[ :constants color ] ; $Term::ANSIColor::AUTORESET = 1 ; 
use Encode ;# Encode was first released with perl v5.7.3

# The string-valued options (-e, -t, -n) are decoded from UTF-8 so that they
# are handled as character strings from here on.
$o{e} = decode_utf8 $o{e} if defined $o{e} ;
$o{e} //= qw[ \ ] ;  # Default escape string (a single backslash), put in front of text that must be escaped.
$o{t} = decode_utf8 $o{t} if defined $o{t} ;
$o{n} = decode_utf8 $o{n} if defined $o{n} ;

& rev if $o{'~'} ;  # -~ : reverse conversion (TSV -> CSV); rev terminates the program itself.
& main ;            # Default: forward conversion (CSV -> TSV).
exit 0 ;

# Reverse operation (option -~): TSV -> CSV.
#
# Reads TSV lines from the files named in @ARGV (or STDIN), splits each line on
# TAB, turns the -t / -n marker strings back into real TAB / newline characters
# and prints every input line as exactly one CSV record on STDOUT.
# A marker that is preceded by the -e escape string is not restored; it is kept
# as literal text with the escape string removed.
# Does not return: the program is terminated with exit status 0.
#
# NOTE(review): the input lines are matched as read (no decoding is done here),
# while -t/-n/-e were decoded from UTF-8 by the top-level code; non-ASCII
# markers may therefore fail to match -- TODO confirm.
sub rev ( ) { 
  my $esc = defined $o{e} ? $o{e} : '' ;

  # One entry per marker to undo:
  #   [ regex matching an unescaped marker ,
  #     regex matching an escaped marker (undef when the escape string is empty) ,
  #     the literal marker text , the character to restore ]
  # The option strings themselves are left untouched so that the literal text,
  # not its quotemeta'd form, is what gets written back into the data.
  my @restore ;
  for my $spec ( [ $o{t} , "\t" ] , [ $o{n} , "\n" ] ) {
    my ( $marker , $char ) = @$spec ;
    # An empty marker would match at every position; there is nothing to restore.
    next unless defined $marker && length $marker ;
    my $bare    = length $esc ? qr/(?<!\Q$esc\E)\Q$marker\E/ : qr/\Q$marker\E/ ;  # (?<!...) is a negative lookbehind
    my $escaped = length $esc ? qr/\Q$esc\E\Q$marker\E/ : undef ;
    push @restore , [ $bare , $escaped , $marker , $char ] ;
  }

  my $csv = Text::CSV_XS->new( { binary => 1 } ) ;  # if binary =0 then UTF-8 character cause trouble
  while (<>){ 
    chomp ; 
    s/\r$// ;
    my @F = split /\t/, $_ , -1 ;  # limit -1 keeps trailing empty fields

    # Restore TABs first and then newlines in every field, honouring escaped markers.
    for my $field ( @F ) {
      for my $r ( @restore ) {
        my ( $bare , $escaped , $marker , $char ) = @$r ;
        $field =~ s/$bare/$char/g ;
        $field =~ s/$escaped/$marker/g if defined $escaped ;
      }
    }

    # Emit the record once per input line, after all of its fields are restored.
    my $status = $csv->print( *STDOUT , \@F ) ;
    print STDERR BRIGHT_RED "Something wrong at line $.\n" unless $status ; 
    print "\n" ;
  }
  exit 0; 
}

# Entry point of the forward (CSV -> TSV) conversion: puts a UTF-8 layer on
# STDOUT and hands over to core.
sub main ( ) {
  # Per the original author's note, this is needed because Text::CSV_XS
  # decodes UTF-8 input.
  binmode( STDOUT , ":utf8" ) ;
  &core ;
}

# Forward conversion: CSV -> TSV.
#
# Reads CSV records through Text::CSV_XS from the files named in @ARGV (or
# STDIN) and prints each record on STDOUT as one TAB-separated line.
#  * Unless -Q is given, a warning is printed on STDERR for every cell that
#    contains one of the "suspicious" strings: the -t / -n replacement strings,
#    and a raw TAB / newline when the matching option is absent or -v is given.
#  * Unless -Q is given, text in a cell that already equals a (non-empty) -t / -n
#    replacement string is prefixed with the -e escape string.
#    NOTE(review): with -Q no escaping is done at all -- confirm this is intended.
#  * TAB / newline characters inside cells are replaced by the -t / -n strings.
#  * Unless -Q is given, a summary "<columns>x<records>" goes to STDERR at the end.
# The END block declared at the bottom reports Text::CSV_XS parse errors.
sub core ( ) {
  # First physical input line of the record about to be read. A CSV record can
  # span several lines, so $. may grow by 2 or more per record; each record is
  # reported as covering lines $lines .. $. .
  my $lines = 1 ;
  my %cols ; # column count => number of records with that many columns (e.g. 120 records had 3 columns)
  our $csv = Text::CSV_XS -> new ( { binary => 1 } );  # if binary => 0 then when "\n" is included in a cell it cause trouble.

  # When STDIN is a terminal, print a notice if no input has started within a second,
  # then one dot per second until the first record arrives.
  my $alarmF = 0 ; 
  if ( -t ) { 
    $alarmF = 1 ; 
    $SIG{ALRM} = sub { 
      print STDERR GREEN "Waiting CSV-formatted input from STDIN.. ($Script)\n" ;
      $SIG{ALRM} = sub { print STDERR GREEN "." ; alarm 1 } ; 
      alarm 1 ; 
    } ; 
    alarm 1 ; 
  }

  # Which character is replaced by which string: [ character , replacement ].
  my @subst ;
  push @subst , [ "\t" , $o{t} ] if defined $o{t} ;
  push @subst , [ "\n" , $o{n} ] if defined $o{n} ;

  my @warnstr ; # Strings that trigger a warning (replacement strings, TAB, newline).
  my @escape ;  # Literal strings that get the escape prefix when found in a cell.
  unless ($o{Q}) { 
    push @warnstr , $o{t} if defined $o{t} ; 
    push @warnstr , $o{n} if defined $o{n} ; 
    @warnstr = grep { $_ ne '' } @warnstr ;
    @escape = @warnstr ;  # At this point the raw TAB / newline entries are not included yet.
    push @warnstr , "\t" if $o{v} || ! defined $o{t} ; 
    push @warnstr , "\n" if $o{v} || ! defined $o{n} ; 
  }

  # Read the input record by record.
  my $posV = 0 ; # Vertical position (row number) in the output.
  while ( my $x = $csv -> getline( *ARGV ) ) {   # perldoc Text::CSV_XS lists *ARGV under "Old(er) support"; it may stop being supported.
    do { $alarmF = 0 ; alarm 0 } if $alarmF ;  # Input has started: cancel the reminder.
    $posV ++ ; 
    $cols{ scalar @$x } ++ ; # Count records per column count, for the summary printed at the end.

    # Report every cell that contains one of the strings in @warnstr.
    my $posH = 0 ; # Horizontal position (cell number) in the output.
    for my $cell ( @$x ) { 
      $posH ++ ;
      for my $seek ( @warnstr ) { 
        next unless index( $cell , $seek , 0 ) >= 0 ;
        # Printable form of the string that was found.
        my $tgt = $seek ;
        $tgt =~ s/\n/\\n/g ; 
        $tgt =~ s/\t/\\t/g ;
        my $lstr = $lines == $. ? $lines : "$lines-$." ; 
        # Printable form of the cell, with \n and \t highlighted by ANSI background colours.
        my $t = $cell ;
        $t =~s/\n/\\n/gs; 
        $t =~s/\\n/\e[44m\\n\e[40m/g; 
        $t =~s/\t/\\t/gs ;
        $t =~s/\\t/\e[44m\\t\e[40m/g; 
        my $sout = qq[[$Script] Warning: "$tgt" detected at "$ARGV":] ;
        $sout .= qq" input line $lstr; output cell ($posV,$posH): \e[0m\e[4m$t\n" ; 
        print STDERR BRIGHT_RED $sout ;
      }
    }
  
    # Escape pre-existing marker text, then replace TAB / newline by their markers.
    for my $cell ( @$x ) { 
      # The text written back is the literal marker, never its quotemeta'd form.
      for my $literal ( @escape ) { 
        $cell =~ s/\Q$literal\E/$o{e}$literal/g ;
      }
      # s///g does not rescan the text it inserts, so a replacement string that
      # itself contains TAB / newline cannot cause an endless loop.
      for my $pair ( @subst ) { 
        my ( $from , $to ) = @$pair ;
        $cell =~ s/\Q$from\E/$to/g ;
      }
    }
    
    # Output.
    print join( "\t", @$x ) . "\n" ;  
    print "\n" if $o{2} ; # -2 : insert an empty line between output records.
  
    $lines = $. + 1 ; # <- tricky! The next record starts on the line after the last one consumed.
  }
  $csv->eof; # <-- necessary?

  return if $o{Q} ; 
  my $out = qq[[$Script] "$ARGV": $. lines =>] ;
  my $tmp = join " + " , map { "${_}x$cols{$_}"} sort {$a<=>$b} keys %cols ;
  print STDERR CYAN qq[$out $tmp\n] ;

  # Error handling for Text::CSV_XS. The END block is placed here because it
  # needs to see the "our $csv" declared above.
  # It does nothing in reverse mode (-~) or when no parser was created; in those
  # cases it simply falls through, so the exit status already chosen is kept
  # (a bare "exit" inside END would reset it to 0).
  END{ 
    if ( ! $o{'~'} && defined $csv ) {
      my @tmp = $csv -> error_diag () ; # ($cde, $str, $pos, $rec, $fld) = $csv->error_diag ();
      if ( $tmp[0] != 2012 ) {  # See 2012 in perldoc Text::CSV_XS: it means EOF.
        print STDERR BRIGHT_RED join (":",@tmp),"\n" ;
        exit 1 ; 
      }
    }
  }
}


## Help and version information
BEGIN {
  # Version policy and history:
  #   The first target was 0.21.
  #   Reaching 1.00 requires a proper English help.
  #   Reaching 2.00 requires that test code be included.
  #   0.22 : English manual converted to POD format.
  #   0.23 : POD part of the English manual enlarged further.
  $Getopt::Std::STANDARD_HELP_VERSION = 1 ; 
  our $VERSION = 0.52 ;
  # When --help is requested, install an empty VERSION_MESSAGE.
  if ( grep { m/--help/ } @ARGV ) {
    *VERSION_MESSAGE = sub {} ;
  }
}  
# Help printer; Getopt::Std looks for a sub of this name when --help is given.
#
# Words given on the command line select what is shown:
#   en... / ja... / jp : force the English / Japanese manual
#   opt...             : show only the option lines
# Without a language word, Japanese is used when $ENV{LANG} starts with ja_JP and
# English otherwise. The full English manual is displayed through perldoc; every
# other case prints parts of the POD found in this very file ($0).
# Always terminates the program (exec, exit 0, or die when $0 cannot be read).
sub HELP_MESSAGE {
    use FindBin qw[ $Script $Bin ] ;
    sub EnvJ ( ) { ( $ENV{LANG} // '' ) =~ m/^ja_JP/ ? 1 : 0 } ; # e.g. ja_JP.UTF-8 ; an unset LANG counts as non-Japanese
    sub en( ) { grep ( /^en(g(i(sh?)?)?)?/i , @ARGV ) ? 1 : 0 } # Is there an argument that begins with "en" (as in "English")?
    sub ja( ) { grep ( /^jp$|^ja(p(a(n?)?)?)?/i , @ARGV ) ? 1 : 0 } # Is there an argument "jp", or one beginning with "ja" (as in "japan")?
    sub opt( ) { grep (/^opt(i(o(ns?)?)?)?$/i, @ARGV ) ? 1 : 0 } # Is there an argument that is a prefix of "options", at least 3 characters long?
    sub noPOD ( ) { grep (/^no-?p(od?)?\b/i, @ARGV) ? 1 : 0 } # Was "do not use POD" requested? (currently not consulted below)
    my $jd = "JapaneseManual" ;
    my $flagE = ! ja && ( en || ! EnvJ ) ; # True when the English manual is to be shown.
    # List form: $0 is handed to perldoc as one argument, without passing through a shell.
    exec 'perldoc' , $0 if $flagE &&  ! opt ; #&& ! noPOD   ; 
    $ARGV[1] //= '' ;
    open my $FH , '<' , $0 or die "[$Script] cannot open $0 for reading: $!\n" ;
    while(<$FH>){
        s/\Q'=script='\E/$Script/gi ;
        s/\Q'=bin='\E/$Bin/gi ;
        # Outer range: from a "=head1" line to a "=cut" line (the directives themselves are stripped).
        if ( s/^=head1\b\s*// .. s/^=cut\b\s*// ) { 
            # Inner range: the "=begin JapaneseManual" section. The xor selects the lines
            # inside that section for Japanese and the lines outside it for English.
            if ( s/^=begin\s+$jd\b\s*// .. s/^=end\s+$jd\b\s*// xor $flagE ) {
                print $_ if ! opt || m/^\s+\-/  ;  # With "opt": only lines that begin with whitespace and "-".
            }
        } 
    }
    close $FH ;
    exit 0 ;
}

=encoding utf8 

=head1 NAME

csv2tsv

=head1 VERSION 

0.52

=head1 SYNOPSIS

csv2tsv [B<-t> str] [B<-n> str] [-v] [-Q] [-2] [B<-~>] file

=head1 DESCRIPTION 

Transforms CSV formatted data (cf. RFC4180) into TSV formatted data.
Input is assumed to be UTF-8.
(The input line ends can be either CRLF or LF. The output line ends are LF.)
Warnings/errors are printed on STDERR (as far as the author of
this program has experienced).

=head1 EXAMPLE 

csv2tsv file.csv > file.tsv     

csv2tsv B<-n> '[\n]' file.csv > file.tsv       
  # "\n" in the CSV cell will be transformed to [\n].

csv2tsv B<-t> TAB file.csv > file.tsv       
  # "\t" in the CSV cell will be transformed to "TAB". UTF-8 characters can be specified.

B<for> i B<in> *.csv ; B<do> csv2tsv -n'"\n"' -t'"\t"' $i > ${i/csv/tsv} ; B<done>
  # BASH or ZSH is required to use this "for" statement. Useful for multiple CSV files.

For safety, when '-t' or '-n' is given a replacement string,
a B<warning> is displayed every time a value in the input cells contains that replacement string,
unless B<-Q> is set.

csv2tsv < file.csv > file.tsv     
  # The file name cannot be passed to "csv2tsv" this way, so the warning messages may lack some information.

=head1 OPTION

=over 4

=item B<-e> str

Escape string to be placed immediately before any text in an input cell that matches the string specified by -t or -n. Defaults to a backslash.

=item B<-t> str 

Specifies the string that replaces each TAB character inside an input CSV cell.

=item B<-n> str 

Specifies the string that replaces each "\n" character inside an input CSV cell.

=item -v 

Always report the existence of "\t" or "\n" even if "-t str" or "-n str" is specified.

=item -Q 

No warning even if "\t" or "\n" is included in the cell of input. 

=item -2 

Double-spaced output, to make "\n" abnormalities easy to spot by eye.
(An expedient from when the author was first writing this program.)

=item B<-~>

The opposite conversion of csv2tsv, i.e. B<TSV to CSV> conversion.
TABs and line ends will be recovered if the input was generated by this program "csv2tsv" with the
same specification of "-t", "-n" and "-e".

=item --help 

Shows this help.

=item --help ja 

Shows Japanese help.

=item --version

Shows the version information of this program. 

=back 

=head1 AUTHOR

Toshiyuki Shimono
  bin4tsv@gmail.com

=head1 HISTORY 

 2015-09-28 : Firstly created on a whim.    
 2016-07-06 : Some options are added such as -2.    
 2016-08-03 : Response to tab and enter characters.     
 2018-06-24 : Once released on CPAN for the sake of Table::Hack.    
 2018-07-04 : Refinements to options. English manual is added. 

=head1 LICENSE AND COPYRIGHT

Copyright 2018 "Toshiyuki Shimono".

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see L<http://www.gnu.org/licenses/>.

=begin JapaneseManual

  csv2tsv file.csv > file.tsv 
  csv2tsv < file.csv > file.tsv 

  CSV å½¢å¼(RFC 4180)ã®ãƒ•ã‚¡ã‚¤ãƒ«ã‚’ TSVå½¢å¼(ã‚¿ãƒ–æ–‡å—åŒºåˆ‡ã‚Š) ã«å¤‰æ›ã™ã‚‹ã€‚
  å‡ºåŠ›ã«ã¤ã„ã¦ã¯ã€æ–‡å—ã‚³ãƒ¼ãƒ‰ UTF-8 ã§æ”¹è¡Œã‚³ãƒ¼ãƒ‰ã¯ "\n" ã¨ãªã‚‹ã€‚

 ã‚ªãƒ—ã‚·ãƒ§ãƒ³:

   -e st  : -t ã¾ãŸã¯ -e ã§æŒ‡å®šã•ã‚ŒãŸæ–‡å—åˆ—ã«ä¸€è‡´ã™ã‚‹æ–‡å—åˆ—ã®ç›´å‰ã«ã‚¨ã‚¹ã‚±ãƒ¼ãƒ—ã‚’ç›®çš„ã«å…¥ã‚Œã‚‹æ–‡å—åˆ—ã€‚
   -t str : å…¥åŠ›ã®ã‚¿ãƒ–æ–‡å—ã‚’ä½•ã«ç½®ãæ›ãˆã‚‹ã‹ã‚’æ–‡å—åˆ—ã§æŒ‡å®šã™ã‚‹ã€‚ç©ºæ–‡å—åˆ—ãŒæŒ‡å®šã•ã‚Œãªã„é™ã‚Šã€ã‚¨ã‚¹ã‚±ãƒ¼ãƒ—ã‚‚è€ƒæ…®ã•ã‚Œã‚‹ã€‚
   -n str : å…¥åŠ›ã®æ”¹è¡Œæ–‡å—ã‚’ä½•ã«ç½®ãæ›ãˆã‚‹ã‹ã‚’æ–‡å—åˆ—ã§æŒ‡å®šã™ã‚‹ã€‚ç©ºæ–‡å—åˆ—ãŒæŒ‡å®šã•ã‚Œãªã„é™ã‚Šã€ã‚¨ã‚¹ã‚±ãƒ¼ãƒ—ã‚‚è€ƒæ…®ã•ã‚Œã‚‹ã€‚
   -v    :  ã‚¿ãƒ–æ–‡å—ã¨æ”¹è¡Œæ–‡å—ã®å˜åœ¨ã‚’å¿…ãšæŒ‡æ‘˜ã™ã‚‹ã€‚(-t ã‚„ -n ã®æŒ‡å®šãŒã‚ã‚Œã°ï¼Œé€šå¸¸ã€ä½•ã‚‚æŒ‡æ‘˜ã®è¡¨ç¤ºã¯ã—ãªã„ã€‚)
   -Q : å…¥åŠ›ã®ãƒ¬ã‚³ãƒ¼ãƒ‰å†…ã«ã€ã‚¿ãƒ–æ–‡å—ã¾ãŸã¯æ”¹è¡Œæ–‡å—ãŒã‚ã£ã¦ã‚‚ã€è¦å‘Šã‚’å‡ºã•ãªã„ã€‚ä»˜ã‘ã‚‹ã“ã¨ã§é«˜é€ŸåŒ–ã¯ã™ã‚‹ã€‚(no check)
   -2 : ãƒ¬ã‚³ãƒ¼ãƒ‰ã®åŒºåˆ‡ã‚Šã‚’å˜ä¸€ã® \n ã§ã¯ãªãã¦ã€2å€‹ç¶šã‘ãŸ \n\n ã«ã™ã‚‹ã€‚CSVã®ã‚»ãƒ«å†…ã«æ”¹è¡Œæ–‡å—ãŒã‚ã‚‹å ´åˆã«ä½¿ã†ã‹ã‚‚ã—ã‚Œãªã„ã€‚

   -~ : TSVå½¢å¼ã‹ã‚‰CSVå½¢å¼ã«å¤‰æ›ã€‚ -t ã¨ -n ã¨ -e ã®æŒ‡å®šã§ã“ã®ãƒ—ãƒã‚°ãƒ©ãƒ ã§å¤‰æ›æ¸ˆã¿ã¨ä»®å®šã—ã¦ã€ã‚¿ãƒ–ã‚‚æ”¹è¡Œã‚‚å¾©å…ƒã€‚

   --help : ã“ã® $0 ã®ãƒ˜ãƒ«ãƒ—ãƒ¡ãƒƒã‚»ãƒ¼ã‚¸ã‚’å‡ºã™ã€‚  perldoc -t $0 | cat ã§ã‚‚ã»ã¼åŒã˜ã€‚
   --help opt : ã‚ªãƒ—ã‚·ãƒ§ãƒ³ã®ã¿ã®ãƒ˜ãƒ«ãƒ—ã‚’å‡ºã™ã€‚optä»¥å¤–ã§ã‚‚ options ã¨å…ˆé ãŒ1æ–‡å—ä»¥ä¸Šä¸€è‡´ã™ã‚Œã°è‰¯ã„ã€‚
   --help en : è‹±æ–‡ãƒžãƒ‹ãƒ¥ã‚¢ãƒ«ã‚’è¡¨ç¤ºã™ã‚‹
   --version : ã“ã®ãƒ—ãƒã‚°ãƒ©ãƒ ã®ãƒãƒ¼ã‚¸ãƒ§ãƒ³æƒ…å ±ã‚’è¡¨ç¤ºã™ã‚‹ã€‚
=cut