#!/usr/bin/perl -w # # tabler.pl # # reads a table, stores the data of 2 custom columns, # and stores that data in a col1 vs. col2 basis # # by Jorge Amigo Lechuga (USC - CeGen) # # changelog # v1.0 initial script # v1.1 added joining character on results # v1.2 added end-of-line character removal when reading input # ################ # INITIALIZING # ################ # initialize variables $programName = "tabler"; $version = "1.2"; $outFile = "out_tabler.txt"; $logFile = ""; undef @dataFilesArray; $interactive = 0; $headers = 1; $split = 0; $splitChar = "\t"; $joinChar = ""; undef @horCols; undef @verCols; undef @resCols; undef @horArray; undef @verArray; # interactive mode check if ( (scalar @ARGV) == 0 ) { interactiveMode(); } else { # read command line arguments readArgs(); } # print defined variables print "$programName v$version\n"; print "\nInput:\n"; foreach $inputFile (@dataFilesArray) { print $inputFile."\n"; } if ($headers > 0) { print "(header present)\n"; } else { print "(headers not present)\n"; } print "\nVertical columns:\t"; foreach $verCol (@verCols) { print "".($verCol + 1)." "; } print "\nHorizontal columns:\t"; foreach $horCol (@horCols) { print "".($horCol + 1)." "; } print "\nResults columns:\t"; foreach $resCol (@resCols) { print "".($resCol + 1)." "; } if ($splitChar eq "\t") {$splitCharText = "tab"} elsif ($splitChar eq " ") {$splitCharText = "space"} else {$splitCharText = $splitChar} print "\nSplitting character:\t'".$splitCharText."'\n"; if ($split == 0) { print "\nJoining character:\t'".$joinChar."'\n"; } ################# # START WORKING # ################# # read all available files foreach $inputFile (@dataFilesArray) { # open data file open(INPUT, $inputFile) || endMessage("ERROR: Could not open '$inputFile'."); # remove headers for (0..$headers-1) { $line = ; } # process data lines while ($line = ) { $ver = ""; $hor = ""; $res = ""; chomp($line); @columns = split($splitChar, $line); foreach $verCol (@verCols) { $ver .= $columns[$verCol]; } foreach $horCol (@horCols) { $hor .= $columns[$horCol]; } foreach $resCol (@resCols) { $res .= $columns[$resCol]."|"; } $res =~ s/\|$//; # foreach $verCol (@verCols) { print "+$verCol+"; } # print "*$ver*$hor*$res*\n"; # ; if ( not exists $horHash{$hor} ) { # store horizontal label $horHash{$hor} = ""; push @horArray, $hor; } if ( not exists $verHash{$ver} ) { # store vertical label $verHash{$ver} = ""; push @verArray, $ver; } if ( not exists $results{$ver."|".$hor} ) { # store result $results{$ver."|".$hor} = $res; } else { if ( $res ne $results{$ver."|".$hor} ) { # repeated result does not match if ( exists $multiple{$ver."|".$hor} ) { $multiple{$ver."|".$hor} .= ", ".$res; } else { $multiple{$ver."|".$hor} = $results{$ver."|".$hor}.", ".$res; } } } } close INPUT; } # print the resulting table open(OUTPUT, ">$outFile") || endMessage("Could not write the output file."); foreach $hor (@horArray) { if ($split) { for (1 .. $#resCols+1) { print OUTPUT "\t".$hor."_".$_; } } else { print OUTPUT "\t".$hor; } } print OUTPUT "\n"; foreach $ver (@verArray) { print OUTPUT $ver; foreach $hor (@horArray) { if ( exists $results{$ver."|".$hor} ) { if ( exists $multiple{$ver."|".$hor} ) { print OUTPUT "\t?"; } else { $res = $results{$ver."|".$hor}; if ($split) { $res =~ s/\|/\t/; } else { $res =~ s/\|/$joinChar/; } print OUTPUT "\t".$res; } } else { print OUTPUT "\t-"; } } print OUTPUT "\n"; } close OUTPUT; print "\nThe output has been printed into '$outFile'.\n"; # build the errors file name $errorsFile = $outFile; if ($errorsFile =~ /\./) { $errorsFile =~ s/\.([^\.]+)$//; $errorsFile .= "_errors.$1"; } else { $errorsFile .= "_errors"; } # print the errors if any if ( scalar(keys %multiple) > 0 ) { open(OUTPUT2, ">$errorsFile") || endMessage("Could not write the errors file."); foreach $key (keys %multiple) { if ($key =~ /(.+)\|(.+)/) { $ver = $1; $hor = $2; $res = $multiple{$ver."|".$hor}; $res =~ s/\|//g; print OUTPUT2 $ver."\t".$hor."\t".$res."\n"; } } close OUTPUT2; print "\nThe errors have been printed into '$errorsFile'.\n"; } # end of the program endMessage("Program ended."); ##################################################################### # endMessage() subroutine ##################################################################### # ask interactively for all the needed values ##################################################################### sub endMessage { my($message) = @_; print "\n".$message."\n"; if ($interactive) { print "\nPress the 'return' key to exit...\n"; ; } exit; } ##################################################################### # interactiveMode() subroutine ##################################################################### # ask interactively for all the needed values ##################################################################### sub interactiveMode { # input file print "\nEnter the input file: "; chomp($inputFile = ); if ($inputFile ne "") { push @dataFilesArray, $inputFile; } else { endMessage("A data file must be specified."); } # input file print "\nEnter the output file: "; chomp($outFile = ); if ($outFile eq "") { endMessage("An output file must be specified."); } else { if ($outFile eq $inputFile) { endMessage("Same input and output file names are not accepted."); } } # headers print "\nHow many header lines? "; chomp($headers = ); if ($headers =~ /\D/) { endMessage("Invalid option."); } # vertical column print "\nVertical column index/indexes? "; chomp($verCol = ); if ($verCol eq "") { endMessage("A vertical column index must be specified."); } while ($verCol =~ /(\S+)/g) { $index = $1; if ($index =~ /[^\d]/) { endMessage("Invalid option. Only numbers are accepted."); } else { push @verCols, $index - 1; } } # horizontal column print "\nHorizontal column index/indexes? "; chomp($horCol = ); if ($horCol eq "") { endMessage("A horizontal column index must be specified."); } while ($horCol =~ /(\S+)/g) { $index = $1; if ($index =~ /[^\d]/) { endMessage("Invalid option. Only numbers are accepted."); } else { push @horCols, $index - 1; } } # results column print "\nResults column index/indexes? "; chomp($resCol = ); if ($resCol eq "") { endMessage("A results column index must be specified."); } while ($resCol =~ /(\S+)/g) { $index = $1; if ($index =~ /[^\d]/) { endMessage("Invalid option. Only numbers are accepted."); } else { push @resCols, $index - 1; } } # split results if ($#resCols > 0) { print "\nDo you want to join the results columns?\n"; print "1. Yes\t(default)\n"; print "2. No\n"; chomp($split = ); if ($split eq "1" || $split eq "") { $split = 0; } elsif ($split eq "2") { $split = 1; } else { endMessage("Invalid option."); } if ($split == 0) { # join char print "\nJoining character? "; chomp($joinChar = ); } } # split char print "\nSplitting character?\n"; print "1. tab\t(default)\n"; print "2. space\n"; print "3. ;\n"; print "4. ,\n"; print "Other (please enter it)\n"; chomp($splitChar = ); if ($splitChar eq "1" || $splitChar eq "") { $splitChar = "\t"; } elsif ($splitChar eq "2") { $splitChar = " "; } elsif ($splitChar eq "3") { $splitChar = ";"; } elsif ($splitChar eq "4") { $splitChar = ","; } } ##################################################################### # readArgs() subroutine ##################################################################### # parse the command line arguments for customizing the analisys ##################################################################### sub readArgs { # initialize variables $printVersion = 0; $printHelp = 0; $horCol = -1; $verCol = -1; $resCol = -1; # check the correct values for the command line parameters for ($i = 0; $i < (scalar @ARGV); $i++) { if ($ARGV[$i] eq "-h") { $printHelp = 1; } elsif ($ARGV[$i] eq "-data") { while ( ($i < $#ARGV) && ($ARGV[$i+1] !~ /^-/) ) { $i++; push @dataFilesArray, $ARGV[$i]; } if ( scalar(@dataFilesArray) == 0 ) { $printHelp = 4; } } elsif ($ARGV[$i] eq "-out") { if ($ARGV[$i+1] !~ /^-/) { $i++; $outFile = $ARGV[$i]; } else { $printHelp = 6; } } elsif ($ARGV[$i] eq "-log") { if ($ARGV[$i+1] !~ /^-/) { $i++; $logFile = $ARGV[$i]; } else { $printHelp = 6; } } elsif ($ARGV[$i] eq "-head") { if ($ARGV[$i+1] !~ /\D/) { $i++; $headers = $ARGV[$i]; } else { $printHelp = 3; } } elsif ($ARGV[$i] eq "-ver") { while ( ($i < $#ARGV) && ($ARGV[$i+1] !~ /^-/) ) { $i++; unless ($ARGV[$i] =~ /[^\d]/) { $verCol = $ARGV[$i] - 1; push @verCols, $verCol; } } if ($verCol == -1) { $printHelp = 2; } } elsif ($ARGV[$i] eq "-hor") { while ( ($i < $#ARGV) && ($ARGV[$i+1] !~ /^-/) ) { $i++; unless ($ARGV[$i] =~ /[^\d]/) { $horCol = $ARGV[$i] - 1; push @horCols, $horCol; } } if ($horCol == -1) { $printHelp = 2; } } elsif ($ARGV[$i] eq "-res") { while ( ($i < $#ARGV) && ($ARGV[$i+1] !~ /^-/) ) { $i++; unless ($ARGV[$i] =~ /[^\d]/) { $resCol = $ARGV[$i] - 1; push @resCols, $resCol; } } if ($resCol == -1) { $printHelp = 2; } } elsif ($ARGV[$i] eq "-char") { if ($ARGV[$i+1] !~ /^-/) { $i++; $joinChar = $ARGV[$i]; if ($joinChar eq "tab") { $joinChar = "\t"; } elsif ($joinChar eq "space") { $joinChar = " "; } } else { $printHelp = 2; } } elsif ($ARGV[$i] eq "-split") { $split = 1; } elsif ($ARGV[$i] eq "-splitter") { if ($ARGV[$i+1] !~ /^-/) { $i++; $splitChar = $ARGV[$i]; if ($splitChar eq "tab") { $splitChar = "\t"; } elsif ($splitChar eq "space") { $splitChar = " "; } } else { $printHelp = 2; } } elsif ($ARGV[$i] eq "-v") { $printVersion = 1; } else { $printHelp = 3; } last if ($printHelp > 0); } if ($horCol == -1 || $verCol == -1 || $resCol == -1) { if ( ( $printHelp == 0 ) && ( $printVersion == 0 ) && ( (scalar @ARGV) > 0 ) ) { $printHelp = 5; } } # write everything onto a log file, not on screen if ($logFile ne "") { open STDOUT, '>', $logFile; } if ($printHelp == 1) { # display help information print "\n$programName v$version\n"; print "\nCommand line options:\n"; print "-data\t\tInput files.\n"; print "-out\t\tOutput files.\n"; print "-head\t\tHeader present (default = no).\n"; print "-hor\t\tHorizontal column indexes (will concatenate them).\n"; print "-ver\t\tVertical column indexes (will concatenate them).\n"; print "-res\t\tResults column indexes (will concatenate them).\n"; print "-char\t\tJoining character (default = none).\n"; print "-split\t\tMaintain results columns splitted (default = no).\n"; print "-splitter\tSplitting character (default = tab).\n"; print "\tavailable options: tab space ; ,\n"; print "-h\t\tPrint the help.\n"; print "\n"; print "Usage example:\n"; print "perl columns2table.pl -data table.txt -out table2.txt -head -ver 1 -hor 2 -res 3 4 -char /\n"; print "\n"; exit; } elsif ($printHelp == 2) { print "\n$programName v$version\n"; print "\n"; print "ERROR: Invalid char option.\n"; print "Type '-h' or for usage help.\n"; exit; } elsif ($printHelp == 3) { print "\n$programName v$version\n"; print "\n"; print "ERROR: Invalid option.\n"; print "Type '-h' or for usage help.\n"; exit; } elsif ($printHelp == 4) { print "\n$programName v$version\n"; print "\n"; print "ERROR: No data file defined.\n"; print "Type '-h' or for usage help.\n"; exit; } elsif ($printHelp == 5) { print "\n$programName v$version\n"; print "\n"; print "ERROR: Not all the columns have been correctly defined.\n"; print "Type '-h' or for usage help.\n"; exit; } elsif ($printHelp == 6) { print "\n$programName v$version\n"; print "\n"; print "ERROR: Invalid out file name.\n"; print "Type '-h' or for usage help.\n"; exit; } elsif ($printVersion == 1) { print "$programName v$version"; exit; } }