#!/usr/bin/perl -w
#
# tabler.pl
#
# reads a table, stores the data of 2 custom columns,
# and stores that data in a col1 vs. col2 basis
#
# by Jorge Amigo Lechuga (USC - CeGen)
#
# changelog
# v1.0 initial script
# v1.1 added joining character on results
# v1.2 added end-of-line character removal when reading input
#
################
# INITIALIZING #
################
# initialize variables
$programName = "tabler";
$version = "1.2";
$outFile = "out_tabler.txt";
$logFile = "";
undef @dataFilesArray;
$interactive = 0;
$headers = 1;
$split = 0;
$splitChar = "\t";
$joinChar = "";
undef @horCols;
undef @verCols;
undef @resCols;
undef @horArray;
undef @verArray;
# interactive mode check
if ( (scalar @ARGV) == 0 ) {
interactiveMode();
} else {
# read command line arguments
readArgs();
}
# print defined variables
print "$programName v$version\n";
print "\nInput:\n";
foreach $inputFile (@dataFilesArray) { print $inputFile."\n"; }
if ($headers > 0) { print "(header present)\n"; }
else { print "(headers not present)\n"; }
print "\nVertical columns:\t";
foreach $verCol (@verCols) { print "".($verCol + 1)." "; }
print "\nHorizontal columns:\t";
foreach $horCol (@horCols) { print "".($horCol + 1)." "; }
print "\nResults columns:\t";
foreach $resCol (@resCols) { print "".($resCol + 1)." "; }
if ($splitChar eq "\t") {$splitCharText = "tab"}
elsif ($splitChar eq " ") {$splitCharText = "space"}
else {$splitCharText = $splitChar}
print "\nSplitting character:\t'".$splitCharText."'\n";
if ($split == 0) {
print "\nJoining character:\t'".$joinChar."'\n";
}
#################
# START WORKING #
#################
# read all available files
foreach $inputFile (@dataFilesArray) {
# open data file
open(INPUT, $inputFile) || endMessage("ERROR: Could not open '$inputFile'.");
# remove headers
for (0..$headers-1) { $line = ; }
# process data lines
while ($line = ) {
$ver = "";
$hor = "";
$res = "";
chomp($line);
@columns = split($splitChar, $line);
foreach $verCol (@verCols) { $ver .= $columns[$verCol]; }
foreach $horCol (@horCols) { $hor .= $columns[$horCol]; }
foreach $resCol (@resCols) { $res .= $columns[$resCol]."|"; }
$res =~ s/\|$//;
# foreach $verCol (@verCols) { print "+$verCol+"; }
# print "*$ver*$hor*$res*\n";
# ;
if ( not exists $horHash{$hor} ) {
# store horizontal label
$horHash{$hor} = "";
push @horArray, $hor;
}
if ( not exists $verHash{$ver} ) {
# store vertical label
$verHash{$ver} = "";
push @verArray, $ver;
}
if ( not exists $results{$ver."|".$hor} ) {
# store result
$results{$ver."|".$hor} = $res;
} else {
if ( $res ne $results{$ver."|".$hor} ) {
# repeated result does not match
if ( exists $multiple{$ver."|".$hor} ) {
$multiple{$ver."|".$hor} .= ", ".$res;
} else {
$multiple{$ver."|".$hor} = $results{$ver."|".$hor}.", ".$res;
}
}
}
}
close INPUT;
}
# print the resulting table
open(OUTPUT, ">$outFile") || endMessage("Could not write the output file.");
foreach $hor (@horArray) {
if ($split) {
for (1 .. $#resCols+1) {
print OUTPUT "\t".$hor."_".$_;
}
} else {
print OUTPUT "\t".$hor;
}
}
print OUTPUT "\n";
foreach $ver (@verArray) {
print OUTPUT $ver;
foreach $hor (@horArray) {
if ( exists $results{$ver."|".$hor} ) {
if ( exists $multiple{$ver."|".$hor} ) {
print OUTPUT "\t?";
} else {
$res = $results{$ver."|".$hor};
if ($split) { $res =~ s/\|/\t/; }
else { $res =~ s/\|/$joinChar/; }
print OUTPUT "\t".$res;
}
} else {
print OUTPUT "\t-";
}
}
print OUTPUT "\n";
}
close OUTPUT;
print "\nThe output has been printed into '$outFile'.\n";
# build the errors file name
$errorsFile = $outFile;
if ($errorsFile =~ /\./) {
$errorsFile =~ s/\.([^\.]+)$//;
$errorsFile .= "_errors.$1";
} else {
$errorsFile .= "_errors";
}
# print the errors if any
if ( scalar(keys %multiple) > 0 ) {
open(OUTPUT2, ">$errorsFile") || endMessage("Could not write the errors file.");
foreach $key (keys %multiple) {
if ($key =~ /(.+)\|(.+)/) {
$ver = $1;
$hor = $2;
$res = $multiple{$ver."|".$hor};
$res =~ s/\|//g;
print OUTPUT2 $ver."\t".$hor."\t".$res."\n";
}
}
close OUTPUT2;
print "\nThe errors have been printed into '$errorsFile'.\n";
}
# end of the program
endMessage("Program ended.");
#####################################################################
# endMessage() subroutine
#####################################################################
# ask interactively for all the needed values
#####################################################################
sub endMessage {
my($message) = @_;
print "\n".$message."\n";
if ($interactive) {
print "\nPress the 'return' key to exit...\n";
;
}
exit;
}
#####################################################################
# interactiveMode() subroutine
#####################################################################
# ask interactively for all the needed values
#####################################################################
sub interactiveMode {
# input file
print "\nEnter the input file: ";
chomp($inputFile = );
if ($inputFile ne "") { push @dataFilesArray, $inputFile; }
else { endMessage("A data file must be specified."); }
# input file
print "\nEnter the output file: ";
chomp($outFile = );
if ($outFile eq "") { endMessage("An output file must be specified."); }
else {
if ($outFile eq $inputFile) { endMessage("Same input and output file names are not accepted."); }
}
# headers
print "\nHow many header lines? ";
chomp($headers = );
if ($headers =~ /\D/) { endMessage("Invalid option."); }
# vertical column
print "\nVertical column index/indexes? ";
chomp($verCol = );
if ($verCol eq "") { endMessage("A vertical column index must be specified."); }
while ($verCol =~ /(\S+)/g) {
$index = $1;
if ($index =~ /[^\d]/) { endMessage("Invalid option. Only numbers are accepted."); }
else { push @verCols, $index - 1; }
}
# horizontal column
print "\nHorizontal column index/indexes? ";
chomp($horCol = );
if ($horCol eq "") { endMessage("A horizontal column index must be specified."); }
while ($horCol =~ /(\S+)/g) {
$index = $1;
if ($index =~ /[^\d]/) { endMessage("Invalid option. Only numbers are accepted."); }
else { push @horCols, $index - 1; }
}
# results column
print "\nResults column index/indexes? ";
chomp($resCol = );
if ($resCol eq "") { endMessage("A results column index must be specified."); }
while ($resCol =~ /(\S+)/g) {
$index = $1;
if ($index =~ /[^\d]/) { endMessage("Invalid option. Only numbers are accepted."); }
else { push @resCols, $index - 1; }
}
# split results
if ($#resCols > 0) {
print "\nDo you want to join the results columns?\n";
print "1. Yes\t(default)\n";
print "2. No\n";
chomp($split = );
if ($split eq "1" || $split eq "") { $split = 0; }
elsif ($split eq "2") { $split = 1; }
else { endMessage("Invalid option."); }
if ($split == 0) {
# join char
print "\nJoining character? ";
chomp($joinChar = );
}
}
# split char
print "\nSplitting character?\n";
print "1. tab\t(default)\n";
print "2. space\n";
print "3. ;\n";
print "4. ,\n";
print "Other (please enter it)\n";
chomp($splitChar = );
if ($splitChar eq "1" || $splitChar eq "") { $splitChar = "\t"; }
elsif ($splitChar eq "2") { $splitChar = " "; }
elsif ($splitChar eq "3") { $splitChar = ";"; }
elsif ($splitChar eq "4") { $splitChar = ","; }
}
#####################################################################
# readArgs() subroutine
#####################################################################
# parse the command line arguments for customizing the analisys
#####################################################################
sub readArgs {
# initialize variables
$printVersion = 0;
$printHelp = 0;
$horCol = -1;
$verCol = -1;
$resCol = -1;
# check the correct values for the command line parameters
for ($i = 0; $i < (scalar @ARGV); $i++) {
if ($ARGV[$i] eq "-h") {
$printHelp = 1;
} elsif ($ARGV[$i] eq "-data") {
while ( ($i < $#ARGV) && ($ARGV[$i+1] !~ /^-/) ) {
$i++;
push @dataFilesArray, $ARGV[$i];
}
if ( scalar(@dataFilesArray) == 0 ) {
$printHelp = 4;
}
} elsif ($ARGV[$i] eq "-out") {
if ($ARGV[$i+1] !~ /^-/) {
$i++;
$outFile = $ARGV[$i];
} else {
$printHelp = 6;
}
} elsif ($ARGV[$i] eq "-log") {
if ($ARGV[$i+1] !~ /^-/) {
$i++;
$logFile = $ARGV[$i];
} else {
$printHelp = 6;
}
} elsif ($ARGV[$i] eq "-head") {
if ($ARGV[$i+1] !~ /\D/) {
$i++;
$headers = $ARGV[$i];
} else {
$printHelp = 3;
}
} elsif ($ARGV[$i] eq "-ver") {
while ( ($i < $#ARGV) && ($ARGV[$i+1] !~ /^-/) ) {
$i++;
unless ($ARGV[$i] =~ /[^\d]/) {
$verCol = $ARGV[$i] - 1;
push @verCols, $verCol;
}
}
if ($verCol == -1) {
$printHelp = 2;
}
} elsif ($ARGV[$i] eq "-hor") {
while ( ($i < $#ARGV) && ($ARGV[$i+1] !~ /^-/) ) {
$i++;
unless ($ARGV[$i] =~ /[^\d]/) {
$horCol = $ARGV[$i] - 1;
push @horCols, $horCol;
}
}
if ($horCol == -1) {
$printHelp = 2;
}
} elsif ($ARGV[$i] eq "-res") {
while ( ($i < $#ARGV) && ($ARGV[$i+1] !~ /^-/) ) {
$i++;
unless ($ARGV[$i] =~ /[^\d]/) {
$resCol = $ARGV[$i] - 1;
push @resCols, $resCol;
}
}
if ($resCol == -1) {
$printHelp = 2;
}
} elsif ($ARGV[$i] eq "-char") {
if ($ARGV[$i+1] !~ /^-/) {
$i++;
$joinChar = $ARGV[$i];
if ($joinChar eq "tab") { $joinChar = "\t"; }
elsif ($joinChar eq "space") { $joinChar = " "; }
} else {
$printHelp = 2;
}
} elsif ($ARGV[$i] eq "-split") {
$split = 1;
} elsif ($ARGV[$i] eq "-splitter") {
if ($ARGV[$i+1] !~ /^-/) {
$i++;
$splitChar = $ARGV[$i];
if ($splitChar eq "tab") { $splitChar = "\t"; }
elsif ($splitChar eq "space") { $splitChar = " "; }
} else {
$printHelp = 2;
}
} elsif ($ARGV[$i] eq "-v") {
$printVersion = 1;
} else {
$printHelp = 3;
}
last if ($printHelp > 0);
}
if ($horCol == -1 || $verCol == -1 || $resCol == -1) {
if ( ( $printHelp == 0 ) && ( $printVersion == 0 ) && ( (scalar @ARGV) > 0 ) ) {
$printHelp = 5;
}
}
# write everything onto a log file, not on screen
if ($logFile ne "") {
open STDOUT, '>', $logFile;
}
if ($printHelp == 1) {
# display help information
print "\n$programName v$version\n";
print "\nCommand line options:\n";
print "-data\t\tInput files.\n";
print "-out\t\tOutput files.\n";
print "-head\t\tHeader present (default = no).\n";
print "-hor\t\tHorizontal column indexes (will concatenate them).\n";
print "-ver\t\tVertical column indexes (will concatenate them).\n";
print "-res\t\tResults column indexes (will concatenate them).\n";
print "-char\t\tJoining character (default = none).\n";
print "-split\t\tMaintain results columns splitted (default = no).\n";
print "-splitter\tSplitting character (default = tab).\n";
print "\tavailable options: tab space ; ,\n";
print "-h\t\tPrint the help.\n";
print "\n";
print "Usage example:\n";
print "perl columns2table.pl -data table.txt -out table2.txt -head -ver 1 -hor 2 -res 3 4 -char /\n";
print "\n";
exit;
} elsif ($printHelp == 2) {
print "\n$programName v$version\n";
print "\n";
print "ERROR: Invalid char option.\n";
print "Type '-h' or for usage help.\n";
exit;
} elsif ($printHelp == 3) {
print "\n$programName v$version\n";
print "\n";
print "ERROR: Invalid option.\n";
print "Type '-h' or for usage help.\n";
exit;
} elsif ($printHelp == 4) {
print "\n$programName v$version\n";
print "\n";
print "ERROR: No data file defined.\n";
print "Type '-h' or for usage help.\n";
exit;
} elsif ($printHelp == 5) {
print "\n$programName v$version\n";
print "\n";
print "ERROR: Not all the columns have been correctly defined.\n";
print "Type '-h' or for usage help.\n";
exit;
} elsif ($printHelp == 6) {
print "\n$programName v$version\n";
print "\n";
print "ERROR: Invalid out file name.\n";
print "Type '-h' or for usage help.\n";
exit;
} elsif ($printVersion == 1) {
print "$programName v$version";
exit;
}
}