-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathtblstack
executable file
·95 lines (81 loc) · 1.81 KB
/
tblstack
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/perl
# tblstack: stack multiple CSV files
use strict;
use warnings;
use locale;
use Getopt::Std;
# Command-line switches: -i (read the file list from STDIN), -h (help).
my %flags;
# getopts() returns false when it meets a switch it does not know; remember
# that so a mistyped option shows the usage text instead of being ignored.
my $opts_ok = getopts("ih", \%flags);
# Column separator: TBLSEP when it is set to a non-empty string, TAB otherwise.
# The length is tested (not the truth value) so that a separator of "0" is
# honoured instead of silently falling back to TAB.
my $tblsep = $ENV{'TBLSEP'};
my $SEP = (defined($tblsep) && length($tblsep)) ? $tblsep : "\t";
# Input files named on the command line (more may be appended by -i).
my @files = @ARGV;
# Print the usage summary and stop when help is requested, when an invalid
# switch was given, or when there is nothing to read from.
if(!$opts_ok || defined($flags{'h'}) || (!@files && !defined($flags{'i'})))
{
    print(STDERR qq{$0 [-ih] file [file ...]:
Stack multiple CSV files into a single table, merging columns (but not rows).
CSV files are TAB separated, containing column labels on the first row.
You can change the column separator by setting the TBLSEP environment variable.
-i: read input file list from STDIN
-h: help summary
});
    exit(2);
}
# read input file list: with -i, every non-empty line of STDIN names one more
# input file, appended after the files given on the command line
if(defined($flags{'i'}))
{
    # a named lexical is used so the global $_ is left alone
    while(defined(my $name = <STDIN>))
    {
        # strip the line terminator (LF or CRLF)
        $name =~ s/[\r\n]+$//;
        # a blank line is not a file name; skipping it avoids aborting later
        # with "cannot open" on e.g. a trailing empty line
        next if($name eq '');
        push(@files, $name);
    }
}
# Open $file for reading and parse its header row.
# Returns the read handle, positioned on the first data row, followed by the
# list of column labels. A file without any line yields the handle alone,
# i.e. a table with no columns.
# Dies when the file cannot be opened.
sub open_table
{
    my ($file) = @_;
    # 3-arg open with a lexical handle: the name is always taken as a plain
    # path, never as a mode or a pipe, whatever characters it contains
    open(my $fh, '<', $file) or die("cannot open $file: $!\n");
    my $header = <$fh>;
    return ($fh) if(!defined($header));
    $header =~ s/[\r\n]+$//;
    # \Q..\E: the separator is matched literally (split would otherwise
    # compile it as a regex and e.g. "|" or "." would split everywhere),
    # which keeps reading consistent with the join() used for writing;
    # limit -1 keeps trailing empty fields
    return ($fh, split(/\Q$SEP\E/, $header, -1));
}

# initial state
my %cols;    # label => index of its column in the output, in first-seen order
# 1st stage: compute labels
foreach my $file(@files)
{
    my ($fh, @labels) = open_table($file);
    # only the header is needed in this pass
    close($fh);
    # remap (labels): a label seen for the first time gets the next free column
    foreach my $col(@labels)
    {
        next if(exists($cols{$col}));
        my $pos = keys(%cols);
        $cols{$col} = $pos;
    }
}
# output labels, ordered by the column index assigned above
my @header = sort { $cols{$a} <=> $cols{$b} } keys(%cols);
print(join($SEP, @header) . "\n");
# 2nd stage: align data
my $width = keys(%cols);
foreach my $file(@files)
{
    my ($fh, @labels) = open_table($file);
    # remap (labels): output column index of each input column
    my @cmap = map { $cols{$_} } @labels;
    # remap (contents)
    while(my $row = <$fh>)
    {
        $row =~ s/[\r\n]+$//;
        my @line = split(/\Q$SEP\E/, $row, -1);
        # every data row must have exactly as many fields as the header
        die("line error at $file:$.:$#line") if($#line != $#cmap);
        # columns this file does not have stay empty; when a file repeats a
        # label, the rightmost field wins
        my @out = ('') x $width;
        @out[@cmap] = @line;
        print(join($SEP, @out) . "\n");
    }
    close($fh);
}