-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpopulated_columns.rb
More file actions
executable file
·63 lines (54 loc) · 1.31 KB
/
populated_columns.rb
File metadata and controls
executable file
·63 lines (54 loc) · 1.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/env ruby
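# Outputs to STDOUT:
# - List of columns having values (one header per line)
# - Count of populated columns and count of empty columns, each out of the total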
require "bundler/inline"
gemfile do
source "https://rubygems.org"
gem "csv"
gem "optparse"
gem "pry"
gem "reline"
end
# Parse command-line options into the `options` hash.
#
#   -i, --input PATH   path to the input CSV file; stored in options[:input]
#                      after expansion with File.expand_path
#
# Unknown or malformed options print the parser's error message followed by
# the usage text to STDERR and exit non-zero, rather than an uncaught
# exception backtrace.
options = {}
parser = OptionParser.new do |opts|
  opts.banner = "Usage: ruby populated_columns.rb -i path-to-input-file"

  # The path is read as a single file and parsed as CSV further down, so the
  # help text describes a file rather than a directory.
  opts.on("-i", "--input PATH", "Path to input CSV file") do |path|
    options[:input] = File.expand_path(path)
  end
end

begin
  parser.parse!
rescue OptionParser::ParseError => e
  abort "#{e.message}\n#{parser.help}"
end
# CSV field converter registered as :stripplus.
#
# Normalizes one field value:
# - nil and the literal string "NULL" become nil
# - otherwise: strips surrounding whitespace, collapses runs of spaces to a
#   single space, removes one comma at the very end of the value, removes one
#   %LINEBREAK% / %CRLF% / %CR% / %TAB% placeholder at the very start and one
#   at the very end, then strips again
#
# Anchors are \A / \z (whole value) rather than ^ / $ (per line), so a comma
# or placeholder next to an embedded newline in the middle of a multi-line
# field is left alone and only the true start/end of the value is cleaned.
#
# If a string operation raises ArgumentError (presumably an invalid byte
# sequence for the string's encoding), the original value is returned as-is.
CSV::Converters[:stripplus] = lambda { |s|
  begin
    if s.nil? || s == "NULL"
      nil
    else
      s.strip
        .gsub(/ +/, " ")
        .sub(/,\z/, "")
        .sub(/\A%(LINEBREAK|CRLF|CR|TAB)%/, "")
        .sub(/%(LINEBREAK|CRLF|CR|TAB)%\z/, "")
        .strip
    end
  rescue ArgumentError
    s
  end
}
# Read the input CSV, determine which columns contain at least one non-empty
# value, and report them on STDOUT: the populated header names first, then
# two summary lines with the populated and empty column counts.

# Without -i, options[:input] is nil and File.read would fail with an opaque
# TypeError; stop here with a readable message instead.
abort "Missing required option: -i path-to-input-file" unless options[:input]

table = CSV.parse(
  File.read(options[:input]),
  headers: true,
  converters: [:stripplus]
)

all_headers = table.headers
total_header_ct = all_headers.length

# Column access mode: table[n] with an Integer returns the values of the
# nth column.
table.by_col!

# Look columns up by position rather than by header name so that columns
# sharing the same header text are each evaluated on their own values.
# A column counts as empty when every value is nil or an empty string
# (a column with no rows at all is therefore empty too).
headers = all_headers.each_index
  .reject { |idx| table[idx].all? { |v| v.nil? || v.empty? } }
  .map { |idx| all_headers[idx] }

puts headers
diff = total_header_ct - headers.length
puts "#{headers.length} of #{total_header_ct} columns populated"
puts "#{diff} of #{total_header_ct} columns empty"