From a7cdc79ed3f3acf8680bead604aba9eb3a6984fb Mon Sep 17 00:00:00 2001
From: VVSiz
Date: Mon, 7 Mar 2011 13:59:57 +0100
Subject: [PATCH] dsl2txt script added

---
 dsl2txt.rb           | 89 ++++++++++++++++++++++++++++++++++++++++++++
 lib/Card.rb          |  2 +-
 lib/Dictionary.rb    | 29 +++++++++++++++
 lib/TxtDictionary.rb | 48 ++++++++++++++++++++++++
 4 files changed, 167 insertions(+), 1 deletion(-)
 create mode 100644 dsl2txt.rb
 create mode 100644 lib/TxtDictionary.rb

diff --git a/dsl2txt.rb b/dsl2txt.rb
new file mode 100644
index 0000000..9ee347a
--- /dev/null
+++ b/dsl2txt.rb
@@ -0,0 +1,89 @@
+# Converts a DSL dictionary (-i FILE) into plain-text dictionaries: one
+# .txt file per description, written into the directory given by -o DIR.
+#
+# TODO:
+# 1. check for key, value, desc validity
+# 2. check for empty cards
+
+require 'optparse'
+require 'fileutils'
+
+require File.expand_path('../lib/Dictionary', __FILE__)
+require File.expand_path('../lib/Card', __FILE__)
+require File.expand_path('../lib/TxtDictionary', __FILE__)
+
+$IN = ""
+$OUT = ""
+$DOT = "*"
+$DESC_DOT = "-"
+
+$separator = nil
+opts = OptionParser.new
+opts.on("-i FILE", "--in FILE", "input file to convert", String) {|val| $IN = val }
+opts.on("-o DIR", "--out DIR", "directory to extract txt files", String) { |val|
+  if ($OUT.empty?)
+    $OUT = val
+  else
+    $stderr.puts "Error: output file can be defined only once!\n\n"
+    $stderr.puts opts
+    exit 1
+  end
+}
+opts.on("-s", "start each line with a separator") { $separator = $DOT }
+opts.on_tail("-h", "--help", "Show this message") do
+  $stderr.puts opts
+  exit
+end
+opts.parse(ARGV)
+
+# both the input file and the output directory are mandatory
+if ($IN.size == 0 || $OUT.size == 0)
+  $stderr.puts opts
+  exit 1
+end
+
+# load the DSL dictionary
+d = nil
+File.open($IN, 'rb') { |f|
+  d = Dictionary.load_from_dsl(f)
+}
+
+if File.exist?($OUT)
+  $stderr.puts "ERROR: Output directory already exist: '#{$OUT}'"
+  # TODO: must be enabled
+  # exit
+
+  # back = $OUT + ".bak"
+  # FileUtils.copy($OUT, back)
+  # $stderr.puts "Made backup copy: #{back}"
+  # File.open($OUT, 'rb') { |f|
+  #   d = Dictionary.load_from_dsl(f)
+  #}
+else
+  $stderr.puts "Creating output directory: '#{$OUT}'"
+  Dir.mkdir($OUT)
+end
+
+# NOTE: the list of file names is empty, so this loop never runs; it looks
+# like a placeholder for merging extra txt files into the dictionary.
+[].each { |file_name|
+  desc = ""
+  File.foreach(file_name) { |line|
+    if line =~ /^(?:\xEF\xBB\xBF)?#\s*[Dd]escription\s*(.*)$/
+      desc = $1.strip
+    elsif line =~ /^\s*$/
+      # ignore
+    elsif line =~ /^(.*?):(.*)$/
+      key, value = $1, $2
+      d.add(key, value, desc)
+    end
+  }
+}
+
+d.get_txt_dicts.each { |dict|
+  # NOTE: "CON" is the console device on Windows only; on other systems
+  # this call writes a regular file named "CON" in the current directory.
+  dict.print_out("CON")
+  dict.extract_to_dir($OUT)
+}
+
diff --git a/lib/Card.rb b/lib/Card.rb
index a07e84e..44f462b 100644
--- a/lib/Card.rb
+++ b/lib/Card.rb
@@ -1,5 +1,5 @@
 class Card
-  attr_accessor :headword
+  attr_accessor :headword, :entries
   def initialize(headword)
     @headword = headword
     @entries = {}
diff --git a/lib/Dictionary.rb b/lib/Dictionary.rb
index e924ff1..47a6fd5 100644
--- a/lib/Dictionary.rb
+++ b/lib/Dictionary.rb
@@ -90,4 +90,33 @@ def print_out(out_name)
     $stderr.puts "File #{out_name} written...."
     $stderr.puts "Total number of headwords: #{@cards.size}"
   end
+
+  # Yields every card, in the sort order of the cards themselves (they
+  # must be mutually comparable). Without a block returns an Enumerator.
+  def each_card
+    return enum_for(:each_card) unless block_given?
+
+    @cards.values.sort.each { |card|
+      yield card
+    }
+  end
+
+  # Regroups the cards by description: creates one TxtDictionary per
+  # distinct description and adds a (headword, trn) pair to it for every
+  # entry of every card that carries that description.
+  # Returns the TxtDictionary objects as an array.
+  def get_txt_dicts
+    txt_dicts = {}
+
+    each_card { |card|
+      hwd = card.headword
+      card.entries.each { |trn, descs|
+        descs.each { |desc|
+          (txt_dicts[desc] ||= TxtDictionary.new(desc)).add(hwd, trn)
+        }
+      }
+    }
+
+    txt_dicts.values
+  end
 end
diff --git a/lib/TxtDictionary.rb b/lib/TxtDictionary.rb
new file mode 100644
index 0000000..924b0ab
--- /dev/null
+++ b/lib/TxtDictionary.rb
@@ -0,0 +1,48 @@
+# A flat text dictionary: one description plus an ordered list of
+# (key, trn) pairs.
+class TxtDictionary
+  # desc is written to the "#description" header line and is also the
+  # source of the file name (see desc_to_filename).
+  def initialize(desc)
+    @desc = desc
+    @data = []
+  end
+
+  # Appends one (key, trn) pair; insertion order is kept.
+  def add(key, trn)
+    @data << [key, trn]
+  end
+
+  # Writes the dictionary to "dir_name/desc_to_filename.txt".
+  # Never overwrites: when the file already exists an error is printed
+  # and the process exits with a non-zero status.
+  def extract_to_dir(dir_name)
+    file = File.join(dir_name, desc_to_filename + ".txt")
+    if (File.exist?(file))
+      $stderr.puts "ERROR: file already exist: '#{file}'"
+      exit 1
+    end
+    print_out(file)
+  end
+
+  # Writes a "#description" header line, an empty line and then one
+  # "key:trn" line per pair to the file named out_name (truncating it).
+  def print_out(out_name)
+    # block form: the file is closed (and flushed) even if a write fails
+    File.open(out_name, 'w') { |out|
+      out.puts "#description #{@desc}"
+      out.puts ""
+      @data.each { |key, trn|
+        out.puts "#{key}:#{trn}"
+      }
+    }
+  end
+
+  # File name stem built from the description: double quotes are removed
+  # and each whitespace + backslash + '#' sequence is replaced by '_'.
+  # NOTE(review): the regexp only matches that three-character sequence;
+  # if "any of these characters" was meant it should be /[\s\\#]/ - confirm.
+  def desc_to_filename
+    @desc.gsub('"', '').gsub(/\s\\#/, '_')
+  end
+end