null+****@clear*****
null+****@clear*****
Mon Jan 23 13:05:29 JST 2012
Kouhei Sutou 2012-01-23 13:05:29 +0900 (Mon, 23 Jan 2012) New Revision: f3bde7f32b630806fc48bb66f4507823d12e5c52 Log: [external-glossary] share common codes. Modified files: lib/logaling.rb lib/logaling/external_glossaries/debian_project.rb lib/logaling/external_glossaries/gene95.rb lib/logaling/external_glossaries/gnome_project.rb lib/logaling/external_glossaries/postgresql_manual.rb lib/logaling/external_glossary.rb Modified: lib/logaling.rb (+3 -1) =================================================================== --- lib/logaling.rb 2012-01-23 12:42:33 +0900 (91bf34f) +++ lib/logaling.rb 2012-01-23 13:05:29 +0900 (1c1155a) @@ -1,6 +1,7 @@ -# -*- encoding: utf-8 -*- +# -*- coding: utf-8 -*- # # Copyright (C) 2011 Miho SUZUKI +# Copyright (C) 2011 Kouhei Sutou <kou****@clear*****> # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -25,4 +26,5 @@ module Logaling class GlossaryNotFound < LogalingError; end class GlossaryDBNotFound < LogalingError; end class ExternalGlossaryNotFound < LogalingError; end + class UnsupportedFormat < LogalingError; end end Modified: lib/logaling/external_glossaries/debian_project.rb (+8 -8) =================================================================== --- lib/logaling/external_glossaries/debian_project.rb 2012-01-23 12:42:33 +0900 (48339da) +++ lib/logaling/external_glossaries/debian_project.rb 2012-01-23 13:05:29 +0900 (9cf07cc) @@ -1,4 +1,5 @@ # Copyright (C) 2011 Miho SUZUKI +# Copyright (C) 2012 Kouhei Sutou <kou****@clear*****> # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -21,15 +22,14 @@ module Logaling description 'Debian JP Project (http://www.debian.or.jp/community/translate/)' source_language 'en' target_language 'ja' + output_format 'csv' - def convert - buffer = "" - CSV.generate(buffer) do |csv| - doc = ::Nokogiri::HTML(open("http://www.debian.or.jp/community/translate/trans_table.html", "r")) - doc.css('dl').each do |dl| - dl.children.each_slice(3) do |dt, dd, _| - csv << [dt.text, dd.text.gsub(/(^\/|\/$)/,'')] - end + private + def convert_to_csv(csv) + doc = ::Nokogiri::HTML(open("http://www.debian.or.jp/community/translate/trans_table.html", "r")) + doc.css('dl').each do |dl| + dl.children.each_slice(3) do |dt, dd, _| + csv << [dt.text, dd.text.gsub(/(^\/|\/$)/,'')] end end end Modified: lib/logaling/external_glossaries/gene95.rb (+23 -22) =================================================================== --- lib/logaling/external_glossaries/gene95.rb 2012-01-23 12:42:33 +0900 (c6617e8) +++ lib/logaling/external_glossaries/gene95.rb 2012-01-23 13:05:29 +0900 (1667233) @@ -1,4 +1,5 @@ -# Copyright (C) 2012 Koji SHIMADA +# Copyright (C) 2012 Koji SHIMADA +# Copyright (C) 2012 Kouhei Sutou <kou****@clear*****> # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -23,33 +24,33 @@ module Logaling description 'GENE95 Dictionary (http://www.namazu.org/~tsuchiya/sdic/data/gene.html)' source_language 'en' target_language 'ja' + output_format 'csv' - def convert - CSV.generate do |csv| - puts "downloading gene95 dictionary..." - url = 'http://www.namazu.org/~tsuchiya/sdic/data/gene95.tar.gz' - Zlib::GzipReader.open(open(url)) do |gz| - puts "importing gene95 dictionary..." + private + def convert_to_csv(csv) + puts "downloading gene95 dictionary..." + url = 'http://www.namazu.org/~tsuchiya/sdic/data/gene95.tar.gz' + Zlib::GzipReader.open(open(url)) do |gz| + puts "importing gene95 dictionary..." - Gem::Package::TarReader.new(gz) do |tar| - tar.each do |entry| - case entry.full_name - when "gene.txt" - lines = StringIO.new(entry.read).each_line + Gem::Package::TarReader.new(gz) do |tar| + tar.each do |entry| + case entry.full_name + when "gene.txt" + lines = StringIO.new(entry.read).each_line - 2.times { lines.next } # skip header + 2.times { lines.next } # skip header - preprocessed_lines = lines.map.map do |line| - line.encode("UTF-8", "CP932", - undef: :replace, replace: '').chomp - end + preprocessed_lines = lines.map.map do |line| + line.encode("UTF-8", "CP932", + undef: :replace, replace: '').chomp + end - preprocessed_lines.each_slice(2) do |source, target| - csv << [source.sub(/( .*)/, ''), target] - end - else - # ignore + preprocessed_lines.each_slice(2) do |source, target| + csv << [source.sub(/( .*)/, ''), target] end + else + # ignore end end end Modified: lib/logaling/external_glossaries/gnome_project.rb (+7 -7) =================================================================== --- lib/logaling/external_glossaries/gnome_project.rb 2012-01-23 12:42:33 +0900 (3ac5d8e) +++ lib/logaling/external_glossaries/gnome_project.rb 2012-01-23 13:05:29 +0900 (06d3837) @@ -1,4 +1,5 @@ # Copyright (C) 2011 Miho SUZUKI +# Copyright (C) 2012 Kouhei Sutou <kou****@clear*****> # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -21,14 +22,13 @@ module Logaling description 'GNOME Translation Project Ja (http://live.gnome.org/TranslationProjectJa)' source_language 'en' target_language 'ja' + output_format 'csv' - def convert - buffer = "" - CSV.generate(buffer) do |csv| - doc = ::Nokogiri::HTML(open("http://www.gnome.gr.jp/l10n/trans-terms.html", "r")) - doc.css('table tr')[1..-1].each do |tr| - csv << [tr.children[0].text, tr.children[1].text] - end + private + def convert_to_csv(csv) + doc = ::Nokogiri::HTML(open("http://www.gnome.gr.jp/l10n/trans-terms.html", "r")) + doc.css('table tr')[1..-1].each do |tr| + csv << [tr.children[0].text, tr.children[1].text] end end end Modified: lib/logaling/external_glossaries/postgresql_manual.rb (+8 -8) =================================================================== --- lib/logaling/external_glossaries/postgresql_manual.rb 2012-01-23 12:42:33 +0900 (95238aa) +++ lib/logaling/external_glossaries/postgresql_manual.rb 2012-01-23 13:05:29 +0900 (473a8dc) @@ -1,4 +1,5 @@ # Copyright (C) 2011 Miho SUZUKI +# Copyright (C) 2012 Kouhei Sutou <kou****@clear*****> # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -21,15 +22,14 @@ module Logaling description 'PostgreSQL7.1 Manual (http://osb.sraoss.co.jp/PostgreSQL/Manual/)' source_language 'en' target_language 'ja' + output_format 'csv' - def convert - buffer = "" - CSV.generate(buffer) do |csv| - doc = ::Nokogiri::HTML(open("http://osb.sraoss.co.jp/PostgreSQL/Manual/word.html", "r:iso-2022-jp").read.encode("utf-8")) - doc.css('table table tr')[2..-1].each do |tr| - if tr.children[2] - csv << [tr.children[2].text, tr.children[4].text] - end + private + def convert_to_csv(csv) + doc = ::Nokogiri::HTML(open("http://osb.sraoss.co.jp/PostgreSQL/Manual/word.html", "r:iso-2022-jp").read.encode("utf-8")) + doc.css('table table tr')[2..-1].each do |tr| + if tr.children[2] + csv << [tr.children[2].text, tr.children[4].text] end end end Modified: lib/logaling/external_glossary.rb (+20 -2) =================================================================== --- lib/logaling/external_glossary.rb 2012-01-23 12:42:33 +0900 (ab2b09c) +++ lib/logaling/external_glossary.rb 2012-01-23 13:05:29 +0900 (0d9a0a7) @@ -56,14 +56,32 @@ class Logaling::ExternalGlossary def target_language val=nil @target_language ||= val end + + def output_format(*args) + if args.empty? + @output_format ||= "csv" + else + @output_format = args.first + end + end end def import - File.open(import_file_name, "w") {|f| f.write(self.convert) } + File.open(import_file_name, "w") do |output| + output_format = self.class.output_format + output_format = output_format.to_s if output_format.is_a?(Symbol) + case output_format + when "csv" + convert_to_csv(CSV.new(output)) + else + raise UnsupportedFormat, "unsupported format: <#{output_format}>" + end + end end private def import_file_name - [self.class.name, self.class.source_language, self.class.target_language, 'csv'].join('.') + [self.class.name, self.class.source_language, + self.class.target_language, self.class.output_format].join('.') end end