diff options
| author | hukl <hukl@eight.local> | 2009-01-31 19:56:06 +0100 |
|---|---|---|
| committer | hukl <hukl@eight.local> | 2009-01-31 19:56:06 +0100 |
| commit | 816b5e0a0096a6b86279986ac2f9000723dae118 (patch) | |
| tree | 5af49046f8191be6977a6957b7054e6b5e7076de /lib | |
| parent | 482448d05686b6aaa549208dafa36d682b3059b6 (diff) | |
added importer for old ccc updates
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/update_importer.rb | 135 |
1 files changed, 135 insertions, 0 deletions
diff --git a/lib/update_importer.rb b/lib/update_importer.rb new file mode 100644 index 0000000..979501f --- /dev/null +++ b/lib/update_importer.rb | |||
| @@ -0,0 +1,135 @@ | |||
| 1 | require 'rexml/document' | ||
| 2 | |||
# Imports old "updates" articles stored as chaospage XML files and turns them
# into Node/Page records.
#
# Node and Page are not defined in this file; they are used here through
# delete_all, root, create!, find_by_unique_name, move_to_child_of and the
# pages association (presumably ActiveRecord models -- confirm).
class UpdateImporter

  # Elements that only need a new tag name: chaospage tag => XHTML tag.
  SIMPLE_TAGS = {
    'paragraph' => 'p',
    'quote'     => 'q',
    'subtitle'  => 'h3',
    'strong'    => 'i',
    'stronger'  => 'b',
    'chapter'   => 'h2'
  }.freeze

  # Prepares a fresh import below an 'updates' node.
  #
  # WARNING: removes ALL existing Node and Page records before doing
  # anything else.
  #
  # path - directory that contains one sub directory per group of XML files
  #        (see the glob pattern in #import_xml).
  def initialize(path)
    Node.delete_all
    Page.delete_all

    @path = path

    # Make sure there is a root node the 'updates' node can be attached to.
    Node.create! unless Node.root

    @updates = Node.find_by_unique_name('updates')
    unless @updates
      @updates = Node.create! :slug => 'updates'
      @updates.move_to_child_of Node.root
    end
  end

  # Instance Methods

  # Walks over every "<path>/*/*.xml", "*.xml.de" and "*.xml.en" file except
  # index.xml files and hands the parsed root element to
  # #create_node_and_page.
  def import_xml
    Dir.glob("#{@path}/*/*.xml{,.de,.en}").each do |file|
      next if file =~ /index\.xml/

      # The block form closes the file handle again; the document is parsed
      # completely while it is being constructed.
      chaospage = File.open(file) { |io| REXML::Document.new(io) }

      # Files without a language suffix are treated as German.
      lang = file =~ /\.en$/ ? "en" : "de"

      # The id is the file name up to its first dot.
      chaos_id = File.basename(file).split('.').first

      create_node_and_page(chaospage.root, lang, chaos_id)
    end
  end

  # Finds or creates the nodes "updates/<year>" and "updates/<year>/<chaos_id>"
  # and then creates the page for the latter.
  #
  # chaospage - REXML element; its root must have a <date> child.
  # lang      - "de" or "en". NOTE(review): currently not used when the page
  #             is created -- confirm whether Page should store a language.
  # chaos_id  - slug of the article node.
  #
  # Node.create! is used (as in #initialize) so that a node which cannot be
  # saved raises right away instead of failing silently.
  def create_node_and_page(chaospage, lang, chaos_id)
    # String#to_date is not core Ruby (presumably ActiveSupport -- confirm).
    date = chaospage.root.elements['date'].get_text.to_s.to_date
    year_name = "updates/#{date.year}"

    parent_node = Node.find_by_unique_name(year_name)
    unless parent_node
      parent_node = Node.create! :slug => date.year
      parent_node.move_to_child_of @updates
    end

    node = Node.find_by_unique_name("#{year_name}/#{chaos_id}")
    unless node
      node = Node.create! :slug => chaos_id
      node.move_to_child_of parent_node
    end

    create_node_for_page chaospage, node
  end

  # Converts the chaospage to XHTML and, if the node has no page yet, creates
  # one whose body is everything that follows the <abstract> element.
  #
  # chaospage - REXML element; it is modified in place by the conversion.
  # node      - object responding to #pages (which must respond to #empty?
  #             and #create!).
  #
  # Raises NoMethodError if the element has no <abstract> child.
  def create_node_for_page(chaospage, node)
    xhtml = convert_chaospage_to_xhtml(chaospage)

    parts = []
    sibling = xhtml.elements['abstract'].next_sibling
    while sibling
      parts << sibling.to_s
      sibling = sibling.next_sibling
    end

    node.pages.create!(:body => parts.join) if node.pages.empty?
  end

  # Renames chaospage elements and attributes to their XHTML counterparts.
  # The tree is modified in place and the passed element is returned.
  #
  # Top level queries use '//' and therefore cover the whole document the
  # element belongs to. Queries inside a list use './/' so that they only
  # look at that list.
  def convert_chaospage_to_xhtml(element)
    SIMPLE_TAGS.each do |from, to|
      element.each_element("//#{from}") { |sub| sub.name = to }
    end

    element.each_element('//link') do |link|
      link.name = "a"
      rename_attribute(link, "ref", "href")
      link.attributes.delete_all("type")
    end

    element.each_element('//list') do |list|
      if list.get_elements('.//row').empty?
        list.name = "ul"
        list.each_element('.//item') { |item| item.name = "li" }
        list.each_element('.//sub')  { |sublist| sublist.name = "ul" }
      else
        # A list that contains rows is a table.
        list.name = "table"
        list.each_element('.//row') do |row|
          row.name = "tr"
          row.each_element('.//item') { |cell| cell.name = "td" }
        end
      end
    end

    element.each_element('//media') do |media|
      media.name = "img"
      rename_attribute(media, "ref", "src")
      if media.has_text?
        media.add_attribute("alt", media.text)
        # Drop the child elements of the image only, never anything outside.
        media.get_elements('*').each { |child| media.delete_element(child) }
      end
    end

    element.each_element('//name') do |person|
      person.name = "a"
      email = person.attributes["email"]
      if email
        # The mail address wins; a ref attribute, if any, is left untouched
        # so that only a single href is written.
        person.delete_attribute("email")
        person.add_attribute("href", "mailto:" + email)
      else
        rename_attribute(person, "ref", "href")
      end
    end

    element
  end

  private

  # Moves the value of attribute +from+ to a new attribute +to+.
  # Does nothing if the element has no +from+ attribute.
  def rename_attribute(element, from, to)
    value = element.attributes[from]
    return if value.nil?

    element.delete_attribute(from)
    element.add_attribute(to, value)
  end

end
| 132 | |||
# Runs the import as soon as this file is loaded, reading the XML files from
# the hard-coded directory below.
importer = UpdateImporter.new('/Users/hukl/Desktop/updates')
importer.import_xml
| 135 | |||
