diff options
Diffstat (limited to 'lib/chaos_xml.rb')
| -rw-r--r-- | lib/chaos_xml.rb | 159 |
1 files changed, 159 insertions, 0 deletions
diff --git a/lib/chaos_xml.rb b/lib/chaos_xml.rb new file mode 100644 index 0000000..f36708a --- /dev/null +++ b/lib/chaos_xml.rb | |||
| @@ -0,0 +1,159 @@ | |||
| 1 | require 'iconv' | ||
| 2 | require 'nokogiri' | ||
| 3 | |||
# Imports "chaos" XML pages from a directory tree into the Node hierarchy.
#
# Files are discovered as `<path>/*/*.xml`, optionally suffixed with `.de` or
# `.en`; anything matching `index.xml` is skipped. Each page is attached as
# `updates/<year>/<chaos_id>`, where the year comes from the page's `<date>`
# element and the chaos id from the file name.
#
# `Node` and `I18n` are provided by the surrounding application
# (NOTE(review): Node looks like a nested-set model -- confirm).
class ChaosXml
  include Enumerable

  # Plain element renames performed by #convert_to_html, applied in order.
  # `strong` must be handled before `stronger`; otherwise the elements that
  # were just renamed to `strong` would be turned into `em` as well.
  ELEMENT_RENAMES = [
    ['paragraph', 'p'],
    ['quote',     'blockquote'],
    ['subtitle',  'h3'],
    ['strong',    'em'],
    ['stronger',  'strong'],
    ['chapter',   'h2']
  ].freeze

  # path - directory that contains one sub-directory level with the XML files.
  #
  # Creates a root Node when none exists yet.
  def initialize(path)
    Node.create! unless Node.root

    @path  = path
    @years = {}
  end

  # Walks all pages, makes sure a node exists for each of them, converts the
  # markup to HTML and hands the result to #fill_draft_with_content.
  def import_xml
    updates_node # make sure the `updates` container exists before iterating

    each do |chaospage, chaos_id, lang|
      node = find_or_create_node(chaospage, chaos_id)
      html = convert_to_html(chaospage)
      fill_draft_with_content(node.draft, html, lang)
    end
  end

  # Yields `chaospage, chaos_id, lang` for every page file below @path:
  # the parsed Nokogiri document, the id derived from the file name and the
  # language symbol derived from the file suffix.
  #
  # Returns an Enumerator when called without a block.
  def each
    return enum_for(:each) unless block_given?

    Dir.glob("#{@path}/*/*.xml{,.de,.en}").each do |file|
      next if file =~ /index\.xml/

      # File.read closes the handle again, unlike File.new(file).read
      chaospage = Nokogiri::XML(File.read(file))

      yield chaospage, chaos_id_from_path(file), lang_from_path(file)
    end
  end

  # Returns :en for paths ending in `.en`, :de for everything else
  # (explicit `.de` suffix or no language suffix at all).
  def lang_from_path(path)
    path.end_with?('.en') ? :en : :de
  end

  # Returns the file name of `path` up to its first dot,
  # e.g. "foo" for ".../2010/foo.xml.en".
  def chaos_id_from_path(path)
    File.basename(path).split('.').first
  end

  # Finds the node `updates/<year>/<chaos_id>` or creates it below the year
  # node (which is created on demand as well).
  #
  # chaospage - parsed page; its first `<date>` element determines the year.
  # chaos_id  - slug used for the page node.
  #
  # Raises ArgumentError when the page has no `<date>` element.
  def find_or_create_node(chaospage, chaos_id)
    date_element = chaospage.xpath('//date').first
    raise ArgumentError, "chaos page #{chaos_id} has no <date> element" unless date_element

    year   = date_element.content.to_date.year
    parent = year_node(year)

    node = Node.find_by_unique_name("updates/#{year}/#{chaos_id}")
    unless node
      node = Node.create :slug => chaos_id
      node.move_to_child_of parent
    end

    node
  end

  # Collects title, abstract and body of the page for the given language.
  # Sets I18n.locale to `lang` as a side effect.
  #
  # Returns the attribute hash. Writing it to `draft` is still disabled, so
  # for now the attributes are only printed.
  def fill_draft_with_content(draft, chaospage, lang)
    I18n.locale = lang

    options = {
      :title    => chaospage.xpath('//title')[0].content,
      :abstract => chaospage.xpath('//abstract')[0].content,
      :body     => extract_body(chaospage)
    }

    puts options.inspect
    # draft.update_attributes options
    options
  end

  # Returns the serialized markup of every sibling that follows the first
  # `<abstract>` element, concatenated into one String.
  def extract_body(chaospage)
    parts   = []
    element = chaospage.xpath('//abstract')[0].next_sibling

    while element
      parts << element.to_s
      element = element.next_sibling
    end

    parts.join
  end

  # Rewrites the chaos markup of `chaospage` in place into HTML element and
  # attribute names and returns the same document.
  def convert_to_html(chaospage)
    ELEMENT_RENAMES.each do |from, to|
      chaospage.xpath("//#{from}").each { |element| element.name = to }
    end

    chaospage.xpath('//link').each do |link|
      link.name = 'a'
      move_attribute(link, 'ref', 'href')
      link.remove_attribute('type')
    end

    chaospage.xpath('//list').each do |list|
      if !list.css('row item').empty?
        # lists made of rows become tables
        list.name = 'table'
        list.css('row').each     { |row|  row.name  = 'tr' }
        list.css('tr item').each { |cell| cell.name = 'td' }
      elsif !list.css('item').empty?
        list.name = 'ul'
        list.css('item').each { |item| item.name = 'li' }
      end
    end

    chaospage.xpath('//media').each do |media|
      media.name = 'img'
      move_attribute(media, 'ref', 'src')

      # Text inside <media> becomes the alt text; the image itself is emptied.
      # Only the element's own children are removed, never other parts of
      # the document.
      alt = media.content.to_s.strip
      unless alt.empty?
        media['alt'] = alt
        media.children.each { |child| child.remove }
      end
    end

    chaospage.xpath('//name').each do |person|
      email = person['email']
      if email
        person.remove_attribute('email')
        person['href'] = "mailto:#{email}"
      end
      person.name = 'a'

      # an explicit ref wins over the mailto link
      move_attribute(person, 'ref', 'href')
    end

    chaospage
  end

  private

  # Returns the `updates` container node, creating it below the root node
  # on first use.
  def updates_node
    @updates ||= Node.find_by_unique_name('updates')

    unless @updates
      @updates = Node.create!(:slug => 'updates')
      @updates.move_to_child_of Node.root
    end

    @updates
  end

  # Returns the node `updates/<year>`, creating it below the updates node
  # when missing. Results are cached per year in @years.
  def year_node(year)
    @years ||= {}
    @years[year] ||= Node.find_by_unique_name("updates/#{year}")

    unless @years[year]
      @years[year] = Node.create :slug => year
      @years[year].move_to_child_of updates_node
    end

    @years[year]
  end

  # Renames attribute `from` of `node` to `to`; does nothing when the
  # attribute is absent so no empty/nil attribute is written.
  def move_attribute(node, from, to)
    value = node[from]
    return unless value

    node.remove_attribute(from)
    node[to] = value
  end
end
