summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorhukl <contact@smyck.org>2009-03-29 23:26:44 +0200
committerhukl <contact@smyck.org>2009-03-29 23:26:44 +0200
commit2ca3a93bca6ebc8e4639c1dc028eb7804bc1f727 (patch)
tree859c6177be63b5808cecbe4d96b1f25a64cdea51 /lib
parent74aae2ad573118b46522c65b5f682da6a8d8c8e2 (diff)
rewrite of the importer - not done yet
Diffstat (limited to 'lib')
-rw-r--r--lib/chaos_xml.rb159
1 files changed, 159 insertions, 0 deletions
diff --git a/lib/chaos_xml.rb b/lib/chaos_xml.rb
new file mode 100644
index 0000000..f36708a
--- /dev/null
+++ b/lib/chaos_xml.rb
@@ -0,0 +1,159 @@
1require 'iconv'
2require 'nokogiri'
3
# Imports legacy "chaos" XML pages found below a directory into the Node tree.
#
# Each XML file is parsed, attached to an "updates/<year>/<chaos_id>" node
# (created on demand), rewritten in place into HTML-like markup and finally
# handed to the node's draft.
#
# NOTE(review): Node and I18n are defined outside this file; Node looks like
# an ActiveRecord nested-set model (find_by_unique_name, move_to_child_of) --
# confirm against its definition.
class ChaosXml
  include Enumerable

  # Simple element renames applied by #convert_to_html, as [from, to] pairs.
  # Order matters: "strong" has to become "em" before "stronger" becomes
  # "strong", otherwise freshly renamed elements would be renamed twice.
  TAG_RENAMES = [
    %w[paragraph p],
    %w[quote blockquote],
    %w[subtitle h3],
    %w[strong em],
    %w[stronger strong],
    %w[chapter h2]
  ].freeze

  # path - directory that is searched for "*/*.xml", "*/*.xml.de" and
  #        "*/*.xml.en" files.
  #
  # Creates a Node when Node.root is falsy.
  def initialize(path)
    Node.create! unless Node.root

    @path = path
    @years = {} # year => Node, filled lazily by #find_or_create_node
  end

  # Runs the import: looks up (or creates below Node.root) the "updates" node,
  # then processes every page yielded by #each.
  def import_xml
    @updates = Node.find_by_unique_name('updates')
    unless @updates
      @updates = Node.create!(:slug => 'updates')
      @updates.move_to_child_of Node.root
    end

    each do |chaospage, chaos_id, lang|
      node = find_or_create_node(chaospage, chaos_id)
      html = convert_to_html(chaospage)
      fill_draft_with_content(node.draft, html, lang)
    end
  end

  # Yields [parsed document, chaos id, language] for every matching file
  # except those whose path contains "index.xml".
  #
  # Returns an Enumerator when no block is given.
  def each
    return enum_for(:each) unless block_given?

    Dir.glob("#{@path}/*/*.xml{,.de,.en}").each do |path|
      next if path =~ /index\.xml/

      # File.read closes the file handle; File.new(path).read left it open.
      chaospage = Nokogiri::XML(File.read(path))

      yield chaospage, chaos_id_from_path(path), lang_from_path(path)
    end
  end

  # Returns :en for paths ending in ".en"; everything else (".de" or no
  # language suffix) is treated as German.
  def lang_from_path(path)
    path.end_with?('.en') ? :en : :de
  end

  # Returns the file name up to its first dot,
  # e.g. "/import/2009/foo.xml.de" => "foo".
  def chaos_id_from_path(path)
    File.basename(path).split('.').first
  end

  # Returns the node "updates/<year>/<chaos_id>", creating it (and the year
  # node) when missing. The year is read from the first <date> element.
  #
  # Relies on @updates, which is set by #import_xml.
  # NOTE(review): String#to_date is not core Ruby -- presumably ActiveSupport.
  def find_or_create_node(chaospage, chaos_id)
    year = chaospage.xpath('//date').first.content.to_date.year
    parent = year_node(year)

    node = Node.find_by_unique_name("updates/#{year}/#{chaos_id}")
    unless node
      node = Node.create(:slug => chaos_id)
      node.move_to_child_of parent
    end

    node
  end

  # Collects title, abstract and body from the converted page for +draft+.
  #
  # Sets I18n.locale to +lang+ and does not restore it. The importer is
  # unfinished: the attributes are only printed, the update call is still
  # commented out.
  def fill_draft_with_content(draft, chaospage, lang)
    I18n.locale = lang

    options = {
      :title    => chaospage.xpath('//title')[0].content,
      :abstract => chaospage.xpath('//abstract')[0].content,
      :body     => extract_body(chaospage)
    }

    puts options.inspect
    #draft.update_attributes options
  end

  # Returns the serialized markup of every sibling that follows the first
  # <abstract> element, concatenated into one String.
  def extract_body(chaospage)
    parts = []
    element = chaospage.xpath('//abstract')[0].next_sibling

    while element
      parts << element.to_s
      element = element.next_sibling
    end

    # Return the text; the previous `puts body` made this method return nil.
    parts.join
  end

  # Rewrites the chaos markup of +chaospage+ in place into HTML-like element
  # names and attributes and returns the same document.
  def convert_to_html(chaospage)
    TAG_RENAMES.each do |from, to|
      chaospage.xpath("//#{from}").each { |element| element.name = to }
    end

    convert_links(chaospage)
    convert_lists(chaospage)
    convert_media(chaospage)
    convert_names(chaospage)

    chaospage
  end

  private

  # Returns the cached node for "updates/<year>", looking it up or creating
  # it below @updates on first use.
  def year_node(year)
    @years[year] ||= Node.find_by_unique_name("updates/#{year}")

    unless @years[year]
      @years[year] = Node.create(:slug => year)
      @years[year].move_to_child_of @updates
    end

    @years[year]
  end

  # <link ref="..." type="..."> becomes <a href="...">.
  def convert_links(chaospage)
    chaospage.xpath('//link').each do |link|
      link.name = 'a'
      href = link['ref']
      link.remove_attribute('ref')
      link['href'] = href if href
      link.remove_attribute('type')
    end
  end

  # <list> with rows becomes a table, <list> with plain items becomes <ul>.
  def convert_lists(chaospage)
    chaospage.xpath('//list').each do |list|
      if !list.css('row item').empty?
        list.name = 'table'

        list.css('row').each { |row| row.name = 'tr' }
        list.css('tr item').each { |cell| cell.name = 'td' }
      elsif !list.css('item').empty?
        list.name = 'ul'

        list.css('item').each { |item| item.name = 'li' }
      end
    end
  end

  # <media ref="...">text</media> becomes <img src="..." alt="text">.
  def convert_media(chaospage)
    chaospage.xpath('//media').each do |media|
      media.name = 'img'
      src = media['ref']
      # The source attribute is "ref"; removing "src" here left it in place.
      media.remove_attribute('ref')
      media['src'] = src if src

      alt = media.content.to_s.strip
      next if alt.empty?

      media['alt'] = alt
      # Only drop this element's own children; an absolute '//*' query would
      # match (and remove) every element of the document.
      media.children.each { |child| child.remove }
    end
  end

  # <name email="..." ref="..."> becomes <a href="...">. When both attributes
  # are present the ref wins over the mailto: link, because it is set last.
  def convert_names(chaospage)
    chaospage.xpath('//name').each do |person|
      email = person['email']
      if email
        person.remove_attribute('email')
        person['href'] = "mailto:#{email}"
      end

      person.name = 'a'

      ref = person['ref']
      if ref
        person.remove_attribute('ref')
        person['href'] = ref
      end
    end
  end
end