summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorhukl <hukl@eight.local>2009-01-31 19:56:06 +0100
committerhukl <hukl@eight.local>2009-01-31 19:56:06 +0100
commit816b5e0a0096a6b86279986ac2f9000723dae118 (patch)
tree5af49046f8191be6977a6957b7054e6b5e7076de /lib
parent482448d05686b6aaa549208dafa36d682b3059b6 (diff)
added importer for old ccc updates
Diffstat (limited to 'lib')
-rw-r--r--lib/update_importer.rb135
1 files changed, 135 insertions, 0 deletions
diff --git a/lib/update_importer.rb b/lib/update_importer.rb
new file mode 100644
index 0000000..979501f
--- /dev/null
+++ b/lib/update_importer.rb
@@ -0,0 +1,135 @@
1require 'rexml/document'
2
# Imports the old CCC "updates" XML pages ("chaospages") into the Node/Page
# tree, converting the chaospage markup to XHTML-like markup on the way.
#
# WARNING: constructing an importer deletes every existing Node and Page
# record before anything is imported (see #initialize).
class UpdateImporter

  # Elements that only need a new tag name: [chaospage tag, replacement tag].
  SIMPLE_TAG_RENAMES = [
    ['paragraph', 'p'],
    ['quote',     'q'],
    ['subtitle',  'h3'],
    ['strong',    'i'],
    ['stronger',  'b'],
    ['chapter',   'h2']
  ].freeze

  # Wipes all Node and Page records, makes sure a root node exists and
  # finds or creates the "updates" node directly below the root.
  #
  # path - directory that holds the exported update XML files.
  def initialize(path)
    Node.delete_all
    Page.delete_all

    @path = path

    Node.create! unless Node.root

    @updates = Node.find_by_unique_name('updates') ||
               create_child_node('updates', Node.root)
  end

  # Class Methods

  # Instance Methods

  # Imports every "<path>/*/*.xml", "*.xml.de" and "*.xml.en" file whose path
  # does not contain "index.xml".
  #
  # The language is "en" for files ending in ".en" and "de" for everything
  # else; the chaos id is the file name up to its first dot.
  def import_xml
    Dir.glob("#{@path}/*/*.xml{,.de,.en}").each do |file|
      next if file =~ /index\.xml/

      # Block form so the file handle is closed again; REXML reads the whole
      # stream while constructing the document.
      chaospage = File.open(file) { |io| REXML::Document.new(io) }

      lang     = file =~ /\.en$/ ? "en" : "de"
      chaos_id = File.basename(file).split('.').first

      create_node_and_page(chaospage.root, lang, chaos_id)
    end
  end

  # Finds or creates the nodes "updates/<year>" and "updates/<year>/<chaos_id>"
  # and then creates the page for the latter.
  #
  # chaospage - REXML element of the parsed page; its root must contain a
  #             <date> element.
  # lang      - language code determined by #import_xml. It is accepted for
  #             interface compatibility but not used here.
  # chaos_id  - slug of the node that will hold the page.
  #
  # NOTE(review): String#to_date is not core Ruby; presumably provided by
  # ActiveSupport in the host application - confirm.
  def create_node_and_page(chaospage, lang, chaos_id)
    date      = chaospage.root.elements['date'].get_text.to_s.to_date
    year_name = "updates/#{date.year}"

    year_node = Node.find_by_unique_name(year_name) ||
                create_child_node(date.year, @updates)

    node = Node.find_by_unique_name("#{year_name}/#{chaos_id}") ||
           create_child_node(chaos_id, year_node)

    create_node_for_page(chaospage, node)
  end

  # Converts the chaospage to XHTML, joins everything that follows the
  # <abstract> element into one string, prints it and stores it as the body
  # of a new page unless the node already has pages.
  #
  # Expects an <abstract> child directly below the given element; without
  # one the lookup yields nil and the next_sibling call raises NoMethodError.
  def create_node_for_page(chaospage, node)
    xhtml = convert_chaospage_to_xhtml(chaospage)

    body    = ""
    current = xhtml.elements['abstract'].next_sibling

    while current
      body << current.to_s
      current = current.next_sibling
    end

    puts body

    node.pages.create!(:body => body) if node.pages.empty?
  end

  # Rewrites the chaospage markup below +element+ in place and returns
  # +element+:
  #
  #   paragraph/quote/subtitle/strong/stronger/chapter -> p/q/h3/i/b/h2
  #   link  -> a     (ref becomes href, type is dropped)
  #   list  -> table (row -> tr, item -> td) when it contains rows,
  #            ul    (item -> li, sub -> ul) otherwise
  #   media -> img   (ref becomes src, text content becomes alt)
  #   name  -> a     (email becomes a mailto: href, ref becomes href)
  def convert_chaospage_to_xhtml(element)
    SIMPLE_TAG_RENAMES.each do |from, to|
      element.each_element("//#{from}") { |node| node.name = to }
    end

    element.each_element('//link') do |link|
      link.name = "a"
      rename_attribute(link, "ref", "href")
      link.attributes.delete_all("type")
    end

    element.each_element('//list')  { |list|   convert_list(list) }
    element.each_element('//media') { |media|  convert_media(media) }
    element.each_element('//name')  { |person| convert_name(person) }

    element
  end

  private

  # Creates a node with the given slug and moves it below +parent+.
  # Uses create! so an invalid node fails here instead of later on.
  def create_child_node(slug, parent)
    node = Node.create!(:slug => slug)
    node.move_to_child_of(parent)
    node
  end

  # Turns one <list> into a table or an unordered list. All lookups are
  # relative (".//") so that only this list's own descendants are touched;
  # an absolute "//" would search the entire document.
  def convert_list(list)
    if list.get_elements('.//row').empty?
      list.name = "ul"
      list.each_element('.//item') { |item|    item.name = "li" }
      list.each_element('.//sub')  { |sublist| sublist.name = "ul" }
    else
      list.name = "table"
      list.each_element('.//row') do |row|
        row.name = "tr"
        row.each_element('.//item') { |cell| cell.name = "td" }
      end
    end
  end

  # Turns one <media> element into an <img>; a text caption is moved into
  # the alt attribute and the element's children are removed.
  def convert_media(media)
    media.name = "img"
    rename_attribute(media, "ref", "src")
    return unless media.has_text?

    media.add_attribute("alt", media.text)
    media.children.each { |child| child.remove }
  end

  # Turns one <name> element into an <a>. The email attribute becomes a
  # mailto: href; a ref attribute is converted afterwards and therefore
  # replaces that href when both are present.
  def convert_name(person)
    email = person.attributes["email"]
    if email
      person.delete_attribute("email")
      person.add_attribute("href", "mailto:" + email)
    end

    person.name = "a"
    rename_attribute(person, "ref", "href")
  end

  # Moves the value of attribute +from+ to attribute +to+. Does nothing when
  # the element has no +from+ attribute. The attribute is removed and added
  # again so that it is stored under its new name.
  def rename_attribute(element, from, to)
    value = element.attributes[from]
    return if value.nil?

    element.delete_attribute(from)
    element.add_attribute(to, value)
  end

end
132
# Script entry point: build an importer for the hard-coded local export
# directory and run the import as soon as this file is loaded.
importer = UpdateImporter.new('/Users/hukl/Desktop/updates')
importer.import_xml
135