1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
|
require 'vpim/icalendar'
require 'rexml/document'
require 'iconv'
# Imports "chaospage" XML update files from a directory tree into the
# Node / Page / Event models.
#
# Every imported document is attached to a node whose unique name is
# "updates/<year>/<chaos_id>"; the year comes from the document's <date>
# element and the chaos_id from the file name (text before the first dot).
class UpdateImporter
  # Chaospage tags that map to an XHTML tag by a plain rename.
  SIMPLE_TAG_RENAMES = [
    ['paragraph', 'p'],
    ['quote',     'q'],
    ['subtitle',  'h3'],
    ['strong',    'i'],
    ['stronger',  'b'],
    ['chapter',   'h2']
  ].freeze

  # Recurrence rules that are not flagged as custom on the event.
  DEFAULT_RRULES = [
    "FREQ=WEEKLY;INTERVAL=1",
    "FREQ=MONTHLY;INTERVAL=1",
    "FREQ=YEARLY;INTERVAL=1"
  ].freeze

  # path - directory that is scanned by #import_xml.
  #
  # Creates a root node when Node.root is missing and makes sure a node with
  # the unique name 'updates' exists; that node is kept in @updates and is the
  # parent of all year nodes created later.
  def initialize(path)
    @path = path
    Node.create! unless Node.root

    @updates = Node.find_by_unique_name('updates')
    return if @updates

    @updates = Node.create!(:slug => 'updates')
    @updates.move_to_child_of Node.root
  end

  # Class Methods

  # Instance Methods

  # Imports every "<path>/*/*.xml", "*.xml.de" and "*.xml.en" file except
  # those whose path matches index.xml. A ".en" suffix selects the :en locale,
  # everything else is imported as :de. Each processed path is printed.
  def import_xml
    Dir.glob("#{@path}/*/*.xml{,.de,.en}").each do |file|
      next if file =~ /index\.xml/

      # The block form closes the handle as soon as the document is parsed;
      # REXML::Document.new reads the whole source in its constructor.
      chaospage = File.open(file) { |io| REXML::Document.new(io) }
      puts file

      lang     = file =~ /\.en$/ ? :en : :de
      chaos_id = File.basename(file).split(/\./)[0]
      create_node_and_page(chaospage.root, lang, chaos_id)
    end
  end

  # Finds or creates the year node ("updates/<year>") and the document node
  # ("updates/<year>/<chaos_id>") and then imports the page content into it.
  #
  # chaospage - REXML element of the document; its root must contain <date>.
  # lang      - locale symbol handed on to #create_node_for_page.
  # chaos_id  - slug of the document node.
  #
  # NOTE(review): the date text is parsed with String#to_date, which is not
  # core Ruby - presumably supplied by ActiveSupport; confirm.
  def create_node_and_page(chaospage, lang, chaos_id)
    date = chaospage.root.elements['date'].get_text.to_s.to_date

    year_node = find_or_create_node("updates/#{date.year}", date.year, @updates)
    node      = find_or_create_node("updates/#{date.year}/#{chaos_id}", chaos_id, year_node)

    create_node_for_page chaospage, node, date, lang
  end

  # Converts the chaospage to XHTML and stores it on the node's first page.
  #
  # Page content, author, publication date and tags are only written while the
  # node has no head page yet; in that case the page also becomes the node's
  # head at the end. Event data (when <flags calendar="..."> is present) is
  # created or updated on every run.
  #
  # Raises RuntimeError when a calendar entry lacks DTSTART, or lacks both
  # DTEND and DURATION.
  def create_node_for_page(chaospage, node, date, lang)
    xhtml = convert_chaospage_to_xhtml(chaospage)
    body  = body_after_abstract(xhtml)
    page  = node.pages.first
    I18n.locale = lang

    fill_page(page, xhtml, body, date) unless node.head

    flags = xhtml.elements['flags']
    if flags && page
      unless node.head
        page.tag_list.add("event") if flags.attributes['calendar']
        page.tag_list.add("pressemitteilung") if flags.attributes['pm']
      end
      import_event(xhtml, node) if flags.attributes['calendar']
    end

    publish_page(node, page) unless node.head
  end

  # Rewrites chaospage markup into XHTML-like markup in place and returns the
  # element that was passed in.
  #
  # The top-level lookups use absolute XPath expressions ('//tag'), so they
  # cover the whole document the element belongs to. Attribute renames change
  # the name an attribute is serialized under.
  def convert_chaospage_to_xhtml( element )
    SIMPLE_TAG_RENAMES.each do |from, to|
      element.each_element("//#{from}") { |node| node.name = to }
    end

    element.each_element('//link') do |link|
      link.name = "a"
      rename_attribute(link, "ref", "href")
      link.attributes.delete_all("type")
    end

    element.each_element('//list') do |list|
      # Relative paths: each list is judged by its own content only. Absolute
      # paths would turn every list into a table as soon as a single <row>
      # exists anywhere in the document.
      if list.get_elements('.//row').empty?
        list.name = "ul"
        list.each_element('.//item') { |item| item.name = "li" }
        list.each_element('.//sub')  { |sublist| sublist.name = "ul" }
      else
        list.name = "table"
        list.each_element('.//row') do |row|
          row.name = "tr"
          row.each_element('.//item') { |cell| cell.name = "td" }
        end
      end
    end

    element.each_element('//media') do |media|
      media.name = "img"
      rename_attribute(media, "ref", "src")
      if media.has_text?
        media.add_attribute("alt", media.text)
        # The text now lives in the alt attribute; drop the image's own
        # children. Iterate over a copy because removal mutates the list.
        media.to_a.each { |child| child.remove }
      end
    end

    element.each_element('//name') do |person|
      if person.attributes.get_attribute("email")
        person.attributes["email"] = "mailto:" + person.attributes["email"]
        rename_attribute(person, "email", "href")
      end
      person.name = "a"
      rename_attribute(person, "ref", "href")
    end

    element
  end

  private

  # Returns the node with the given unique name, creating it with +slug+
  # below +parent+ when it does not exist yet.
  def find_or_create_node(unique_name, slug, parent)
    node = Node.find_by_unique_name(unique_name)
    return node if node

    node = Node.create :slug => slug
    node.move_to_child_of parent
    node
  end

  # Serializes every sibling that follows <abstract> into one string.
  def body_after_abstract(xhtml)
    body = ""
    sibling = xhtml.elements['abstract'].next_sibling
    while sibling
      body << sibling.to_s
      sibling = sibling.next_sibling
    end
    body
  end

  # Writes title, abstract, body, author and publication date to the page,
  # saves it and tags it as "update".
  def fill_page(page, xhtml, body, date)
    page.update(
      :title    => xhtml.elements['title'].get_text.to_s,
      :abstract => xhtml.elements['abstract'].get_text.to_s,
      :body     => body
    )

    author = xhtml.elements['author']
    page.user = author ? User.find_by_login(author.get_text.to_s) : User.first

    page.published_at = date.to_time
    page.save!
    puts page.published_at
    page.tag_list.add("update") if page
  end

  # Creates the node's event from the ical elements, or updates the existing one.
  def import_event(xhtml, node)
    event_options = build_event_options(xhtml)
    puts event_options.inspect

    event = node.event
    if event
      event.update event_options
    else
      Event.create! event_options.merge(:node_id => node.id)
    end
  end

  # Collects the event attributes from the ical:* child elements of +xhtml+.
  # Both presence checks run before the elements are dereferenced so that a
  # missing element is reported with its dedicated message.
  def build_event_options(xhtml)
    options = {}

    # Figuring out dtstart
    dtstart = xhtml.elements['ical:DTSTART']
    raise "DTSTART not present in event" unless dtstart
    options[:allday]     = (dtstart.attributes['VALUE'] == 'DATE')
    options[:start_time] = dtstart.text

    # Figuring out dtend
    duration = xhtml.elements['ical:DURATION']
    dtend    = xhtml.elements['ical:DTEND']
    raise "WARNING: Neither DTEND nor DURATION present in event" unless dtend || duration
    if dtend
      options[:end_time] = dtend.text
    else
      parsed_duration = Ical_occurrences.duration_to_fixnum(duration.text)
      options[:end_time] = dtstart.text.to_time + parsed_duration
    end

    # Figuring out location data and url
    location = xhtml.elements['ical:LOCATION']
    if location
      options[:location] = location.text
      localtrep = location.attributes['ALTREP']
      options[:url] = localtrep if localtrep
    end

    # Figuring out geo data latitude / longitude
    geo = xhtml.elements['ical:GEO']
    options[:latitude], options[:longitude] = geo.text.split(";") if geo

    # Figuring out RRule
    rrule = xhtml.elements['ical:RRULE']
    if rrule
      parts = []
      rrule.each_element { |subrule| parts << (subrule.name + '=' + subrule.text) }
      options[:rrule] = parts.join(';')
      options[:custom_rrule] = true unless DEFAULT_RRULES.include?(options[:rrule])
    end

    options
  end

  # Saves the page and, if the node still has no head, makes the page the
  # node's head and clears its draft.
  def publish_page(node, page)
    page.save!
    if node.head.nil? && page
      node.head = page
      node.draft = nil
      node.save!
    end
  end

  # Changes the name an attribute is serialized under; elements that do not
  # carry the attribute are left alone.
  def rename_attribute(element, from, to)
    attribute = element.attributes.get_attribute(from)
    attribute.name = to if attribute
  end
end
|