1 | """ Import tracker data from Sourceforge.NET |
2 | ||
3 | This script needs four steps to work: | |
4 | ||
5 | 1. Export the project XML data using the admin web interface at sf.net | |
6 | 2. Run the file fetching (these are not included in the XML): | |
7 | ||
8 | import_sf.py files <path to XML> <path to files dir> | |
9 | ||
10 | this will place all the downloaded files in the files dir by file id. | |
11 | 3. Convert the sf.net XML to Roundup "export" format: | |
12 | ||
13 | import_sf.py import <tracker home> <path to XML> <path to files dir> | |
14 | ||
15 | this will generate a directory "/tmp/imported" which contains the | |
16 | data to be imported into a Roundup tracker. | |
17 | 4. Import the data: | |
18 | ||
19 | roundup-admin -i <tracker home> import /tmp/imported | |
20 | ||
21 | And you're done! | |
22 | """ | |

import sys, os, csv, time, urllib2, httplib, mimetypes, urlparse
# Python 2.3 ... 2.6 compatibility:
from roundup.anypy.sets_ import set

try:
    import cElementTree as ElementTree
except ImportError:
    from elementtree import ElementTree

from roundup import instance, hyperdb, date, support, password

today = date.Date('.')

DL_URL = 'http://sourceforge.net/tracker/download.php?group_id=%(group_id)s&atid=%(atid)s&aid=%(aid)s'

def get_url(aid):
    """ Given an artifact id, jump through the hoops needed to figure out
    the URL for accessing that artifact, and hence any attached files.
    """
    # first we hit this URL...
    conn = httplib.HTTPConnection("sourceforge.net")
    conn.request("GET", "/support/tracker.php?aid=%s"%aid)
    response = conn.getresponse()
    # ... which should respond with a redirect to the correct URL, which has
    # the magic "group_id" and "atid" values in it that we need
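    # (the redirect target looks something like the following -- the id
    # values here are illustrative, not real:
    #   http://sourceforge.net/tracker/index.php?func=detail&aid=123456&group_id=5470&atid=105470)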
    assert response.status == 302, 'response code was %s'%response.status
    location = response.getheader('location')
    query = urlparse.urlparse(location)[-2]
    info = dict([param.split('=') for param in query.split('&')])
    return DL_URL%info

def fetch_files(xml_file, file_dir):
    """ Fetch the files referenced in xml_file into the directory file_dir. """
    root = ElementTree.parse(xml_file).getroot()
    to_fetch = set()
    deleted = set()
    for artifact in root.find('artifacts'):
        for field in artifact.findall('field'):
            if field.get('name') == 'artifact_id':
                aid = field.text
        for field in artifact.findall('field'):
            if field.get('name') != 'artifact_history': continue
            for event in field.findall('history'):
                d = {}
                for hfield in event.findall('field'):
                    d[hfield.get('name')] = hfield.text
                if d['field_name'] == 'File Added':
                    fid = d['old_value'].split(':')[0]
                    to_fetch.add((aid, fid))
                if d['field_name'] == 'File Deleted':
                    fid = d['old_value'].split(':')[0]
                    deleted.add((aid, fid))
    to_fetch = to_fetch - deleted

    # skip files already downloaded; "got" holds file ids, so filter on
    # the fid half of each (aid, fid) pair
    got = set(os.listdir(file_dir))
    to_fetch = set([t for t in to_fetch if t[1] not in got])

    # load cached urls (sigh)
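    # urls.txt caches one "<artifact id> <download url>" pair per line so
    # repeated runs don't have to re-resolve the redirect for every artifact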
    urls = {}
    if os.path.exists(os.path.join(file_dir, 'urls.txt')):
        for line in open(os.path.join(file_dir, 'urls.txt')):
            aid, url = line.strip().split()
            urls[aid] = url

    for aid, fid in support.Progress('Fetching files', list(to_fetch)):
        if fid in got: continue
        if not urls.has_key(aid):
            urls[aid] = get_url(aid)
            f = open(os.path.join(file_dir, 'urls.txt'), 'a')
            f.write('%s %s\n'%(aid, urls[aid]))
            f.close()
        url = urls[aid] + '&file_id=' + fid
        f = urllib2.urlopen(url)
        data = f.read()
        f.close()
        # write the file content in binary mode, named by its file id
        n = open(os.path.join(file_dir, fid), 'wb')
        n.write(data)
        n.close()

def import_xml(tracker_home, xml_file, file_dir):
    """ Generate Roundup tracker import files based on the tracker schema,
    sf.net xml export and downloaded files from sf.net. """
    tracker = instance.open(tracker_home)
    db = tracker.open('admin')

    resolved = db.status.lookup('resolved')
    unread = db.status.lookup('unread')
    chatting = db.status.lookup('chatting')
    critical = db.priority.lookup('critical')
    urgent = db.priority.lookup('urgent')
    bug = db.priority.lookup('bug')
    feature = db.priority.lookup('feature')
    wish = db.priority.lookup('wish')
    adminuid = db.user.lookup('admin')
    anonuid = db.user.lookup('anonymous')

    root = ElementTree.parse(xml_file).getroot()

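    # sf.net exports timestamps as seconds since the epoch; convert them
    # to Roundup Date objects via a UTC time tuple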
    def to_date(ts):
        return date.Date(time.gmtime(float(ts)))

    # parse out the XML
    artifacts = []
    categories = set()
    users = set()
    add_files = set()
    remove_files = set()
    for artifact in root.find('artifacts'):
        d = {}
        op = {}
        artifacts.append(d)
        for field in artifact.findall('field'):
            name = field.get('name')
            if name == 'artifact_messages':
                for message in field.findall('message'):
                    l = d.setdefault('messages', [])
                    m = {}
                    l.append(m)
                    for mfield in message.findall('field'):
                        mname = mfield.get('name')
                        if mname == 'adddate':
                            m[mname] = to_date(mfield.text)
                        else:
                            m[mname] = mfield.text
                            if mname == 'user_name': users.add(mfield.text)
            elif name == 'artifact_history':
                for event in field.findall('history'):
                    l = d.setdefault('history', [])
                    e = {}
                    l.append(e)
                    for hfield in event.findall('field'):
                        hname = hfield.get('name')
                        if hname == 'entrydate':
                            e[hname] = to_date(hfield.text)
                        else:
                            e[hname] = hfield.text
                            if hname == 'mod_by': users.add(hfield.text)
                    if e['field_name'] == 'File Added':
                        add_files.add(e['old_value'].split(':')[0])
                    elif e['field_name'] == 'File Deleted':
                        remove_files.add(e['old_value'].split(':')[0])
            elif name == 'details':
                op['body'] = field.text
            elif name == 'submitted_by':
                op['user_name'] = field.text
                d[name] = field.text
                users.add(field.text)
            elif name == 'open_date':
                thedate = to_date(field.text)
                op['adddate'] = thedate
                d[name] = thedate
            else:
                d[name] = field.text

        categories.add(d['category'])

        # the "details" field is the original submission; make it the
        # issue's first message
        if op.has_key('body'):
            l = d.setdefault('messages', [])
            l.insert(0, op)

    # files that were later deleted don't need to be imported
    add_files -= remove_files

    # create users
    userd = {'nobody': '2'}
    users.discard('nobody')
    data = [
        {'id': '1', 'username': 'admin', 'password': password.Password('admin'),
         'roles': 'Admin', 'address': 'richard@python.org'},
        {'id': '2', 'username': 'anonymous', 'roles': 'Anonymous'},
    ]
    for n, user in enumerate(list(users)):
        userd[user] = str(n+3)
        data.append({'id': str(n+3), 'username': user, 'roles': 'User',
            'address': '%s@users.sourceforge.net'%user})
    write_csv(db.user, data)
    users = userd

    # create categories (as Roundup keywords; ids are 1-based)
    categoryd = {'None': None}
    categories.discard('None')
    data = []
    for n, category in enumerate(list(categories)):
        categoryd[category] = str(n+1)
        data.append({'id': str(n+1), 'name': category})
    write_csv(db.keyword, data)
    categories = categoryd

    # create issues
    issue_data = []
    file_data = []
    message_data = []
    issue_journal = []
    message_id = 0
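    # journal entries are (nodeid, date tuple, userid, action, params)
    # rows -- the same shape as the <class>-journals.csv files written
    # by Roundup's own export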
    for artifact in artifacts:
        d = {}
        d['id'] = artifact['artifact_id']
        d['title'] = artifact['summary']
        d['assignedto'] = users[artifact['assigned_to']]
        if d['assignedto'] == '2':
            d['assignedto'] = None
        d['creation'] = artifact['open_date']
        activity = artifact['open_date']
        d['creator'] = users[artifact['submitted_by']]
        actor = d['creator']
        if categories[artifact['category']]:
            d['keyword'] = [categories[artifact['category']]]
        issue_journal.append((
            d['id'], d['creation'].get_tuple(), d['creator'], "'create'", {}
        ))

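        # map sf.net's numeric priority onto Roundup's named priorities;
        # feature-request trackers only distinguish feature vs. wish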
        p = int(artifact['priority'])
        if artifact['artifact_type'] == 'Feature Requests':
            if p > 3:
                d['priority'] = feature
            else:
                d['priority'] = wish
        else:
            if p > 7:
                d['priority'] = critical
            elif p > 5:
                d['priority'] = urgent
            elif p > 3:
                d['priority'] = bug
            else:
                d['priority'] = feature

        s = artifact['status']
        if s == 'Closed':
            d['status'] = resolved
        elif s == 'Deleted':
            d['status'] = resolved
            d['is retired'] = True
        else:
            d['status'] = unread

        nosy = set()
        msgs = []
        for message in artifact.get('messages', []):
            authid = users[message['user_name']]
            if not message['body']: continue
            body = convert_message(message['body'], message_id)
            if not body: continue
            m = {'content': body, 'author': authid,
                 'date': message['adddate'],
                 'creation': message['adddate'], }
            msgs.append(m)
            if authid not in (None, '2'):
                nosy.add(authid)
            activity = message['adddate']
            actor = authid
            if d['status'] == unread:
                d['status'] = chatting

        # add import message
        m = {'content': 'IMPORT FROM SOURCEFORGE', 'author': '1',
             'date': today, 'creation': today}
        msgs.append(m)

        # sort this issue's messages and assign them globally-unique ids
        d['messages'] = []
        msgs.sort(lambda a, b: cmp(a['date'], b['date']))
        for message in msgs:
            message_id += 1
            message['id'] = str(message_id)
            d['messages'].append(str(message_id))
        message_data.extend(msgs)

        d['nosy'] = list(nosy)

        files = []
        for event in artifact.get('history', []):
            if event['field_name'] == 'File Added':
                fid, name = event['old_value'].split(':', 1)
                if fid in add_files:
                    files.append(fid)
                    name = name.strip()
                    try:
                        f = open(os.path.join(file_dir, fid), 'rb')
                        content = f.read()
                        f.close()
                    except IOError:
                        content = 'content missing'
                    file_data.append({
                        'id': fid,
                        'creation': event['entrydate'],
                        'creator': users[event['mod_by']],
                        'name': name,
                        'type': mimetypes.guess_type(name)[0],
                        'content': content,
                    })
                continue
            elif event['field_name'] == 'close_date':
                action = "'set'"
                info = { 'status': unread }
            elif event['field_name'] == 'summary':
                action = "'set'"
                info = { 'title': event['old_value'] }
            else:
                # not an interesting / translatable event
                continue
            row = [ d['id'], event['entrydate'].get_tuple(),
                users[event['mod_by']], action, info ]
            if event['entrydate'] > activity:
                activity = event['entrydate']
            issue_journal.append(row)
        d['files'] = files

        d['activity'] = activity
        d['actor'] = actor
        issue_data.append(d)

    write_csv(db.issue, issue_data)
    write_csv(db.msg, message_data)
    write_csv(db.file, file_data)

    f = open('/tmp/imported/issue-journals.csv', 'w')
    writer = csv.writer(f, colon_separated)
    writer.writerows(issue_journal)
    f.close()

def convert_message(content, id):
    """ Strip off the useless sf message header crap. """
    if content.startswith('Logged In: YES'):
        return '\n'.join(content.splitlines()[3:]).strip()
    return content

class colon_separated(csv.excel):
    delimiter = ':'

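# write_csv() below emits Roundup's "export" format: a colon-separated CSV
# with one repr()-encoded value per exported property plus a trailing
# "is retired" column; file content is saved beside the CSV for FileClasses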
def write_csv(klass, data):
    props = klass.getprops()
    if not os.path.exists('/tmp/imported'):
        os.mkdir('/tmp/imported')
    f = open('/tmp/imported/%s.csv'%klass.classname, 'w')
    writer = csv.writer(f, colon_separated)
    propnames = klass.export_propnames()
    propnames.append('is retired')
    writer.writerow(propnames)
    for entry in data:
        row = []
        for name in propnames:
            if name == 'is retired':
                continue
            prop = props[name]
            if entry.has_key(name):
                if isinstance(prop, (hyperdb.Date, hyperdb.Interval)):
                    row.append(repr(entry[name].get_tuple()))
                elif isinstance(prop, hyperdb.Password):
                    row.append(repr(str(entry[name])))
                else:
                    row.append(repr(entry[name]))
            elif isinstance(prop, hyperdb.Multilink):
                row.append('[]')
            elif name in ('creator', 'actor'):
                row.append("'1'")
            elif name in ('creation', 'activity'):
                row.append(repr(today.get_tuple()))
            else:
                row.append('None')
        row.append(entry.get('is retired', False))
        writer.writerow(row)

        if isinstance(klass, hyperdb.FileClass) and entry.get('content'):
            fname = klass.exportFilename('/tmp/imported/', entry['id'])
            support.ensureParentsExist(fname)
            c = open(fname, 'wb')
            if isinstance(entry['content'], unicode):
                c.write(entry['content'].encode('utf8'))
            else:
                c.write(entry['content'])
            c.close()

    f.close()
    # no journals are generated for these classes, but the importer looks
    # for a journal file per class, so write empty ones
    f = open('/tmp/imported/%s-journals.csv'%klass.classname, 'w')
    f.close()

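# A typical end-to-end run looks like this (paths are illustrative):
#
#   python import_sf.py files export.xml files/
#   python import_sf.py import /home/roundup/trackers/mytracker export.xml files/
#   roundup-admin -i /home/roundup/trackers/mytracker import /tmp/imported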
if __name__ == '__main__':
    if len(sys.argv) > 1 and sys.argv[1] == 'import':
        import_xml(*sys.argv[2:])
    elif len(sys.argv) > 1 and sys.argv[1] == 'files':
        fetch_files(*sys.argv[2:])
    else:
        print __doc__