""" Import tracker data from Sourceforge.NET

This script needs four steps to work:

1. Export the project XML data using the admin web interface at sf.net
2. Run the file fetching (these are not included in the XML):

    import_sf.py files <path to XML> <path to files dir>

   this will place all the downloaded files in the files dir by file id.
3. Convert the sf.net XML to Roundup "export" format:

    import_sf.py import <tracker home> <path to XML> <path to files dir>

   this will generate a directory "/tmp/imported" which contains the
   data to be imported into a Roundup tracker.
4. Import the generated data into the tracker:

    roundup-admin -i <tracker home> import /tmp/imported
"""
24 import sys
, os
, csv
, time
, urllib2
, httplib
, mimetypes
, urlparse
25 # Python 2.3 ... 2.6 compatibility:
26 from roundup
.anypy
.sets_
import set
29 import cElementTree
as ElementTree
31 from elementtree
import ElementTree
33 from roundup
import instance
, hyperdb
, date
, support
, password
# Reference date for this import run: stamped on the synthetic
# "IMPORT FROM SOURCEFORGE" message and used by write_csv() as the fallback
# for missing created/activity values.
today = date.Date('.')

# Download URL template.  The "group_id", "atid" and "aid" placeholders are
# filled in from the query parameters of the redirect that sf.net sends for
# an artifact id.
DL_URL = 'http://sourceforge.net/tracker/download.php?group_id=%(group_id)s&atid=%(atid)s&aid=%(aid)s'
def get_url(aid):
    """ Work out the download URL prefix for an sf.net artifact.

    We have to jump through hoops here: given only an artifact id, the
    "group_id" and "atid" values needed by DL_URL are not known.  sf.net
    reveals them in the redirect it sends for
    /support/tracker.php?aid=<aid>, so we request that page and pick the
    values out of the redirect target's query string.

    aid -- the artifact id (text)

    Returns DL_URL filled in from the redirect's query parameters.
    Raises AssertionError if sf.net does not answer with a 302 redirect.
    """
    # first we hit this URL...
    conn = httplib.HTTPConnection("sourceforge.net")
    try:
        conn.request("GET", "/support/tracker.php?aid=%s"%aid)
        response = conn.getresponse()
        status = response.status
        location = response.getheader('location')
    finally:
        # always release the socket, even when the request fails
        conn.close()

    # which should respond with a redirect to the correct url which has the
    # magic "group_id" and "atid" values in it that we need.
    # Raised explicitly rather than with "assert" so that the check is not
    # stripped when running under "python -O".
    if status != 302:
        raise AssertionError('response code was %s'%status)

    query = urlparse.urlparse(location)[-2]
    # split on the first '=' only, so a value containing '=' stays intact
    info = dict([param.split('=', 1) for param in query.split('&')])
    return DL_URL%info
def fetch_files(xml_file, file_dir):
    """ Fetch files referenced in the xml_file into the dir file_dir.

    xml_file -- path to the sf.net XML export
    file_dir -- existing directory that receives the downloads; every file
                is stored under its sf.net file id.  A "urls.txt" cache of
                "<artifact id> <download url>" lines is kept there as well.

    A file is fetched when the artifact history records it as
    "File Added", unless the history also records it as "File Deleted" or
    a file with that id is already present in file_dir.
    """
    root = ElementTree.parse(xml_file).getroot()
    to_fetch = set()
    deleted = set()
    for artifact in root.find('artifacts'):
        for field in artifact.findall('field'):
            if field.get('name') == 'artifact_id':
                aid = field.text
        for field in artifact.findall('field'):
            if field.get('name') != 'artifact_history':
                continue
            for event in field.findall('history'):
                d = {}
                for item in event.findall('field'):
                    d[item.get('name')] = item.text
                # old_value looks like "<file id>:<file name>"
                if d['field_name'] == 'File Added':
                    to_fetch.add((aid, d['old_value'].split(':')[0]))
                elif d['field_name'] == 'File Deleted':
                    deleted.add((aid, d['old_value'].split(':')[0]))
    to_fetch = to_fetch - deleted

    # Files on disk are named by file id alone, so filter on the fid;
    # subtracting the listing from the set of (aid, fid) pairs would never
    # remove anything.
    got = set(os.listdir(file_dir))
    to_fetch = [(aid, fid) for (aid, fid) in to_fetch if fid not in got]

    # load cached urls (sigh)
    url_cache = os.path.join(file_dir, 'urls.txt')
    urls = {}
    if os.path.exists(url_cache):
        f = open(url_cache)
        try:
            for line in f:
                parts = line.strip().split()
                if len(parts) != 2:
                    # blank or damaged cache line: just look it up again
                    continue
                urls[parts[0]] = parts[1]
        finally:
            f.close()

    for aid, fid in support.Progress('Fetching files', to_fetch):
        if aid not in urls:
            urls[aid] = get_url(aid)
            # remember the url so an interrupted run need not ask again
            f = open(url_cache, 'a')
            try:
                f.write('%s %s\n'%(aid, urls[aid]))
            finally:
                f.close()
        url = urls[aid] + '&file_id=' + fid
        f = urllib2.urlopen(url)
        try:
            data = f.read()
        finally:
            f.close()
        # attachments are arbitrary data: write them untranslated
        n = open(os.path.join(file_dir, fid), 'wb')
        try:
            n.write(data)
        finally:
            n.close()
# import_xml(tracker_home, xml_file, file_dir)
#
# Convert an sf.net XML export into Roundup import files.
#
#   tracker_home -- handed to instance.open() to open the target tracker;
#                   its database is then opened as user 'admin'
#   xml_file     -- the sf.net export, parsed with ElementTree; the children
#                   of its 'artifacts' element are walked
#   file_dir     -- directory holding the previously downloaded attachments,
#                   opened by file id
#
# The user, keyword, issue, msg and file rows built here are handed to
# write_csv(); the issue journal is written to
# /tmp/imported/issue-journals.csv.
103 def import_xml(tracker_home
, xml_file
, file_dir
):
104 """ Generate Roundup tracker import files based on the tracker schema,
105 sf.net xml export and downloaded files from sf.net. """
106 tracker
= instance
.open(tracker_home
)
107 db
= tracker
.open('admin')
# ids of the status / priority / user items the imported issues refer to
109 resolved
= db
.status
.lookup('resolved')
110 unread
= db
.status
.lookup('unread')
# NOTE(review): 'chatting' is looked up under the name 'unread', so it ends
# up being the same id as 'unread' -- looks like a copy/paste slip for
# lookup('chatting'); confirm against the tracker's status names.
111 chatting
= db
.status
.lookup('unread')
112 critical
= db
.priority
.lookup('critical')
113 urgent
= db
.priority
.lookup('urgent')
114 bug
= db
.priority
.lookup('bug')
115 feature
= db
.priority
.lookup('feature')
116 wish
= db
.priority
.lookup('wish')
117 adminuid
= db
.user
.lookup('admin')
118 anonuid
= db
.user
.lookup('anonymous')
120 root
= ElementTree
.parse(xml_file
).getroot()
# timestamps are converted with float() + time.gmtime() into a Roundup Date
123 return date
.Date(time
.gmtime(float(ts
)))
# first pass: collect per-artifact data plus the user names, categories and
# added / removed file ids that occur in the export
131 for artifact
in root
.find('artifacts'):
135 for field
in artifact
.findall('field'):
136 name
= field
.get('name')
137 if name
== 'artifact_messages':
138 for message
in field
.findall('message'):
139 l
= d
.setdefault('messages', [])
142 for field
in message
.findall('field'):
143 name
= field
.get('name')
144 if name
== 'adddate':
145 m
[name
] = to_date(field
.text
)
148 if name
== 'user_name': users
.add(field
.text
)
149 elif name
== 'artifact_history':
150 for event
in field
.findall('history'):
151 l
= d
.setdefault('history', [])
154 for field
in event
.findall('field'):
155 name
= field
.get('name')
156 if name
== 'entrydate':
157 e
[name
] = to_date(field
.text
)
160 if name
== 'mod_by': users
.add(field
.text
)
# old_value is "<file id>:<rest>"; only the file id is kept here
161 if e
['field_name'] == 'File Added':
162 add_files
.add(e
['old_value'].split(':')[0])
163 elif e
['field_name'] == 'File Deleted':
164 remove_files
.add(e
['old_value'].split(':')[0])
165 elif name
== 'details':
166 op
['body'] = field
.text
167 elif name
== 'submitted_by':
168 op
['user_name'] = field
.text
170 users
.add(field
.text
)
171 elif name
== 'open_date':
172 thedate
= to_date(field
.text
)
173 op
['adddate'] = thedate
178 categories
.add(d
['category'])
180 if op
.has_key('body'):
181 l
= d
.setdefault('messages', [])
184 add_files
-= remove_files
# sf.net's 'nobody' is mapped onto user id '2' (the anonymous user below)
187 userd
= {'nobody': '2'}
# NOTE(review): remove() raises if 'nobody' never occurred in the export
188 users
.remove('nobody')
# NOTE(review): the generated admin account gets the fixed password 'admin'
# and a hard-coded address -- change both after importing.
190 {'id': '1', 'username': 'admin', 'password': password
.Password('admin'),
191 'roles': 'Admin', 'address': 'richard@python.org'},
192 {'id': '2', 'username': 'anonymous', 'roles': 'Anonymous'},
# imported users are numbered from 3, after admin ('1') and anonymous ('2')
194 for n
, user
in enumerate(list(users
)):
196 data
.append({'id': str(n
+3), 'username': user
, 'roles': 'User',
197 'address': '%s@users.sourceforge.net'%user
})
198 write_csv(db
.user
, data
)
202 categoryd
= {'None': None}
# NOTE(review): remove() raises if no artifact had the category 'None'
203 categories
.remove('None')
# NOTE(review): enumerate() starts at 0, so the first category gets id 0.
# The later "if categories[artifact['category']]:" test is false for 0, which
# means issues in that category get no keyword -- confirm whether the ids
# were meant to start at 1.
205 for n
, category
in enumerate(list(categories
)):
206 categoryd
[category
] = n
207 data
.append({'id': str(n
), 'name': category
})
208 write_csv(db
.keyword
, data
)
209 categories
= categoryd
# second pass: build the issue, message, file and journal rows
217 for artifact
in artifacts
:
219 d
['id'] = artifact
['artifact_id']
220 d
['title'] = artifact
['summary']
221 d
['assignedto'] = users
[artifact
['assigned_to']]
# user id '2' stands for nobody/anonymous: treat it as unassigned
222 if d
['assignedto'] == '2':
223 d
['assignedto'] = None
224 d
['creation'] = artifact
['open_date']
225 activity
= artifact
['open_date']
226 d
['creator'] = users
[artifact
['submitted_by']]
228 if categories
[artifact
['category']]:
229 d
['keyword'] = [categories
[artifact
['category']]]
230 issue_journal
.append((
231 d
['id'], d
['creation'].get_tuple(), d
['creator'], "'create'", {}
234 p
= int(artifact
['priority'])
235 if artifact
['artifact_type'] == 'Feature Requests':
237 d
['priority'] = feature
242 d
['priority'] = critical
244 d
['priority'] = urgent
248 d
['priority'] = feature
250 s
= artifact
['status']
252 d
['status'] = resolved
254 d
['status'] = resolved
255 d
['is retired'] = True
260 for message
in artifact
.get('messages', []):
261 authid
= users
[message
['user_name']]
# messages with an empty body (before or after header stripping) are skipped
262 if not message
['body']: continue
263 body
= convert_message(message
['body'], message_id
)
264 if not body
: continue
265 m
= {'content': body
, 'author': authid
,
266 'date': message
['adddate'],
267 'creation': message
['adddate'], }
268 message_data
.append(m
)
269 if authid
not in (None, '2'):
271 activity
= message
['adddate']
# NOTE(review): with 'chatting' bound to the same id as 'unread' (see the
# lookups at the top) this assignment does not change the status.
273 if d
['status'] == unread
:
274 d
['status'] = chatting
277 m
= {'content': 'IMPORT FROM SOURCEFORGE', 'author': '1',
278 'date': today
, 'creation': today
}
279 message_data
.append(m
)
281 # sort messages and assign ids
# NOTE(review): comparison-function sort with cmp() -- Python 2 only
283 message_data
.sort(lambda a
,b
:cmp(a
['date'],b
['date']))
284 for message
in message_data
:
286 message
['id'] = str(message_id
)
287 d
['messages'].append(message_id
)
289 d
['nosy'] = list(nosy
)
292 for event
in artifact
.get('history', []):
293 if event
['field_name'] == 'File Added':
294 fid
, name
= event
['old_value'].split(':', 1)
299 f
= open(os
.path
.join(file_dir
, fid
))
303 content
= 'content missing'
306 'creation': event
['entrydate'],
307 'creator': users
[event
['mod_by']],
309 'type': mimetypes
.guess_type(name
)[0],
313 elif event
['field_name'] == 'close_date':
315 info
= { 'status': unread
}
316 elif event
['field_name'] == 'summary':
318 info
= { 'title': event
['old_value'] }
320 # not an interesting / translatable event
322 row
= [ d
['id'], event
['entrydate'].get_tuple(),
323 users
[event
['mod_by']], action
, info
]
# the issue's activity date follows the newest journal entry
324 if event
['entrydate'] > activity
:
325 activity
= event
['entrydate']
326 issue_journal
.append(row
)
329 d
['activity'] = activity
333 write_csv(db
.issue
, issue_data
)
334 write_csv(db
.msg
, message_data
)
335 write_csv(db
.file, file_data
)
# NOTE(review): the output directory /tmp/imported is hard-coded
337 f
= open('/tmp/imported/issue-journals.csv', 'w')
338 writer
= csv
.writer(f
, colon_separated
)
339 writer
.writerows(issue_journal
)
def convert_message(content, id):
    """ Strip off the useless sf message header.

    content -- the message text
    id      -- unused; kept so existing callers keep working

    Content that starts with "Logged In: YES" has its first three lines
    dropped and the remainder stripped of surrounding whitespace (which may
    leave an empty string).  Any other content is returned unchanged.
    """
    if content.startswith('Logged In: YES'):
        return '\n'.join(content.splitlines()[3:]).strip()
    return content
class colon_separated(csv.excel):
    """ csv dialect for the files this script writes: the standard excel
    dialect, but with ':' as the field delimiter. """
    delimiter = ':'
# write_csv(klass, data)
#
# Write the rows for one hyperdb class to /tmp/imported/<classname>.csv.
#
#   klass -- the hyperdb class; getprops(), classname and export_propnames()
#            are used, plus exportFilename() for file classes
#   data  -- the entries to write; callers in this file pass lists of dicts
#            keyed by property name (plus 'id' and optionally 'is retired')
#
# The header row is klass.export_propnames() followed by 'is retired'.
# Values present in an entry are written as repr() text: Date/Interval
# values via get_tuple(), Password values via str().
# NOTE(review): the output directory /tmp/imported is hard-coded here.
351 def write_csv(klass
, data
):
352 props
= klass
.getprops()
353 if not os
.path
.exists('/tmp/imported'):
354 os
.mkdir('/tmp/imported')
355 f
= open('/tmp/imported/%s.csv'%klass
.classname
, 'w')
356 writer
= csv
.writer(f
, colon_separated
)
357 propnames
= klass
.export_propnames()
# 'is retired' is not a real property; it becomes the last column
358 propnames
.append('is retired')
359 writer
.writerow(propnames
)
362 for name
in propnames
:
363 if name
== 'is retired':
# NOTE(review): dict.has_key() exists in Python 2 only
366 if entry
.has_key(name
):
367 if isinstance(prop
, hyperdb
.Date
) or \
368 isinstance(prop
, hyperdb
.Interval
):
369 row
.append(repr(entry
[name
].get_tuple()))
370 elif isinstance(prop
, hyperdb
.Password
):
371 row
.append(repr(str(entry
[name
])))
373 row
.append(repr(entry
[name
]))
# fallbacks for properties that the entry does not provide
374 elif isinstance(prop
, hyperdb
.Multilink
):
376 elif name
in ('creator', 'actor'):
378 elif name
in ('created', 'activity'):
379 row
.append(repr(today
.get_tuple()))
# the retired flag defaults to False when the entry does not set it
382 row
.append(entry
.get('is retired', False))
# entries of a file class also get their content written to the path
# that klass.exportFilename() yields below /tmp/imported/
385 if isinstance(klass
, hyperdb
.FileClass
) and entry
.get('content'):
386 fname
= klass
.exportFilename('/tmp/imported/', entry
['id'])
387 support
.ensureParentsExist(fname
)
# NOTE(review): 'unicode' exists in Python 2 only; unicode content is
# written UTF-8 encoded, anything else as-is
389 if isinstance(entry
['content'], unicode):
390 c
.write(entry
['content'].encode('utf8'))
392 c
.write(entry
['content'])
# opening in 'w' mode creates (or truncates) the journals file of this class
396 f
= open('/tmp/imported/%s-journals.csv'%klass
.classname
, 'w')
if __name__ == '__main__':
    # Command line entry point; see the module docstring for the workflow.
    usage = (
        'usage: import_sf.py files <path to XML> <path to files dir>\n'
        '       import_sf.py import <tracker home> <path to XML> '
        '<path to files dir>'
    )
    # sub-command -> (handler, number of arguments it takes)
    handlers = {
        'import': (import_xml, 3),
        'files': (fetch_files, 2),
    }
    args = sys.argv[1:]
    # A missing or unknown sub-command used to end in an IndexError or in
    # silently doing nothing; report how to call the script instead.
    if not args or args[0] not in handlers:
        sys.exit(usage)
    handler, nargs = handlers[args[0]]
    # a wrong argument count used to surface as a TypeError from the call
    if len(args) - 1 != nargs:
        sys.exit(usage)
    handler(*args[1:])