Update des copyright
[ngomwill.git] / convert_vcf.py
1 #!/usr/bin/python2.5
2 # coding: utf-8
3 """
4 Extrait les contacts de Sogo (Vcf) et les transforme au format .csv de Microsoft
5 Copyright : Agence universitaire de la Francophonie
6 Licence : GNU General Public Licence, version 2
7 Auteur : william.ngom@auf.org
8 Date de création : Août 2017
9
10 Initial project
11 VcfToCsvConverter v0.3 - Converts VCF/VCARD files into CSV
12 Copyright (C) 2009 Petar Strinic (http://petarstrinic.com)
13 Contributor -- Dave Dartt
14 This program is free software: you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation, either version 3 of the License, or
17 (at your option) any later version.
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program. If not, see <http://www.gnu.org/licenses/>
24
25 """
26
27 import os
28 import re
29 import sys
30 import shutil
31 import glob
32 import codecs
33 import mysql.connector
34 from optparse import OptionParser, Option
35
36 config = {
37 'user': 'root',
38 'password': '',
39 'host': '127.0.0.1',
40 'database': 'auf',
41 'use_unicode' : 'True',
42 'charset' : 'utf8'
43 }
44
45 class MyOption(Option):
46 ACTIONS = Option.ACTIONS + ("extend",)
47 STORE_ACTIONS = Option.STORE_ACTIONS + ("extend",)
48 TYPED_ACTIONS = Option.TYPED_ACTIONS + ("extend",)
49 ALWAYS_TYPED_ACTIONS = Option.ALWAYS_TYPED_ACTIONS + ("extend",)
50 def take_action(self, action, dest, opt, value, values, parser):
51 if action == "extend":
52 lvalue = value.split(",")
53 values.ensure_value(dest, []).extend(lvalue)
54 else:
55 Option.take_action(self, action, dest, opt, value, values, parser)
56
57 class VcfToCsvConverter:
58 def __outputQuote(self):
59 if self.quote == True:
60 self.output += '"'
61
62 def __output(self, text):
63 self.__outputQuote();
64 self.output += text
65 self.__outputQuote();
66 self.output += self.delimiter
67
68 def __trace(self, text):
69 if self.trace == True:
70 print (text)
71
72 def __resetRow(self):
73 self.addressCount = { 'HOME' : 1, 'WORK' : 1 }
74 self.telephoneCount = { 'HOME PHONE' : 1, 'WORK PHONE' : 1, 'MOBILE PHONE' : 1, 'HOME FAX' : 1, 'WORK FAX' : 1 }
75 self.emailCount = { 'HOME' : 1, 'WORK' : 1 }
76 array = {}
77 for k in self.columns:
78 array[ k ] = '';
79 return array
80
81 def __endLine(self):
82 for k in self.columns:
83 try:
84 self.__output(self.data[ k ])
85 except KeyError:
86 self.output += self.delimiter
87 self.output += "\r\n"
88 self.data = self.__resetRow()
89
90 def __getfilenames(self):
91 try:
92 if os.path.isdir(self.inputPath):
93 self.inputFileArray = glob.glob(os.path.join(self.inputPath, '*.vcf'))
94 else:
95 print ("Invalid path please try again")
96 sys.exit(2)
97 except IOError:
98 print ("Directory is empty or does not contain any vcard format files.")
99 sys.exit(2)
100
101 def __parseFile(self):
102 self.__getfilenames()
103 outFile = codecs.open(self.outputFile, mode='w', encoding='utf-8')
104 outFile.write(u'\ufeff')
105
106 for NewFileName in self.inputFileArray:
107 try:
108 if self.verbose:
109 print ("Processing .... %s\n" % NewFileName)
110
111 inFile = codecs.open(NewFileName, mode='r', encoding='utf-8')
112 theLine = inFile.readline()
113 for theLine in inFile:
114 self.__parseLine(theLine)
115
116 inFile.close()
117 except IOError:
118 print ("error opening file during read operation via path: %s\n" % NewFileName)
119 sys.exit(2)
120
121 outFile.write(self.output)
122 outFile.close()
123
124 def __parseLine(self, theLine):
125 theLine = theLine.strip()
126 if len(theLine) < 1:
127 pass
128 elif re.match('^BEGIN:VCARD', theLine):
129 pass
130 elif re.match('^END:VCARD', theLine):
131 self.__endLine()
132 else:
133 self.__processLine(theLine.split(":"))
134
135 def __processLine(self, pieces):
136
137 pre = pieces[0].split(";")
138 if re.match('item.*', pre[0].split(".")[0], re.I) != None:
139 try:
140 pre[0] = pre[0].split(".")[1]
141 except IndexError:
142 self.__trace("item pre0 split: %s " % pre[0].split("."));
143
144 if pre[0] == 'VERSION':
145 pass
146 elif pre[0] == 'N':
147 self.__processName(pre, pieces[1])
148 elif pre[0] == 'FN':
149 pass
150 elif pre[0] == 'NICKNAME':
151 self.__processSingleValue('NICKNAME', pre, pieces[1])
152 elif pre[0] == 'TITLE':
153 self.__processSingleValue('TITLE', pre, pieces[1])
154 elif pre[0] == 'BDAY':
155 self.__processSingleValue('BIRTHDAY', pre, pieces[1])
156 elif pre[0] == 'ORG':
157 self.__processOrganization(pre, pieces[1])
158 elif pre[0] == 'ROLE':
159 self.__processSingleValue('ROLE', pre, pieces[1])
160 elif pre[0] == 'GEO':
161 self.__processSingleValue('GEOCODE', pre, pieces[1])
162 elif pre[0] == 'MAILER':
163 pass
164 elif pre[0] == 'TZ':
165 pass
166 elif pre[0] == 'ADR':
167 self.__processAddress(pre, pieces[1])
168 elif pre[0] == 'LOGO':
169 pass
170 elif pre[0] == 'PHOTO':
171 pass
172 elif pre[0] == 'TEL':
173 self.__processTelephone(pre, pieces[1:])
174 elif pre[0] == 'EMAIL':
175 self.__processEmail(pre, pieces[1:])
176 elif pre[0] == 'AGENT':
177 pass
178 elif pre[0] == 'NOTE':
179 self.__processSingleValue('NOTE', pre, pieces[1])
180 elif pre[0] == 'REV':
181 pass
182 elif pre[0] == 'URL':
183 self.__processSingleValue('URL', pre, ":".join(pieces[1:]))
184 elif pre[0] == 'UID':
185 pass
186 elif pre[0] == 'X-AIM':
187 pass
188 elif pre[0] == 'X-ICQ':
189 pass
190 elif pre[0] == 'X-JABBER':
191 pass
192 elif pre[0] == 'X-MSN':
193 pass
194 elif pre[0] == 'X-YAHOO':
195 pass
196 elif pre[0] == 'X-SKYPE-USERNAME':
197 pass
198 elif pre[0] == 'X-GADUGADU':
199 pass
200 elif pre[0] == 'X-GROUPWISE':
201 pass
202
203 def __processEmail(self, pre, p):
204 self.__trace("__processEmail: %s %s" % (pre, p))
205 hwm = "WORK"
206 #if (re.search('work',(",").join(pre[1:]), re.I) != None) :
207 # hwm = "WORK"
208 self.__trace("__email type: %s" % hwm)
209 if self.emailCount[hwm] <= self.maxEmails:
210 self.data["%s EMAIL %s" % (hwm, self.emailCount[hwm])] = p[0]
211 self.__trace("__email %s %s: %s" % (hwm, self.emailCount[hwm], p[0]))
212
213 self.emailCount[hwm] += 1
214 else:
215 self.data['WARNING'] += ("Maximum number of %s emails reached, discarded: %s. " % (hwm, p[0]))
216
217 def __processTelephone(self, pre, p):
218 self.__trace("__processTelephone: %s %s" % (pre, p))
219 telephoneType = "PHONE"
220 hwm = "HOME"
221 if re.search('work',(",").join(pre[1:]), re.I) != None:
222 hwm = "WORK"
223 elif re.search('cell',(",").join(pre[1:]), re.I) != None:
224 hwm = "MOBILE"
225
226 if re.search('fax',(",").join(pre[1:]), re.I) != None:
227 telephoneType = "FAX"
228
229 self.__trace("_telephone number = %s" % p[0])
230 self.__trace("_telephone type: %s" % "%s %s %s" % (hwm, telephoneType, self.telephoneCount["%s %s" % (hwm, telephoneType)]))
231
232 if self.telephoneCount[("%s PHONE" % hwm)] <= self.maxTelephones:
233 self.data["%s %s %s" % (hwm, telephoneType, self.telephoneCount["%s %s" % (hwm, telephoneType)])] = p[0]
234 self.telephoneCount["%s %s" % (hwm, telephoneType)] += 1
235 else:
236 self.data['WARNING'] += ("Maximum number of %s telephones reached, discarded: %s. " % (hwm, p[0]))
237
238 def __processAddress(self, pre, p):
239 self.__trace("__processAddress: %s %s" % (pre, p))
240 self.__trace("_ADDRESS: %s" % (p))
241 try:
242 (a, b, address, city, state, zip, country ) = p.split(";")
243 except ValueError:
244 (a, b, address, city, state, zip ) = p.split(";")
245 country = '';
246
247 addressType = "HOME"
248 try:
249 (a,addressTypes) = pre[1].split("=");
250 self.__trace("_addressTypes: %s " % addressTypes)
251 if "work" in (addressTypes.lower()).split(","):
252 self.__trace("_work address");
253 addressType = "WORK"
254
255 except ValueError:
256 self.__trace("_home address")
257
258 self.__trace(self.addressCount);
259 self.__trace(self.addressCount[addressType]);
260
261 if self.addressCount[addressType] <= self.maxAddresses:
262 self.data["%s ADDRESS %s" % (addressType, self.addressCount[addressType])] = address
263 self.data["%s CITY %s" % (addressType, self.addressCount[addressType])] = city
264 self.data["%s STATE %s" % (addressType, self.addressCount[addressType])] = state
265 self.data["%s ZIP %s" % (addressType, self.addressCount[addressType])] = zip
266 self.data["%s COUNTRY %s" % (addressType, self.addressCount[addressType])] = country
267 self.addressCount[addressType] += 1
268 else:
269 self.data['WARNING'] += ("Maximum number of %s addresses reached, discarded: %s. " % (addressType, p))
270
271 def __processSingleValue(self, valueType, pre, p):
272 self.__trace("__processSingleValue: %s %s %s" % (valueType, pre, p))
273 self.__trace("_%s: %s" % (valueType,p))
274 if valueType == 'FORMATTED NAME':
275 p = p.replace('\,', ' ')
276 self.data[valueType] = p
277 if valueType == 'ORGANIZATION':
278 self.data[valueType] = p.replace(';', '')
279 else:
280 self.data[valueType] = p
281
282 def __processName(self, pre, p):
283 self.__trace("__processName: %s %s" % (pre, p))
284 p = p.replace('\,', ';')
285 part = len(p.split(";"))
286 ln=fn=mi=pr=po= ''
287 if part==1:
288 ln = p.split(";")
289 elif part==2:
290 ( ln, fn ) = p.split(";")
291 elif part==3:
292 ( ln, fn, mi) = p.split(";")
293 elif part==4:
294 ( ln, fn, mi, pr) = p.split(";")
295 elif part>=5:
296 ( ln, fn, mi, pr, po ) = p.split(";")
297
298 self.data['NAME PREFIX'] = pr
299 self.data['NAME FIRST'] = fn
300 self.data['NAME MIDDLE'] = mi
301 self.data['NAME LAST'] = ln
302 self.data['NAME POSTFIX'] = po
303
304 def __processOrganization(self, pre, p):
305 part = len(p.split(";"))
306 company=office=departement= ''
307 if part==1:
308 company = p.split(";")
309 elif part==2:
310 ( company, office ) = p.split(";")
311 elif part>=3:
312 ( company, office, departement) = p.split(";", 2)
313
314 self.data['ORGANIZATION'] = company
315 self.data['OFFICE LOCATION'] = office
316 self.data['DEPARTEMENT'] = departement
317
318 def __init__(self, inputFilePath, outputFileName, delimiter, quote, trace, verbose):
319 self.trace = trace
320 self.addressCount = { 'HOME' : 1, 'WORK' : 1 }
321 self.telephoneCount = { 'HOME PHONE' : 1, 'WORK PHONE' : 1, 'MOBILE PHONE' : 1, 'HOME FAX' : 1, 'WORK FAX' : 1 }
322 self.emailCount = { 'HOME' : 1, 'WORK' : 1 }
323 self.data = {}
324 self.verbose = verbose
325 self.quote = quote
326 self.delimiter = delimiter
327 self.output = ''
328 self.inputPath = inputFilePath
329 self.inputFileArray = None
330 self.outputFile = outputFileName
331 self.maxAddresses = 3
332 self.maxTelephones = 3
333 self.maxEmails = 3
334 self.columns = (
335 'NAME PREFIX', 'NAME FIRST', 'NAME MIDDLE', 'NAME LAST','NAME POSTFIX', 'NICKNAME', 'BIRTHDAY',
336 'ORGANIZATION', 'OFFICE LOCATION', 'DEPARTEMENT',
337 'TITLE', 'ROLE',
338 'HOME ADDRESS 1', 'HOME CITY 1', 'HOME STATE 1', 'HOME ZIP 1', 'HOME COUNTRY 1',
339 'HOME ADDRESS 2', 'HOME CITY 2', 'HOME STATE 2', 'HOME ZIP 2', 'HOME COUNTRY 2',
340 'HOME ADDRESS 3', 'HOME CITY 3', 'HOME STATE 3', 'HOME ZIP 3', 'HOME COUNTRY 3',
341 'WORK ADDRESS 1', 'WORK CITY 1', 'WORK STATE 1', 'WORK ZIP 1', 'WORK COUNTRY 1',
342 'WORK ADDRESS 2', 'WORK CITY 2', 'WORK STATE 2', 'WORK ZIP 2', 'WORK COUNTRY 2',
343 'WORK ADDRESS 3', 'WORK CITY 3', 'WORK STATE 3', 'WORK ZIP 3', 'WORK COUNTRY 3',
344 'HOME PHONE 1', 'HOME PHONE 2', 'HOME PHONE 3',
345 'WORK PHONE 1', 'WORK PHONE 2', 'WORK PHONE 3',
346 'HOME FAX 1', 'HOME FAX 2', 'HOME FAX 3',
347 'WORK FAX 1', 'WORK FAX 2', 'WORK FAX 3',
348 'MOBILE PHONE 1', 'MOBILE PHONE 2', 'MOBILE PHONE 3',
349 'HOME EMAIL 1', 'HOME EMAIL 2', 'HOME EMAIL 3',
350 'WORK EMAIL 1', 'WORK EMAIL 2', 'WORK EMAIL 3',
351 'NOTE', 'URL')
352 self.colums_map={
353 'NAME PREFIX': 'Title',
354 'NAME FIRST': 'First Name',
355 'NAME MIDDLE': 'Middle Name',
356 'NAME LAST': 'Last Name',
357 'NAME POSTFIX': 'Suffix',
358 'NICKNAME': 'Nickname',
359 'BIRTHDAY': 'Birthday',
360 'ORGANIZATION': 'Company',
361 'OFFICE LOCATION': 'Office Location',
362 'DEPARTEMENT': 'Departement',
363 'TITLE': 'Job Title',
364 'ROLE': 'Role',
365 # Home Adress
366 'HOME ADDRESS 1': 'Home Street',
367 'HOME CITY 1': 'Home City',
368 'HOME STATE 1': 'Home State',
369 'HOME ZIP 1': 'Home Postal Code',
370 'HOME COUNTRY 1': 'Home Country/Region',
371 'HOME ADDRESS 2': 'Home Street 2',
372 'HOME CITY 2': 'Home City 2',
373 'HOME STATE 2': 'Home State 2',
374 'HOME ZIP 2': 'Home Postal Code 2',
375 'HOME COUNTRY 2': 'Home Country/Region 2',
376 'HOME ADDRESS 3': 'Home Street 3',
377 'HOME CITY 3': 'Home City 3',
378 'HOME STATE 3': 'Home State 3',
379 'HOME ZIP 3': 'Home Postal Code 3',
380 'HOME COUNTRY 3': 'Home Country/Region 3',
381 # Home Phone
382 'HOME PHONE 1': 'Home Phone',
383 'HOME PHONE 2': 'Home Phone 2',
384 'HOME PHONE 3': 'Home Phone 3',
385 # Home Fax
386 'HOME FAX 1': 'Home Fax',
387 'HOME FAX 2': 'Home Fax 2',
388 'HOME FAX 3': 'Home Fax 3',
389 # Home Email
390 'HOME EMAIL 1': 'Home E-mail',
391 'HOME EMAIL 2': 'Home E-mail 2',
392 'HOME EMAIL 3': 'Home E-mail 3',
393 # Business Adress
394 'WORK ADDRESS 1': 'Business Street',
395 'WORK CITY 1': 'Business City',
396 'WORK STATE 1': 'Business State',
397 'WORK ZIP 1': 'Business Postal Code',
398 'WORK COUNTRY 1': 'Business Country/Region',
399 'WORK ADDRESS 2': 'Business Street 2',
400 'WORK CITY 2': 'Business City 2',
401 'WORK STATE 2': 'Business State 2',
402 'WORK ZIP 2': 'Business Postal Code 2',
403 'WORK COUNTRY 2': 'Business Country/Region 2',
404 'WORK ADDRESS 3': 'Business Street 3',
405 'WORK CITY 3': 'Business City 3',
406 'WORK STATE 3': 'Business State 3',
407 'WORK ZIP 3': 'Business Postal Code 3',
408 'WORK COUNTRY 3': 'Business Country/Region 3',
409 # Business Phone
410 'WORK PHONE 1': 'Business Phone',
411 'WORK PHONE 2': 'Business Phone 2',
412 'WORK PHONE 3': 'Business Phone 3',
413 # Business Fax
414 'WORK FAX 1': 'Business Fax',
415 'WORK FAX 2': 'Business Fax 2',
416 'WORK FAX 3': 'Business Fax 3',
417 # Business Email
418 'WORK EMAIL 1': 'E-mail Address',
419 'WORK EMAIL 2': 'E-mail 2 Address',
420 'WORK EMAIL 3': 'E-mail 3 Address',
421 # Mobile Phone
422 'MOBILE PHONE 1': 'Mobile Phone',
423 'MOBILE PHONE 2': 'Car Phone',
424 'MOBILE PHONE 3': 'Other Phone',
425 # Other
426 'NOTE': 'Notes',
427 'URL': 'Personal Web Page'}
428
429 self.data = self.__resetRow()
430 for k in self.columns:
431 self.__output(self.colums_map[k])
432
433 self.output += "\r\n"
434 self.__parseFile()
435
436
437
438 def main():
439 usa = "usage: python ./%prog -p<pathname> -o<filename> -d<option> -q -v"
440 ver = "%prog v0.3.000 2009-11-25 - by Petar Strinic http://petarstrinic.com contributions of code snippets by Dave Dartt"
441 des = "This program was designed to take the information within a vcard and export it's contents to a csv file for easy import into other address book clients."
442 parser = OptionParser(option_class=MyOption, usage=usa, version=ver, description=des)
443 parser.add_option("-d", "--delim", action="store", dest="delimiter", default="\t", help="Delimiter to use: comma, semicolon, tab (default is tab)")
444 parser.add_option("-q", "--quote", action="store_true", dest="quote", default=False, help="Double quote the output strings (default is off)")
445 parser.add_option("-v", "--verbose", action="store_false", dest="verbose", default=True, help="Show processing information (default is on)")
446 parser.add_option("--trace", action="store_true", dest="trace", default=False, help="Displays a ton of debugging information.")
447 (options, args) = parser.parse_args()
448
449 delimiter = options.delimiter
450 if options.delimiter == "comma":
451 delimiter = ","
452 elif options.delimiter == "semicolon":
453 delimiter = ";"
454
455 cnx =mysql.connector.connect(**config)
456 cursor = cnx.cursor()
457 query = ("SELECT c_folder_id, c_path, c_path1, c_path2, c_path3, c_path4, c_foldername,SUBSTRING_INDEX(c_location, '/', -1) FROM sogo_folder_info WHERE c_folder_type='Contact' and ( c_path2='pierre-richard.thomas' or c_path2='jean-christophe.andre' )") #
458 cursor.execute(query)
459 groupe_contacts = cursor.fetchall()
460 for groupe_contact in groupe_contacts:
461 print(groupe_contact[7])
462 query = ("SELECT c_name, c_content FROM %s WHERE c_deleted IS NULL" % groupe_contact[7])
463
464 try:
465 cursor.execute(query)
466 contacts = cursor.fetchall()
467
468 except mysql.connector.Error as err:
469 print("{}".format(err))
470 continue
471
472 dir_name = '.'+groupe_contact[1]
473 dir_name = dir_name.replace('/Users','/SoGo/vcards')
474 if os.path.exists(dir_name):
475 shutil.rmtree(dir_name)
476
477 os.makedirs(dir_name)
478
479 for contact in contacts:
480
481 if contact[0][-4:]!='.vcf':
482 file = open((dir_name+'/'+groupe_contact[3]+'-'+contact[0]+'.vcf'), mode='w', encoding='utf-8')
483 else:
484 file = open((dir_name+'/'+groupe_contact[3]+'-'+contact[0]), mode='w', encoding='utf-8')
485
486 file.write(contact[1])
487 file.close()
488
489 contact_csv = './SoGo/'+groupe_contact[3]+'@auf.org '+groupe_contact[4]+' '+groupe_contact[6]+'.csv'
490 print(contact_csv)
491 print(dir_name)
492 VcfToCsvConverter(dir_name, contact_csv, delimiter, options.quote, options.trace, options.verbose)
493
494
495 cursor.close()
496 cnx.close()
497 sys.exit(0)
498
499
500 if __name__ == "__main__":
501 main()
502