e58f25f45554c164d6777de204aa1a676b9b80d6
[ngomwill.git] / convert_vcf.py
1 #!/usr/bin/python2.5
2 # coding: utf-8
3 """
4 Initial project
5
6 VcfToCsvConverter v0.3 - Converts VCF/VCARD files into CSV
7 Copyright (C) 2009 Petar Strinic (http://petarstrinic.com)
8 Contributor -- Dave Dartt
9
10 This program is free software: you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation, either version 3 of the License, or
13 (at your option) any later version.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program. If not, see <http://www.gnu.org/licenses/>.
22
23 """
24
25 import os
26 import re
27 import sys
28 import shutil
29 import glob
30 import codecs
31 import mysql.connector
32 from optparse import OptionParser, Option
33
34 config = {
35 'user': 'root',
36 'password': '',
37 'host': '127.0.0.1',
38 'database': 'auf',
39 'use_unicode' : 'True',
40 'charset' : 'utf8'
41 }
42
43 class MyOption(Option):
44 ACTIONS = Option.ACTIONS + ("extend",)
45 STORE_ACTIONS = Option.STORE_ACTIONS + ("extend",)
46 TYPED_ACTIONS = Option.TYPED_ACTIONS + ("extend",)
47 ALWAYS_TYPED_ACTIONS = Option.ALWAYS_TYPED_ACTIONS + ("extend",)
48 def take_action(self, action, dest, opt, value, values, parser):
49 if action == "extend":
50 lvalue = value.split(",")
51 values.ensure_value(dest, []).extend(lvalue)
52 else:
53 Option.take_action(self, action, dest, opt, value, values, parser)
54
55 class VcfToCsvConverter:
56 def __outputQuote(self):
57 if self.quote == True:
58 self.output += '"'
59
60 def __output(self, text):
61 self.__outputQuote();
62 self.output += text
63 self.__outputQuote();
64 self.output += self.delimiter
65
66 def __trace(self, text):
67 if self.trace == True:
68 print (text)
69
70 def __resetRow(self):
71 self.addressCount = { 'HOME' : 1, 'WORK' : 1 }
72 self.telephoneCount = { 'HOME PHONE' : 1, 'WORK PHONE' : 1, 'MOBILE PHONE' : 1, 'HOME FAX' : 1, 'WORK FAX' : 1 }
73 self.emailCount = { 'HOME' : 1, 'WORK' : 1 }
74 array = {}
75 for k in self.columns:
76 array[ k ] = '';
77 return array
78
79 def __endLine(self):
80 for k in self.columns:
81 try:
82 self.__output(self.data[ k ])
83 except KeyError:
84 self.output += self.delimiter
85 self.output += "\r\n"
86 self.data = self.__resetRow()
87
88 def __getfilenames(self):
89 try:
90 if os.path.isdir(self.inputPath):
91 self.inputFileArray = glob.glob(os.path.join(self.inputPath, '*.vcf'))
92 else:
93 print ("Invalid path please try again")
94 sys.exit(2)
95 except IOError:
96 print ("Directory is empty or does not contain any vcard format files.")
97 sys.exit(2)
98
99 def __parseFile(self):
100 self.__getfilenames()
101 outFile = codecs.open(self.outputFile, mode='w', encoding='utf-8')
102 outFile.write(u'\ufeff')
103
104 for NewFileName in self.inputFileArray:
105 try:
106 if self.verbose:
107 print ("Processing .... %s\n" % NewFileName)
108
109 inFile = codecs.open(NewFileName, mode='r', encoding='utf-8')
110 theLine = inFile.readline()
111 for theLine in inFile:
112 self.__parseLine(theLine)
113
114 inFile.close()
115 except IOError:
116 print ("error opening file during read operation via path: %s\n" % NewFileName)
117 sys.exit(2)
118
119 outFile.write(self.output)
120 outFile.close()
121
122 def __parseLine(self, theLine):
123 theLine = theLine.strip()
124 if len(theLine) < 1:
125 pass
126 elif re.match('^BEGIN:VCARD', theLine):
127 pass
128 elif re.match('^END:VCARD', theLine):
129 self.__endLine()
130 else:
131 self.__processLine(theLine.split(":"))
132
133 def __processLine(self, pieces):
134
135 pre = pieces[0].split(";")
136 if re.match('item.*', pre[0].split(".")[0], re.I) != None:
137 try:
138 pre[0] = pre[0].split(".")[1]
139 except IndexError:
140 self.__trace("item pre0 split: %s " % pre[0].split("."));
141
142 if pre[0] == 'VERSION':
143 pass
144 elif pre[0] == 'N':
145 self.__processName(pre, pieces[1])
146 elif pre[0] == 'FN':
147 pass
148 elif pre[0] == 'NICKNAME':
149 self.__processSingleValue('NICKNAME', pre, pieces[1])
150 elif pre[0] == 'TITLE':
151 self.__processSingleValue('TITLE', pre, pieces[1])
152 elif pre[0] == 'BDAY':
153 self.__processSingleValue('BIRTHDAY', pre, pieces[1])
154 elif pre[0] == 'ORG':
155 self.__processOrganization(pre, pieces[1])
156 elif pre[0] == 'ROLE':
157 self.__processSingleValue('ROLE', pre, pieces[1])
158 elif pre[0] == 'GEO':
159 self.__processSingleValue('GEOCODE', pre, pieces[1])
160 elif pre[0] == 'MAILER':
161 pass
162 elif pre[0] == 'TZ':
163 pass
164 elif pre[0] == 'ADR':
165 self.__processAddress(pre, pieces[1])
166 elif pre[0] == 'LOGO':
167 pass
168 elif pre[0] == 'PHOTO':
169 pass
170 elif pre[0] == 'TEL':
171 self.__processTelephone(pre, pieces[1:])
172 elif pre[0] == 'EMAIL':
173 self.__processEmail(pre, pieces[1:])
174 elif pre[0] == 'AGENT':
175 pass
176 elif pre[0] == 'NOTE':
177 self.__processSingleValue('NOTE', pre, pieces[1])
178 elif pre[0] == 'REV':
179 pass
180 elif pre[0] == 'URL':
181 self.__processSingleValue('URL', pre, ":".join(pieces[1:]))
182 elif pre[0] == 'UID':
183 pass
184 elif pre[0] == 'X-AIM':
185 pass
186 elif pre[0] == 'X-ICQ':
187 pass
188 elif pre[0] == 'X-JABBER':
189 pass
190 elif pre[0] == 'X-MSN':
191 pass
192 elif pre[0] == 'X-YAHOO':
193 pass
194 elif pre[0] == 'X-SKYPE-USERNAME':
195 pass
196 elif pre[0] == 'X-GADUGADU':
197 pass
198 elif pre[0] == 'X-GROUPWISE':
199 pass
200
201 def __processEmail(self, pre, p):
202 self.__trace("__processEmail: %s %s" % (pre, p))
203 hwm = "WORK"
204 #if (re.search('work',(",").join(pre[1:]), re.I) != None) :
205 # hwm = "WORK"
206 self.__trace("__email type: %s" % hwm)
207 if self.emailCount[hwm] <= self.maxEmails:
208 self.data["%s EMAIL %s" % (hwm, self.emailCount[hwm])] = p[0]
209 self.__trace("__email %s %s: %s" % (hwm, self.emailCount[hwm], p[0]))
210
211 self.emailCount[hwm] += 1
212 else:
213 self.data['WARNING'] += ("Maximum number of %s emails reached, discarded: %s. " % (hwm, p[0]))
214
215 def __processTelephone(self, pre, p):
216 self.__trace("__processTelephone: %s %s" % (pre, p))
217 telephoneType = "PHONE"
218 hwm = "HOME"
219 if re.search('work',(",").join(pre[1:]), re.I) != None:
220 hwm = "WORK"
221 elif re.search('cell',(",").join(pre[1:]), re.I) != None:
222 hwm = "MOBILE"
223
224 if re.search('fax',(",").join(pre[1:]), re.I) != None:
225 telephoneType = "FAX"
226
227 self.__trace("_telephone number = %s" % p[0])
228 self.__trace("_telephone type: %s" % "%s %s %s" % (hwm, telephoneType, self.telephoneCount["%s %s" % (hwm, telephoneType)]))
229
230 if self.telephoneCount[("%s PHONE" % hwm)] <= self.maxTelephones:
231 self.data["%s %s %s" % (hwm, telephoneType, self.telephoneCount["%s %s" % (hwm, telephoneType)])] = p[0]
232 self.telephoneCount["%s %s" % (hwm, telephoneType)] += 1
233 else:
234 self.data['WARNING'] += ("Maximum number of %s telephones reached, discarded: %s. " % (hwm, p[0]))
235
236 def __processAddress(self, pre, p):
237 self.__trace("__processAddress: %s %s" % (pre, p))
238 self.__trace("_ADDRESS: %s" % (p))
239 try:
240 (a, b, address, city, state, zip, country ) = p.split(";")
241 except ValueError:
242 (a, b, address, city, state, zip ) = p.split(";")
243 country = '';
244
245 addressType = "HOME"
246 try:
247 (a,addressTypes) = pre[1].split("=");
248 self.__trace("_addressTypes: %s " % addressTypes)
249 if "work" in (addressTypes.lower()).split(","):
250 self.__trace("_work address");
251 addressType = "WORK"
252
253 except ValueError:
254 self.__trace("_home address")
255
256 self.__trace(self.addressCount);
257 self.__trace(self.addressCount[addressType]);
258
259 if self.addressCount[addressType] <= self.maxAddresses:
260 self.data["%s ADDRESS %s" % (addressType, self.addressCount[addressType])] = address
261 self.data["%s CITY %s" % (addressType, self.addressCount[addressType])] = city
262 self.data["%s STATE %s" % (addressType, self.addressCount[addressType])] = state
263 self.data["%s ZIP %s" % (addressType, self.addressCount[addressType])] = zip
264 self.data["%s COUNTRY %s" % (addressType, self.addressCount[addressType])] = country
265 self.addressCount[addressType] += 1
266 else:
267 self.data['WARNING'] += ("Maximum number of %s addresses reached, discarded: %s. " % (addressType, p))
268
269 def __processSingleValue(self, valueType, pre, p):
270 self.__trace("__processSingleValue: %s %s %s" % (valueType, pre, p))
271 self.__trace("_%s: %s" % (valueType,p))
272 if valueType == 'FORMATTED NAME':
273 p = p.replace('\,', ' ')
274 self.data[valueType] = p
275 if valueType == 'ORGANIZATION':
276 self.data[valueType] = p.replace(';', '')
277 else:
278 self.data[valueType] = p
279
280 def __processName(self, pre, p):
281 self.__trace("__processName: %s %s" % (pre, p))
282 p = p.replace('\,', ';')
283 part = len(p.split(";"))
284 ln=fn=mi=pr=po= ''
285 if part==1:
286 ln = p.split(";")
287 elif part==2:
288 ( ln, fn ) = p.split(";")
289 elif part==3:
290 ( ln, fn, mi) = p.split(";")
291 elif part==4:
292 ( ln, fn, mi, pr) = p.split(";")
293 elif part>=5:
294 ( ln, fn, mi, pr, po ) = p.split(";")
295
296 self.data['NAME PREFIX'] = pr
297 self.data['NAME FIRST'] = fn
298 self.data['NAME MIDDLE'] = mi
299 self.data['NAME LAST'] = ln
300 self.data['NAME POSTFIX'] = po
301
302 def __processOrganization(self, pre, p):
303 part = len(p.split(";"))
304 company=office=departement= ''
305 if part==1:
306 company = p.split(";")
307 elif part==2:
308 ( company, office ) = p.split(";")
309 elif part>=3:
310 ( company, office, departement) = p.split(";", 2)
311
312 self.data['ORGANIZATION'] = company
313 self.data['OFFICE LOCATION'] = office
314 self.data['DEPARTEMENT'] = departement
315
316 def __init__(self, inputFilePath, outputFileName, delimiter, quote, trace, verbose):
317 self.trace = trace
318 self.addressCount = { 'HOME' : 1, 'WORK' : 1 }
319 self.telephoneCount = { 'HOME PHONE' : 1, 'WORK PHONE' : 1, 'MOBILE PHONE' : 1, 'HOME FAX' : 1, 'WORK FAX' : 1 }
320 self.emailCount = { 'HOME' : 1, 'WORK' : 1 }
321 self.data = {}
322 self.verbose = verbose
323 self.quote = quote
324 self.delimiter = delimiter
325 self.output = ''
326 self.inputPath = inputFilePath
327 self.inputFileArray = None
328 self.outputFile = outputFileName
329 self.maxAddresses = 3
330 self.maxTelephones = 3
331 self.maxEmails = 3
332 self.columns = (
333 'NAME PREFIX', 'NAME FIRST', 'NAME MIDDLE', 'NAME LAST','NAME POSTFIX', 'NICKNAME', 'BIRTHDAY',
334 'ORGANIZATION', 'OFFICE LOCATION', 'DEPARTEMENT',
335 'TITLE', 'ROLE',
336 'HOME ADDRESS 1', 'HOME CITY 1', 'HOME STATE 1', 'HOME ZIP 1', 'HOME COUNTRY 1',
337 'HOME ADDRESS 2', 'HOME CITY 2', 'HOME STATE 2', 'HOME ZIP 2', 'HOME COUNTRY 2',
338 'HOME ADDRESS 3', 'HOME CITY 3', 'HOME STATE 3', 'HOME ZIP 3', 'HOME COUNTRY 3',
339 'WORK ADDRESS 1', 'WORK CITY 1', 'WORK STATE 1', 'WORK ZIP 1', 'WORK COUNTRY 1',
340 'WORK ADDRESS 2', 'WORK CITY 2', 'WORK STATE 2', 'WORK ZIP 2', 'WORK COUNTRY 2',
341 'WORK ADDRESS 3', 'WORK CITY 3', 'WORK STATE 3', 'WORK ZIP 3', 'WORK COUNTRY 3',
342 'HOME PHONE 1', 'HOME PHONE 2', 'HOME PHONE 3',
343 'WORK PHONE 1', 'WORK PHONE 2', 'WORK PHONE 3',
344 'HOME FAX 1', 'HOME FAX 2', 'HOME FAX 3',
345 'WORK FAX 1', 'WORK FAX 2', 'WORK FAX 3',
346 'MOBILE PHONE 1', 'MOBILE PHONE 2', 'MOBILE PHONE 3',
347 'HOME EMAIL 1', 'HOME EMAIL 2', 'HOME EMAIL 3',
348 'WORK EMAIL 1', 'WORK EMAIL 2', 'WORK EMAIL 3',
349 'NOTE', 'URL')
350 self.colums_map={
351 'NAME PREFIX': 'Title',
352 'NAME FIRST': 'First Name',
353 'NAME MIDDLE': 'Middle Name',
354 'NAME LAST': 'Last Name',
355 'NAME POSTFIX': 'Suffix',
356 'NICKNAME': 'Nickname',
357 'BIRTHDAY': 'Birthday',
358 'ORGANIZATION': 'Company',
359 'OFFICE LOCATION': 'Office Location',
360 'DEPARTEMENT': 'Departement',
361 'TITLE': 'Job Title',
362 'ROLE': 'Role',
363 # Home Adress
364 'HOME ADDRESS 1': 'Home Street',
365 'HOME CITY 1': 'Home City',
366 'HOME STATE 1': 'Home State',
367 'HOME ZIP 1': 'Home Postal Code',
368 'HOME COUNTRY 1': 'Home Country/Region',
369 'HOME ADDRESS 2': 'Home Street 2',
370 'HOME CITY 2': 'Home City 2',
371 'HOME STATE 2': 'Home State 2',
372 'HOME ZIP 2': 'Home Postal Code 2',
373 'HOME COUNTRY 2': 'Home Country/Region 2',
374 'HOME ADDRESS 3': 'Home Street 3',
375 'HOME CITY 3': 'Home City 3',
376 'HOME STATE 3': 'Home State 3',
377 'HOME ZIP 3': 'Home Postal Code 3',
378 'HOME COUNTRY 3': 'Home Country/Region 3',
379 # Home Phone
380 'HOME PHONE 1': 'Home Phone',
381 'HOME PHONE 2': 'Home Phone 2',
382 'HOME PHONE 3': 'Home Phone 3',
383 # Home Fax
384 'HOME FAX 1': 'Home Fax',
385 'HOME FAX 2': 'Home Fax 2',
386 'HOME FAX 3': 'Home Fax 3',
387 # Home Email
388 'HOME EMAIL 1': 'Home E-mail',
389 'HOME EMAIL 2': 'Home E-mail 2',
390 'HOME EMAIL 3': 'Home E-mail 3',
391 # Business Adress
392 'WORK ADDRESS 1': 'Business Street',
393 'WORK CITY 1': 'Business City',
394 'WORK STATE 1': 'Business State',
395 'WORK ZIP 1': 'Business Postal Code',
396 'WORK COUNTRY 1': 'Business Country/Region',
397 'WORK ADDRESS 2': 'Business Street 2',
398 'WORK CITY 2': 'Business City 2',
399 'WORK STATE 2': 'Business State 2',
400 'WORK ZIP 2': 'Business Postal Code 2',
401 'WORK COUNTRY 2': 'Business Country/Region 2',
402 'WORK ADDRESS 3': 'Business Street 3',
403 'WORK CITY 3': 'Business City 3',
404 'WORK STATE 3': 'Business State 3',
405 'WORK ZIP 3': 'Business Postal Code 3',
406 'WORK COUNTRY 3': 'Business Country/Region 3',
407 # Business Phone
408 'WORK PHONE 1': 'Business Phone',
409 'WORK PHONE 2': 'Business Phone 2',
410 'WORK PHONE 3': 'Business Phone 3',
411 # Business Fax
412 'WORK FAX 1': 'Business Fax',
413 'WORK FAX 2': 'Business Fax 2',
414 'WORK FAX 3': 'Business Fax 3',
415 # Business Email
416 'WORK EMAIL 1': 'E-mail Address',
417 'WORK EMAIL 2': 'E-mail 2 Address',
418 'WORK EMAIL 3': 'E-mail 3 Address',
419 # Mobile Phone
420 'MOBILE PHONE 1': 'Mobile Phone',
421 'MOBILE PHONE 2': 'Car Phone',
422 'MOBILE PHONE 3': 'Other Phone',
423 # Other
424 'NOTE': 'Notes',
425 'URL': 'Personal Web Page'}
426
427 self.data = self.__resetRow()
428 for k in self.columns:
429 self.__output(self.colums_map[k])
430
431 self.output += "\r\n"
432 self.__parseFile()
433
434
435
436 def main():
437 usa = "usage: python ./%prog -p<pathname> -o<filename> -d<option> -q -v"
438 ver = "%prog v0.3.000 2009-11-25 - by Petar Strinic http://petarstrinic.com contributions of code snippets by Dave Dartt"
439 des = "This program was designed to take the information within a vcard and export it's contents to a csv file for easy import into other address book clients."
440 parser = OptionParser(option_class=MyOption, usage=usa, version=ver, description=des)
441 parser.add_option("-d", "--delim", action="store", dest="delimiter", default="\t", help="Delimiter to use: comma, semicolon, tab (default is tab)")
442 parser.add_option("-q", "--quote", action="store_true", dest="quote", default=False, help="Double quote the output strings (default is off)")
443 parser.add_option("-v", "--verbose", action="store_false", dest="verbose", default=True, help="Show processing information (default is on)")
444 parser.add_option("--trace", action="store_true", dest="trace", default=False, help="Displays a ton of debugging information.")
445 (options, args) = parser.parse_args()
446
447 delimiter = options.delimiter
448 if options.delimiter == "comma":
449 delimiter = ","
450 elif options.delimiter == "semicolon":
451 delimiter = ";"
452
453 cnx =mysql.connector.connect(**config)
454 cursor = cnx.cursor()
455 query = ("SELECT c_folder_id, c_path, c_path1, c_path2, c_path3, c_path4, c_foldername,SUBSTRING_INDEX(c_location, '/', -1) FROM sogo_folder_info WHERE c_folder_type='Contact' and ( c_path2='pierre-richard.thomas' or c_path2='jean-christophe.andre' )") #
456 cursor.execute(query)
457 groupe_contacts = cursor.fetchall()
458 for groupe_contact in groupe_contacts:
459 print(groupe_contact[7])
460 query = ("SELECT c_name, c_content FROM %s WHERE c_deleted IS NULL" % groupe_contact[7])
461
462 try:
463 cursor.execute(query)
464 contacts = cursor.fetchall()
465
466 except mysql.connector.Error as err:
467 print("{}".format(err))
468 continue
469
470 dir_name = '.'+groupe_contact[1]
471 dir_name = dir_name.replace('/Users','/SoGo/vcards')
472 if os.path.exists(dir_name):
473 shutil.rmtree(dir_name)
474
475 os.makedirs(dir_name)
476
477 for contact in contacts:
478
479 if contact[0][-4:]!='.vcf':
480 file = open((dir_name+'/'+groupe_contact[3]+'-'+contact[0]+'.vcf'), mode='w', encoding='utf-8')
481 else:
482 file = open((dir_name+'/'+groupe_contact[3]+'-'+contact[0]), mode='w', encoding='utf-8')
483
484 file.write(contact[1])
485 file.close()
486
487 contact_csv = './SoGo/'+groupe_contact[3]+'@auf.org '+groupe_contact[4]+' '+groupe_contact[6]+'.csv'
488 print(contact_csv)
489 print(dir_name)
490 VcfToCsvConverter(dir_name, contact_csv, delimiter, options.quote, options.trace, options.verbose)
491
492
493 cursor.close()
494 cnx.close()
495 sys.exit(0)
496
497
498 if __name__ == "__main__":
499 main()
500