w.c.s : nouvelle version des outils de gestion hors-ligne
[progfou.git] / wcs / wcs-extract.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import os
5 import os.path
6 import sys
7 from time import gmtime, strftime
8 import simplejson as json
9
# Exactly three arguments are expected: destination directory, site
# (virtual host) and form name.
if len(sys.argv) != 4:
    # Redirect explicitly with `>>`: a bare `print sys.stderr, msg` would
    # print the stream object followed by the message on stdout.
    print >>sys.stderr, "Usage : %s <dossier-destination> <site> <formulaire>" % sys.argv[0]
    sys.exit(1)
VHOST = sys.argv[2]
FORM_NAME = sys.argv[3]
# Everything is written below <destination>/<site>/<form>.
OUTPUT_DIRECTORY = os.path.join(sys.argv[1], VHOST, FORM_NAME)
16
17 os.umask(0022)
18 # création du dossier d'extraction, au besoin
19 if not os.path.isdir(OUTPUT_DIRECTORY):
20 os.makedirs(OUTPUT_DIRECTORY, 0755)
21
22 TIME_FORMAT = "%Y-%m-%d"
23 TIME_FORMAT_LONG = "%Y-%m-%d %H:%M:%S"
24
def cleanup(s, replacement_char='-'):
    """Reduce a name to ASCII letters and digits only.

    Accented characters listed in the table below are replaced by their
    unaccented equivalent, ASCII letters and digits are kept unchanged, and
    every run of other characters collapses into one ``replacement_char``.
    Leading and trailing replacement characters are stripped.

    ``s`` may be a text (unicode) string or a UTF-8 encoded byte string.
    The result is always returned as a UTF-8 encoded byte string.
    """
    avec_accent = u'çÇáàâÁÀÂéèêëÉÈÊËíìîïÍÌÎÏóòôöÓÒÔÖúùûüÚÙÛÜýỳŷÿÝỲŶŸ'
    sans_accent = u'cCaaaAAAeeeeEEEEiiiiIIIIooooOOOOuuuuUUUUyyyyYYYY'
    unaccented = dict(zip(avec_accent, sans_accent))
    # type(u'') is the text type (`unicode` on Python 2); anything else is
    # taken to be UTF-8 encoded bytes.
    if not isinstance(s, type(u'')):
        s = s.decode('utf-8')
    parts = []
    for c in s:
        if c in unaccented:
            parts.append(unaccented[c])
        elif ('a' <= c <= 'z') or ('A' <= c <= 'Z') or ('0' <= c <= '9'):
            # Compare against the ASCII ranges directly: testing c.lower()
            # would also accept non-ASCII characters that lower-case to an
            # ASCII letter (e.g. U+212A KELVIN SIGN).
            parts.append(c)
        elif parts and parts[-1] != replacement_char:
            # First separator after a kept character; later ones in the same
            # run, and any leading ones, are dropped.
            parts.append(replacement_char)
    r = u''.join(parts).strip(replacement_char)
    return r.encode('utf-8')
47
48 from wcs import publisher
49 from wcs.formdef import FormDef
50 from wcs.fields import TitleField, CommentField, TextField, \
51 StringField, ItemField, EmailField, \
52 DateField, FileField, BoolField
53
# Create the w.c.s publisher and append the site (virtual host) name to its
# application directory.
pub = publisher.WcsPublisher.create_publisher()
site_app_dir = os.path.join(pub.app_dir, VHOST)
pub.app_dir = site_app_dir

# Definition of the form to extract, looked up by its URL name.
formdef = FormDef.get_by_urlname(FORM_NAME)
58
#
# Give every data field a unique name derived from its label
#

# field id (as a string) -> unique field name
field_names = {}
# base name -> last numeric suffix handed out for it
field_names_duplicates = {}
# names already assigned, for O(1) duplicate detection
used_names = set()

f = open(os.path.join(OUTPUT_DIRECTORY, 'field-names.txt'), 'w')
try:
    for field in formdef.fields:
        # Titles and comments are skipped here and in the extraction loop.
        if isinstance(field, (TitleField, CommentField)):
            continue
        base_name = cleanup(field.label, '_').lower()
        name = base_name
        # Keep incrementing the suffix until the name is really free: a
        # generated "x_2" may collide with a field labelled "x 2".
        while name in used_names:
            field_names_duplicates[base_name] = field_names_duplicates.get(base_name, 1) + 1
            name = '%s_%d' % (base_name, field_names_duplicates[base_name])
        used_names.add(name)
        # Keyed by str(field.id) because the extraction loop looks names up
        # with str(field.id).
        field_names[str(field.id)] = name
        print >>f, "%s:%s:%s" % (field.id, name, field.label)
finally:
    f.close()

f = open('%s/field-names.json' % OUTPUT_DIRECTORY, 'wb')
try:
    f.write(json.dumps(field_names, ensure_ascii=False))
finally:
    f.close()
82
83 #
84 # extraction des données
85 #
86
87 for object in formdef.data_class().select():
88 result = {
89 'num_dossier': object.id,
90 'wcs_status': object.status,
91 'wcs_workflow_status': object.get_workflow_status().name,
92 'wcs_user_email': object.user.email,
93 'wcs_user_display_name': object.user.display_name,
94 #'wcs_last_modified': strftime(TIME_FORMAT_LONG, gmtime(object.last_modified())),
95 }
96 qfiles = { }
97 for field in formdef.fields:
98 field_id = str(field.id)
99 if not field_id in object.data:
100 continue
101 if isinstance(field, TitleField) or isinstance(field, CommentField):
102 continue
103 field_name = field_names[field_id]
104 data = object.data.get(field_id)
105 if isinstance(field, StringField) or isinstance(field, TextField) \
106 or isinstance(field, EmailField) or isinstance(field, ItemField):
107 result[field_name] = data
108 elif isinstance(field, BoolField):
109 result[field_name] = (data == 'True')
110 elif isinstance(field, DateField):
111 result[field_name] = strftime(TIME_FORMAT, data)
112 elif isinstance(field, FileField):
113 extension = data.orig_filename.rpartition('.')[2].lower()
114 result[field_name] = "%s.%s" % (field_name, extension)
115 qfiles[field_name] = data.qfilename
116 else:
117 print "WARNING: unknown field type '%s' for '%s'" % \
118 (field.__class__.__name__, field.label)
119 raise RuntimeError
120
121 num_dossier = result['num_dossier']
122 nom = cleanup('-'.join(result['nom'].split()).upper())
123 prenom = cleanup('-'.join(result['prenom'].split()).upper())
124 adel = result['adresse_electronique'].replace('@','-').lower()
125
126 filename = "%04d-%s-%s-%s" % (num_dossier, nom, prenom, adel)
127
128 print "Dossier '%s'..." % filename,
129
130 for f in qfiles:
131 result[f] = '%s_%s' % (filename, result[f])
132 src = '%s/uploads/%s' % (pub.app_dir, qfiles[f])
133 dst = '%s/%s' % (OUTPUT_DIRECTORY, result[f])
134 if not os.path.exists(dst) or os.path.getmtime(src) > os.path.getmtime(dst):
135 os.spawnl(os.P_WAIT, '/bin/cp', '-af', src, dst)
136 os.chmod(dst, 0644)
137
138 f = open('%s/%s.json' % (OUTPUT_DIRECTORY, filename), 'wb')
139 f.write(json.dumps(result, ensure_ascii=False))
140 f.close()
141
142 print "OK."
143
# Record the time of this run (GMT) in last-run.txt.
last_run_stamp = '%s GMT' % strftime(TIME_FORMAT_LONG, gmtime())
last_run_file = open('%s/last-run.txt' % OUTPUT_DIRECTORY, 'w')
last_run_file.write(last_run_stamp)
last_run_file.close()