baac67be0fdc5088f22bd7af0a5110e317476463
[progfou.git] / wcs / wcs-extract.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import os
5 import os.path
6 from time import gmtime, strftime
7 import simplejson as json
8
9 from wcs import publisher
10 from wcs.formdef import FormDef
11 from wcs.fields import TitleField, CommentField, TextField, \
12 StringField, ItemField, EmailField, \
13 DateField, FileField, BoolField
14
15 COPY_COMMAND = '/bin/cp'
16 COPY_ARGS = '-af'
17
18
def reduce_to_alnum(s, replacement_char='-'):
    """Reduce a string to ASCII letters and digits.

    Accented characters listed in the translation table below are replaced
    by their unaccented equivalent, ASCII letters and digits are kept as
    they are, and every run of other characters is collapsed into a single
    ``replacement_char``.  Leading and trailing replacement characters are
    stripped from the result.

    ``s`` may be a unicode string or a UTF-8 encoded byte string.  The
    result is always a UTF-8 encoded byte string (a plain ``str`` on
    Python 2), whatever the type of the input.
    """

    avec_accent = u'çÇáàâÁÀÂéèêëÉÈÊËíìîïÍÌÎÏóòôöÓÒÔÖúùûüÚÙÛÜýỳyÿÝỲYŸ'
    sans_accent = u'cCaaaAAAeeeeEEEEiiiiIIIIooooOOOOuuuuUUUUyyyyYYYY'
    try:
        text_type = unicode
    except NameError:  # Python 3: text strings are plain ``str``
        text_type = str
    # isinstance() rather than an exact type check, so that subclasses of
    # the text type are not passed to the decoding constructor.
    if not isinstance(s, text_type):
        s = text_type(s, 'utf-8')
    pieces = []
    for c in s:
        index = avec_accent.find(c)
        if index >= 0:
            pieces.append(sans_accent[index])
        elif ('a' <= c <= 'z') or ('A' <= c <= 'Z') or ('0' <= c <= '9'):
            # Explicit ASCII ranges: testing ``c.lower()`` would also let
            # through non-ASCII characters whose lowercase form is ASCII
            # (e.g. the Kelvin sign U+212A).
            pieces.append(c)
        elif pieces and pieces[-1] != replacement_char:
            # First unsupported character of a run: emit one separator.
            pieces.append(replacement_char)
        # Otherwise: nothing emitted yet, or a separator was just emitted.
    return u''.join(pieces).strip(replacement_char).encode('utf-8')
42
43
def extract_fields(formdef, output_directory):
    """Give every data field of the form a unique, file-friendly name.

    Title and comment fields are skipped.  For every other field the label
    is reduced with ``reduce_to_alnum(label, '_')`` and lowercased; when
    the resulting name is already taken, a numeric suffix (``_2``, ``_3``,
    ...) is appended until the name is unique.

    Two files are written into ``output_directory``:

    * ``field-names.txt``: one ``id:name:label`` line per field;
    * ``field-names.json``: the ``{field id: name}`` mapping as JSON.

    Returns the ``{field id: name}`` dictionary.
    """
    # TODO: saving the files should eventually move out of this function.

    field_names = {}
    used_names = set()
    duplicate_counters = {}
    lines = []
    for field in formdef.fields:
        if isinstance(field, (TitleField, CommentField)):
            continue
        base_name = reduce_to_alnum(field.label, '_').lower()
        name = base_name
        # Loop rather than test once: a generated "label_2" may itself
        # already be in use by a field whose label reduces to "label_2".
        while name in used_names:
            duplicate_counters[base_name] = \
                duplicate_counters.get(base_name, 1) + 1
            name = '%s_%d' % (base_name, duplicate_counters[base_name])
        used_names.add(name)
        field_names[field.id] = name
        lines.append("%s:%s:%s\n" % (field.id, name, field.label))

    with open(os.path.join(output_directory, 'field-names.txt'), 'w') as f:
        f.writelines(lines)

    with open(os.path.join(output_directory, 'field-names.json'), 'wb') as f:
        f.write(json.dumps(field_names, ensure_ascii=False))

    return field_names
67
68
def _native_str(value):
    """Return ``value`` as a native ``str`` (UTF-8 encoded on Python 2)."""
    if not isinstance(value, str):
        value = value.encode('utf-8')
    return value


def _load_field_names(output_directory):
    """Read the ``{field id: name}`` mapping from ``field-names.json``."""
    path = os.path.join(output_directory, 'field-names.json')
    with open(path, 'rb') as f:
        loaded = json.loads(f.read())
    # Keys and values are converted to native strings so that they can be
    # mixed with the byte strings stored in the per-record result without
    # triggering implicit unicode coercion when dumping with
    # ensure_ascii=False.
    return dict((_native_str(key), _native_str(name))
                for key, name in loaded.items())


def extract_data(formdef, output_directory, field_names=None):
    """Extract the data of every submission of the form.

    For each object returned by ``formdef.data_class().select()`` a
    ``<filename>.json`` file is written into ``output_directory`` and the
    uploaded files referenced by file fields are copied next to it.

    ``field_names`` maps ``str(field.id)`` to the name used as key in the
    JSON output.  When it is omitted, the mapping is read back from the
    ``field-names.json`` file found in ``output_directory``.

    The output file name is built from the result keys ``nom``, ``prenom``
    and ``adresse_electronique``; a ``KeyError`` is raised when the form
    has no field with these names.  A ``RuntimeError`` is raised for a
    field type that is not handled.

    The source of uploaded files is located through the module-level
    ``pub`` object (``pub.app_dir``), which must be set before the call.
    """
    # TODO: should return a result, which would then be saved by the caller.
    import sys  # local import: the module has no top-level ``import sys``

    if field_names is None:
        field_names = _load_field_names(output_directory)

    for formdata in formdef.data_class().select():
        result = {
            'num_dossier': formdata.id,
            'wcs_status': formdata.status,
            'wcs_workflow_status': formdata.get_workflow_status().name,
            'wcs_user_email': formdata.user.email,
            'wcs_user_display_name': formdata.user.display_name,
            #'wcs_last_modified': strftime('%Y-%m-%d %H:%M:%S', gmtime(formdata.last_modified())),
        }
        qfiles = {}
        for field in formdef.fields:
            field_id = str(field.id)
            if field_id not in formdata.data:
                continue
            if isinstance(field, (TitleField, CommentField)):
                continue
            field_name = field_names[field_id]
            data = formdata.data.get(field_id)
            if isinstance(field, (StringField, TextField,
                                  EmailField, ItemField)):
                result[field_name] = data
            elif isinstance(field, BoolField):
                result[field_name] = (data == 'True')
            elif isinstance(field, DateField):
                result[field_name] = strftime('%Y-%m-%d', data)
            elif isinstance(field, FileField):
                # NOTE(review): for an original file name without a dot,
                # rpartition() yields the whole name as "extension".
                extension = data.orig_filename.rpartition('.')[2].lower()
                result[field_name] = "%s.%s" % (field_name, extension)
                qfiles[field_name] = data.qfilename
            else:
                message = "WARNING: unknown field type '%s' for '%s'" % \
                    (field.__class__.__name__, field.label)
                sys.stdout.write(message + "\n")
                raise RuntimeError(message)

        num_dossier = result['num_dossier']
        nom = reduce_to_alnum(result['nom']).upper()
        prenom = reduce_to_alnum(result['prenom']).upper()
        adel = result['adresse_electronique'].replace('@', '-').lower()

        filename = "%04d-%s-%s-%s" % (num_dossier, nom, prenom, adel)

        sys.stdout.write("Dossier '%s'... " % filename)
        sys.stdout.flush()

        # Copy the attached files, prefixed with the record file name.
        for f in qfiles:
            result[f] = filename + '_' + result[f]
            src = os.path.join(pub.app_dir, 'uploads', qfiles[f])
            dst = os.path.join(output_directory, result[f])
            if not os.path.exists(dst) \
                    or os.path.getmtime(src) > os.path.getmtime(dst):
                # The first argument after the path is argv[0] (the program
                # name); without it COPY_ARGS would be consumed as the
                # program name and never be seen as an option by cp.
                os.spawnl(os.P_WAIT, COPY_COMMAND,
                          COPY_COMMAND, COPY_ARGS, src, dst)
                os.chmod(dst, 0o644)

        # Write the JSON file of the record.
        json_path = os.path.join(output_directory, filename + '.json')
        with open(json_path, 'wb') as f:
            f.write(json.dumps(result, ensure_ascii=False))

        sys.stdout.write("OK.\n")
131
132
if __name__ == '__main__':
    import sys

    # Command line: <destination directory> <site> <form>.
    if len(sys.argv) != 4:
        sys.stderr.write(
            "Usage : %s <dossier-destination> <site> <formulaire>\n"
            % sys.argv[0])
        sys.exit(1)

    VHOST, FORM_NAME = sys.argv[2], sys.argv[3]
    OUTPUT_DIRECTORY = os.path.join(sys.argv[1], VHOST, FORM_NAME)

    os.umask(0o022)
    # Create the extraction directory when it does not exist yet.
    if not os.path.isdir(OUTPUT_DIRECTORY):
        os.makedirs(OUTPUT_DIRECTORY, 0o755)

    # The publisher is kept in the module-level ``pub`` name, which
    # extract_data() reads to locate uploaded files.
    pub = publisher.WcsPublisher.create_publisher()
    pub.app_dir = os.path.join(pub.app_dir, VHOST)

    formdef = FormDef.get_by_urlname(FORM_NAME)

    extract_fields(formdef, OUTPUT_DIRECTORY)
    extract_data(formdef, OUTPUT_DIRECTORY)

    # Record the time (GMT) at which this extraction completed.
    with open(os.path.join(OUTPUT_DIRECTORY, 'last-run.txt'), 'w') as f:
        f.write(strftime('%Y-%m-%d %H:%M:%S GMT', gmtime()))
161