2 # -*- coding: utf-8 -*-
4 Outil d'export de données w.c.s.
6 Copyright : Agence universitaire de la Francophonie — www.auf.org
7 Licence : GNU General Public Licence, version 2
8 Auteur : Jean Christophe André
9 Date de création : 15 octobre 2009
11 Depends: wcs, python-simplejson, python-magic
17 from time import gmtime, strftime
18 import simplejson as json
22 from wcs import publisher
23 from wcs.formdef import FormDef
24 from wcs.fields import TitleField, CommentField, TextField, \
25 StringField, ItemField, ItemsField, EmailField, \
26 DateField, FileField, BoolField, TableField
def reduce_to_alnum(s, replacement_char='-'):
    """Reduce a string to ASCII letters and digits joined by a separator.

    Accented letters present in the translation table are replaced by their
    unaccented ASCII counterpart, ASCII letters and digits are kept as they
    are, and every run of any other characters collapses into a single
    ``replacement_char``.  Leading and trailing separators are stripped.

    :param s: text to reduce; a byte string is decoded as UTF-8 first.
    :param replacement_char: separator substituted for each run of
        characters that are neither alphanumeric nor translatable.
    :returns: the reduced text, as a unicode string.
    """
    avec_accent = u'çÇáàâÁÀÂéèêëÉÈÊËíìîïÍÌÎÏóòôöÓÒÔÖúùûüÚÙÛÜýỳyÿÝỲYŸ'
    sans_accent = u'cCaaaAAAeeeeEEEEiiiiIIIIooooOOOOuuuuUUUUyyyyYYYY'
    # One dict lookup per character instead of a linear find() in the table.
    translation = dict(zip(avec_accent, sans_accent))
    # Explicit ASCII set: testing `'a' <= c.lower() <= 'z'` wrongly accepted
    # non-ASCII characters whose lowercase form is ASCII (e.g. U+212A).
    ascii_alnum = (u'abcdefghijklmnopqrstuvwxyz'
                   u'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                   u'0123456789')
    # `bytes` is `str` on Python 2, so this decodes byte strings and leaves
    # unicode strings (and their subclasses) untouched.
    if isinstance(s, bytes):
        s = s.decode('utf-8')
    pieces = []
    for c in s:
        if c in translation:
            pieces.append(translation[c])
        elif c in ascii_alnum:
            pieces.append(c)
        elif pieces and pieces[-1] != replacement_char:
            # First rejected character of a run: emit one separator only.
            pieces.append(replacement_char)
        # Otherwise the result is still empty or already ends with a
        # separator: drop the character.
    return u''.join(pieces).strip(replacement_char)
# Derive a unique name for every field of `formdef` from its label and write
# the resulting id -> name mapping to 'field-names.txt' and
# 'field-names.json' inside `output_directory`.
# NOTE(review): this listing omits several statements of the function (the
# creation of `field_names`, the body of the title/comment test, the file
# closes); the comments below only describe the lines that are visible.
54 def extract_fields(formdef, output_directory):
55 """nommage des champs de façon unique"""
56 # TODO: should return a result, which would then be saved outside
58 # XXX: temporary hack… :-/
# Plain-text listing; receives one "id:name:label" line per field below.
61 f = open(os.path.join(output_directory, 'field-names.txt'), 'w')
# Counter per base name, used to number repeated names.
64 field_names_duplicates = {}
65 for field in formdef.fields:
# Title and comment fields are singled out here; presumably they are skipped
# because they carry no data -- the statement under this test is not visible.
66 if isinstance(field, TitleField) or isinstance(field, CommentField):
# Base name: the label reduced to alphanumerics with '_' as separator,
# lowercased.
71 name = reduce_to_alnum(field.label,'_').lower()
72 if name in field_names.values(): # duplicate
# The counter defaults to 1, so the first repeat is suffixed _2, then _3, ...
73 field_names_duplicates[name] = field_names_duplicates.get(name, 1) + 1
74 name = '%s_%d' % (name, field_names_duplicates[name])
75 field_names.update({field.id: name})
76 print >>f, "%s:%s:%s" % (field.id, field_names[field.id], field.label)
# Same mapping, serialized as JSON.
80 f = open(os.path.join(output_directory, 'field-names.json'), 'wb')
81 f.write(json.dumps(field_names, ensure_ascii=False))
# Export every submission of `formdef`: one JSON file per submission plus a
# copy of its attached files under <output_directory>/data, and a sorted
# index of the JSON file names in <output_directory>/liste-dossiers.json.
# NOTE(review): this listing omits several statements (the opening of the
# `result` dict, the initialisation of `qfiles` / `liste_dossiers`, some
# branch bodies, and how `pub` / `field_names` come into scope); the comments
# below only describe the lines that are visible.
85 def extract_data(formdef, output_directory):
86 """extraction des données du formulaire"""
87 # TODO: should return a result, which would then be saved outside
89 # XXX: temporary hack… :-/
92 # load the MIME type database once and for all
93 #magicmime = magic.Magic(mime=True) => that will be for later…
94 magicmime = magic.open(magic.MAGIC_MIME)
# One iteration per stored submission of the form.
98 for object in formdef.data_class().select():
# A submission without an associated user is reported; per the log message
# it is ignored (the skipping statement itself is not visible here).
99 if object.user is None:
100 logging.warning("Dossier '%s' sans utilisateur associé ?!?"\
101 " On ignore...", object.id)
# Metadata entries of the exported record.
105 'num_dossier': object.id,
106 'wcs_status': object.status,
# and/or idiom: the workflow status name when the status starts with 'wf-',
# otherwise None (also None when that name is falsy).
107 'wcs_workflow_status': (object.status.startswith('wf-') and \
108 object.get_workflow_status().name or None),
109 'wcs_user_email': object.user.email,
110 'wcs_user_display_name': object.user.display_name,
111 #'wcs_last_modified': strftime('%Y-%m-%d %H:%M:%S', gmtime(object.last_modified())),
# Collect the history comments as "<comment> -- <author> <timestamp>".
115 if object.evolution is not None:
116 for e in object.evolution:
117 if e.comment is not None:
118 who = pub.user_class.get(e.who).display_name
119 time = strftime('%Y-%m-%d %H:%M:%S', e.time)
120 comment = '%s -- %s %s' % (e.comment, who, time)
121 result['wcs_comments'].append(comment)
# Convert each field value according to the field type.
124 for field in formdef.fields:
# Values in object.data are looked up by the string form of the field id.
125 field_id = str(field.id)
126 if not field_id in object.data:
# Title and comment fields are singled out here; the statement under this
# test is not visible (presumably they are skipped).
128 if isinstance(field, TitleField) or isinstance(field, CommentField):
130 field_name = field_names[field_id]
131 data = object.data.get(field_id)
133 result[field_name] = None
# Text-like and single-choice fields are exported unchanged.
135 if isinstance(field, StringField) or isinstance(field, TextField) \
136 or isinstance(field, EmailField) or isinstance(field, ItemField):
137 result[field_name] = data
138 elif isinstance(field, ItemsField) or isinstance(field, TableField):
139 result[field_name] = data # list => maybe join on ';'
# Booleans are compared against the string 'True'.
140 elif isinstance(field, BoolField):
141 result[field_name] = (data == 'True')
# Dates are formatted as YYYY-MM-DD from the tm_* attributes of the value.
142 elif isinstance(field, DateField):
143 result[field_name] = '%04d-%02d-%02d' % (data.tm_year,
144 data.tm_mon, data.tm_mday)
# Files are exported as "<field name>.<extension>"; the extension comes from
# the original file name when it has one, otherwise from the detected MIME type.
145 elif isinstance(field, FileField):
146 if '.' in data.orig_filename:
147 extension = data.orig_filename.rpartition('.')[2].lower()
148 else: # the filename has no extension
149 p = os.path.join(pub.app_dir, 'uploads', data.qfilename)
151 #m = magicmime.from_file(p) => that will be for later…
# Keep only the first word of the libmagic answer, without its trailing ';'.
152 m = magicmime.file(p).split()[0].strip(';')
153 extension = mimetypes.guess_extension(m)
155 logging.warning("Type de fichier inconnu pour '%s'.", p)
# guess_extension() results start with a dot: drop it.
157 if extension is not None:
158 extension = extension[1:]
160 extension = 'unknown'
161 result[field_name] = "%s.%s" % (field_name, extension)
# Remember the stored file name so the attachment can be copied below.
162 qfiles[field_name] = data.qfilename
164 logging.warning("Type de champ inconnu '%s' pour '%s' (%s).",
165 field.__class__.__name__, field_name, field.label)
# Base file name: <number on 4 digits>-<LAST NAME>-<FIRST NAME>-<e-mail>,
# with placeholder values when the form has no such fields.
167 num_dossier = result['num_dossier']
168 nom = reduce_to_alnum(result.get('nom','sans-nom')).upper()
169 prenom = reduce_to_alnum(result.get('prenom','sans-prenom')).upper()
170 adel = result.get('adresse_electronique','sans-adel').replace('@','-').lower()
172 filename = "%04d-%s-%s-%s" % (num_dossier, nom, prenom, adel)
173 liste_dossiers.append(filename + '.json')
175 # copy the attached files
# The exported attachment name is prefixed with the submission's base name.
177 result[f] = filename + '_' + result[f]
178 src = os.path.join(pub.app_dir, 'uploads', qfiles[f])
179 dst = os.path.join(output_directory, 'data', result[f])
# Copy only when the destination is missing or older than the source.
180 if not os.path.exists(dst) or os.path.getmtime(src) > os.path.getmtime(dst):
181 shutil.copy2(src, dst)
184 # generate the JSON file
185 jsonname = os.path.join(output_directory, 'data', filename + '.json')
186 f = open(jsonname, 'wb')
# Non-ASCII characters are kept as-is and the document is written as UTF-8.
187 f.write(json.dumps(result, ensure_ascii=False).encode('utf-8'))
190 logging.info("Dossier '%s' : %s.",
191 filename, result['wcs_workflow_status'])
# Index of all exported JSON files, sorted by name.
193 liste_dossiers.sort()
194 f = open(os.path.join(output_directory, 'liste-dossiers.json'), 'wb')
195 f.write(json.dumps(liste_dossiers, ensure_ascii=False))
# Script entry point: export the given form of the given site into
# <destination>/<site>/<form>.
# NOTE(review): this listing omits several statements (the assignment of
# VHOST, the exit after the usage message, the end of the basicConfig() call
# and the try/except implied by the final logging.exception()); the comments
# below only describe the lines that are visible.
199 if __name__ == '__main__':
# Exactly three arguments are expected after the program name.
202 if len(sys.argv) != 4:
203 print >>sys.stderr, "Usage : %s <dossier-destination> <site> <formulaire>" % sys.argv[0]
207 FORM_NAME = sys.argv[3]
# VHOST is presumably sys.argv[2] (the <site> argument) -- its assignment is
# not visible here; confirm.
208 OUTPUT_DIRECTORY = os.path.join(sys.argv[1], VHOST, FORM_NAME)
211 # create the extraction directory if needed
212 if not os.path.isdir(os.path.join(OUTPUT_DIRECTORY, 'data')):
213 os.makedirs(os.path.join(OUTPUT_DIRECTORY, 'data'), 0755)
# Log at DEBUG level into 'last-run.log' inside the output directory.
215 logging.basicConfig(level=logging.DEBUG,
216 format='%(asctime)s %(levelname)s %(message)s',
217 filename=os.path.join(OUTPUT_DIRECTORY, 'last-run.log'),
220 logging.info('Début.')
# Create the w.c.s. publisher and point its application directory at the
# sub-directory of the requested site.
222 pub = publisher.WcsPublisher.create_publisher()
223 pub.app_dir = os.path.join(pub.app_dir, VHOST)
225 formdef = FormDef.get_by_urlname(FORM_NAME)
# Field names are written first, then the submissions themselves.
227 extract_fields(formdef, OUTPUT_DIRECTORY)
230 extract_data(formdef, OUTPUT_DIRECTORY)
# Logs the error together with the active traceback.
232 logging.exception("Interruption du traitement pour cause d'erreur !")