import os
import os.path
-import sys
from time import gmtime, strftime
import simplejson as json
-if len(sys.argv) != 4:
- print sys.stderr, "Usage : %s <dossier-destination> <site> <formulaire>" % sys.argv[0]
- sys.exit(1)
-VHOST = sys.argv[2]
-FORM_NAME = sys.argv[3]
-OUTPUT_DIRECTORY = os.path.join(sys.argv[1], VHOST, FORM_NAME)
+from wcs import publisher
+from wcs.formdef import FormDef
+from wcs.fields import TitleField, CommentField, TextField, \
+ StringField, ItemField, EmailField, \
+ DateField, FileField, BoolField
-os.umask(0022)
-# création du dossier d'extraction, au besoin
-if not os.path.isdir(OUTPUT_DIRECTORY):
- os.makedirs(OUTPUT_DIRECTORY, 0755)
+COPY_COMMAND = '/bin/cp'
+COPY_ARGS = '-af'
-TIME_FORMAT = "%Y-%m-%d"
-TIME_FORMAT_LONG = "%Y-%m-%d %H:%M:%S"
-# fonction de nettoyage des noms pour ne garder que de l'alpha-numérique
-def cleanup(s, replacement_char='-'):
+def reduce_to_alnum(s, replacement_char='-'):
+ """réduction d'une chaîne de caractères à de l'alpha-numérique"""
+
avec_accent = u'çÇáàâÁÀÂéèêëÉÈÊËíìîïÍÌÎÏóòôöÓÒÔÖúùûüÚÙÛÜýỳyÿÝỲYŸ'
sans_accent = u'cCaaaAAAeeeeEEEEiiiiIIIIooooOOOOuuuuUUUUyyyyYYYY'
if type(s) is not unicode:
r = r.encode('utf-8')
return r
-from wcs import publisher
-from wcs.formdef import FormDef
-from wcs.fields import TitleField, CommentField, TextField, \
- StringField, ItemField, EmailField, \
- DateField, FileField, BoolField
-pub = publisher.WcsPublisher.create_publisher()
-pub.app_dir = os.path.join(pub.app_dir, VHOST)
-
-formdef = FormDef.get_by_urlname(FORM_NAME)
-
-#
-# nommage des champs, de façon unique
-#
-
-f = open(os.path.join(OUTPUT_DIRECTORY, 'field-names.txt'), 'w')
-
-field_names = {}
-field_names_duplicates = {}
-for field in formdef.fields:
- if isinstance(field, TitleField) or isinstance(field, CommentField):
- continue
- name = cleanup(field.label,'_').lower()
- if name in field_names.values(): # duplicat
- field_names_duplicates[name] = field_names_duplicates.get(name, 1) + 1
- name = '%s_%d' % (name, field_names_duplicates[name])
- field_names.update({field.id: name})
- print >>f, "%s:%s:%s" % (field.id, field_names[field.id], field.label)
-
-f.close()
-
-f = open('%s/field-names.json' % OUTPUT_DIRECTORY, 'wb')
-f.write(json.dumps(field_names, ensure_ascii=False))
-f.close()
-
-#
-# extraction des données
-#
-
-for object in formdef.data_class().select():
- result = {
- 'num_dossier': object.id,
- 'wcs_status': object.status,
- 'wcs_workflow_status': object.get_workflow_status().name,
- 'wcs_user_email': object.user.email,
- 'wcs_user_display_name': object.user.display_name,
- #'wcs_last_modified': strftime(TIME_FORMAT_LONG, gmtime(object.last_modified())),
- }
- qfiles = { }
+def extract_fields(formdef, output_directory):
+ """Give every data field of the form a unique name and save the mapping.
+
+ Title and comment fields are skipped. Each remaining field label is passed
+ through reduce_to_alnum() with '_' as replacement character and lowercased;
+ when the resulting name is already taken, a numeric suffix ('_2', '_3', ...)
+ is appended.
+
+ Two files are written to output_directory:
+ - 'field-names.txt': one 'id:name:label' line per field;
+ - 'field-names.json': the field id -> name mapping, serialized as JSON.
+
+ Nothing is returned (see the TODO below).
+ NOTE(review): output_directory is not created here and presumably must
+ already exist - confirm against callers.
+ """
+ # TODO: should return a result, which would then be saved by the caller
+
+ f = open(os.path.join(output_directory, 'field-names.txt'), 'w')
+
+ field_names = {}
+ field_names_duplicates = {}
for field in formdef.fields:
- field_id = str(field.id)
- if not field_id in object.data:
- continue
+ # title and comment fields do not get a name
if isinstance(field, TitleField) or isinstance(field, CommentField):
continue
- field_name = field_names[field_id]
- data = object.data.get(field_id)
- if isinstance(field, StringField) or isinstance(field, TextField) \
- or isinstance(field, EmailField) or isinstance(field, ItemField):
- result[field_name] = data
- elif isinstance(field, BoolField):
- result[field_name] = (data == 'True')
- elif isinstance(field, DateField):
- result[field_name] = strftime(TIME_FORMAT, data)
- elif isinstance(field, FileField):
- extension = data.orig_filename.rpartition('.')[2].lower()
- result[field_name] = "%s.%s" % (field_name, extension)
- qfiles[field_name] = data.qfilename
- else:
- print "WARNING: unknown field type '%s' for '%s'" % \
- (field.__class__.__name__, field.label)
- raise RuntimeError
-
- num_dossier = result['num_dossier']
- nom = cleanup('-'.join(result['nom'].split()).upper())
- prenom = cleanup('-'.join(result['prenom'].split()).upper())
- adel = result['adresse_electronique'].replace('@','-').lower()
-
- filename = "%04d-%s-%s-%s" % (num_dossier, nom, prenom, adel)
-
- print "Dossier '%s'..." % filename,
-
- for f in qfiles:
- result[f] = '%s_%s' % (filename, result[f])
- src = '%s/uploads/%s' % (pub.app_dir, qfiles[f])
- dst = '%s/%s' % (OUTPUT_DIRECTORY, result[f])
- if not os.path.exists(dst) or os.path.getmtime(src) > os.path.getmtime(dst):
- os.spawnl(os.P_WAIT, '/bin/cp', '-af', src, dst)
- os.chmod(dst, 0644)
-
- f = open('%s/%s.json' % (OUTPUT_DIRECTORY, filename), 'wb')
- f.write(json.dumps(result, ensure_ascii=False))
+ name = reduce_to_alnum(field.label,'_').lower()
+ if name in field_names.values(): # duplicate
+ # the counter is kept per base name; the first duplicate gets suffix 2
+ field_names_duplicates[name] = field_names_duplicates.get(name, 1) + 1
+ name = '%s_%d' % (name, field_names_duplicates[name])
+ field_names.update({field.id: name})
+ # one 'id:name:label' line per named field
+ print >>f, "%s:%s:%s" % (field.id, field_names[field.id], field.label)
+
f.close()
- print "OK."
+ # the same id -> name mapping, as JSON
+ f = open(os.path.join(output_directory, 'field-names.json'), 'wb')
+ f.write(json.dumps(field_names, ensure_ascii=False))
+ f.close()
+
+
+def _load_field_names(output_directory):
+    """Read back the field id -> name mapping saved by extract_fields().
+
+    The mapping is loaded from 'field-names.json' in output_directory.
+    Unicode keys and values returned by the JSON decoder are encoded to
+    UTF-8 byte strings, so the names can be mixed safely with the byte
+    strings used to build file names.
+
+    Raises IOError when the file does not exist (extract_fields() has not
+    been run on this directory).
+    """
+    f = open(os.path.join(output_directory, 'field-names.json'), 'rb')
+    try:
+        raw = json.loads(f.read())
+    finally:
+        f.close()
+
+    def to_bytes(value):
+        if isinstance(value, unicode):
+            return value.encode('utf-8')
+        return value
+
+    return dict((to_bytes(key), to_bytes(name)) for key, name in raw.items())
+
+
+def extract_data(formdef, output_directory, field_names=None):
+    """Export every submission of the form as a JSON file plus its attachments.
+
+    For each object returned by formdef.data_class().select(), a dictionary
+    is built from a few w.c.s. attributes (id, status, workflow status, user
+    email and display name) and from the value of every named field, then
+    dumped to '<filename>.json' in output_directory.  Files attached through
+    FileField fields are copied next to it from '<pub.app_dir>/uploads',
+    only when the destination is missing or older than the source.
+
+    Parameters:
+        formdef: the form definition; its `fields` and `data_class()` are used.
+        output_directory: directory receiving the JSON files and attachments.
+        field_names: optional mapping of str(field.id) -> field name.  When
+            omitted, the mapping is read from the 'field-names.json' file
+            that extract_fields() writes to output_directory.
+
+    Raises:
+        RuntimeError: a field has a type this function does not handle.
+        KeyError: a submission lacks one of the 'nom', 'prenom' or
+            'adresse_electronique' fields used to build the file name.
+
+    NOTE: relies on the module-level `pub` object, which is only set when
+    the module is run as a script, and on COPY_COMMAND / COPY_ARGS.
+    """
+    # TODO: should return a result, which would then be saved by the caller
+
+    # The mapping used to be a global of the flat script; it is now local to
+    # extract_fields(), so it has to be passed in or read back from disk.
+    if field_names is None:
+        field_names = _load_field_names(output_directory)
+
+    for formdata in formdef.data_class().select():
+        result = {
+            'num_dossier': formdata.id,
+            'wcs_status': formdata.status,
+            'wcs_workflow_status': formdata.get_workflow_status().name,
+            'wcs_user_email': formdata.user.email,
+            'wcs_user_display_name': formdata.user.display_name,
+            # disabled:
+            #'wcs_last_modified': strftime('%Y-%m-%d %H:%M:%S', gmtime(formdata.last_modified())),
+        }
+        qfiles = {}
+        for field in formdef.fields:
+            field_id = str(field.id)
+            if field_id not in formdata.data:
+                continue
+            if isinstance(field, (TitleField, CommentField)):
+                continue
+            field_name = field_names[field_id]
+            data = formdata.data.get(field_id)
+            if isinstance(field, (StringField, TextField, EmailField, ItemField)):
+                result[field_name] = data
+            elif isinstance(field, BoolField):
+                result[field_name] = (data == 'True')
+            elif isinstance(field, DateField):
+                result[field_name] = strftime('%Y-%m-%d', data)
+            elif isinstance(field, FileField):
+                # exported name is '<field name>.<original extension>';
+                # the stored upload name is kept aside for the copy below
+                extension = data.orig_filename.rpartition('.')[2].lower()
+                result[field_name] = "%s.%s" % (field_name, extension)
+                qfiles[field_name] = data.qfilename
+            else:
+                print "WARNING: unknown field type '%s' for '%s'" % \
+                    (field.__class__.__name__, field.label)
+                raise RuntimeError
+
+        num_dossier = result['num_dossier']
+        nom = reduce_to_alnum(result['nom']).upper()
+        prenom = reduce_to_alnum(result['prenom']).upper()
+        adel = result['adresse_electronique'].replace('@','-').lower()
+
+        filename = "%04d-%s-%s-%s" % (num_dossier, nom, prenom, adel)
+
+        print "Dossier '%s'..." % filename,
+
+        # copy the attached files
+        for attachment in qfiles:
+            result[attachment] = filename + '_' + result[attachment]
+            src = os.path.join(pub.app_dir, 'uploads', qfiles[attachment])
+            dst = os.path.join(output_directory, result[attachment])
+            if not os.path.exists(dst) or os.path.getmtime(src) > os.path.getmtime(dst):
+                # The first argument after the path is argv[0] of the child
+                # process: the program name must come first, otherwise
+                # COPY_ARGS is swallowed as argv[0] and cp runs without it.
+                os.spawnl(os.P_WAIT, COPY_COMMAND, COPY_COMMAND, COPY_ARGS, src, dst)
+                os.chmod(dst, 0644)
+
+        # write the JSON file; close it even if serialization fails
+        out = open(os.path.join(output_directory, filename + '.json'), 'wb')
+        try:
+            out.write(json.dumps(result, ensure_ascii=False))
+        finally:
+            out.close()
+
+        print "OK."
+
+
+if __name__ == '__main__':
+    import sys
+
+    # exactly three arguments are expected: destination directory,
+    # site (virtual host) and form name
+    if len(sys.argv) != 4:
+        print >>sys.stderr, "Usage : %s <dossier-destination> <site> <formulaire>" % sys.argv[0]
+        sys.exit(1)
+
+    destination, VHOST, FORM_NAME = sys.argv[1:]
+    OUTPUT_DIRECTORY = os.path.join(destination, VHOST, FORM_NAME)
+
+    os.umask(0022)
+    # create the extraction directory if needed
+    if not os.path.isdir(OUTPUT_DIRECTORY):
+        os.makedirs(OUTPUT_DIRECTORY, 0755)
+
+    # `pub` stays a module-level name: extract_data() reads pub.app_dir
+    pub = publisher.WcsPublisher.create_publisher()
+    pub.app_dir = os.path.join(pub.app_dir, VHOST)
+
+    formdef = FormDef.get_by_urlname(FORM_NAME)
+    extract_fields(formdef, OUTPUT_DIRECTORY)
+    extract_data(formdef, OUTPUT_DIRECTORY)
+
+    # record when the export last ran
+    last_run = strftime('%Y-%m-%d %H:%M:%S GMT', gmtime())
+    f = open(os.path.join(OUTPUT_DIRECTORY, 'last-run.txt'), 'w')
+    f.write(last_run)
+    f.close()
-f = open('%s/last-run.txt' % OUTPUT_DIRECTORY, 'w')
-f.write('%s GMT' % strftime(TIME_FORMAT_LONG, gmtime()))
-f.close()