w.c.s : nouvelle version des outils de gestion hors-ligne
[progfou.git] / wcs / wcs-extract.py
CommitLineData
0f48356a
P
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
4import os
5import os.path
6import sys
7from time import gmtime, strftime
8import simplejson as json
9
10if len(sys.argv) != 4:
11 print sys.stderr, "Usage : %s <dossier-destination> <site> <formulaire>" % sys.argv[0]
12 sys.exit(1)
13VHOST = sys.argv[2]
14FORM_NAME = sys.argv[3]
15OUTPUT_DIRECTORY = os.path.join(sys.argv[1], VHOST, FORM_NAME)
16
17os.umask(0022)
18# création du dossier d'extraction, au besoin
19if not os.path.isdir(OUTPUT_DIRECTORY):
20 os.makedirs(OUTPUT_DIRECTORY, 0755)
21
22TIME_FORMAT = "%Y-%m-%d"
23TIME_FORMAT_LONG = "%Y-%m-%d %H:%M:%S"
24
25# fonction de nettoyage des noms pour ne garder que de l'alpha-numérique
def cleanup(s, replacement_char='-'):
    """Reduce *s* to ASCII letters and digits joined by *replacement_char*.

    Accented characters present in the conversion table are replaced by
    their unaccented ASCII counterpart, ASCII letters and digits are kept
    unchanged, and every run of any other characters collapses into a single
    *replacement_char*.  Separators at either end of the result are removed.

    s -- text to clean: a unicode string or a UTF-8 encoded byte string.
    replacement_char -- separator substituted for runs of rejected characters.

    Returns the cleaned text as a UTF-8 encoded byte string.
    """
    avec_accent = u'çÇáàâÁÀÂéèêëÉÈÊËíìîïÍÌÎÏóòôöÓÒÔÖúùûüÚÙÛÜýỳyÿÝỲYŸ'
    sans_accent = u'cCaaaAAAeeeeEEEEiiiiIIIIooooOOOOuuuuUUUUyyyyYYYY'
    # Work on unicode text so that each accented letter is one character.
    if not isinstance(s, type(u'')):
        s = s.decode('utf-8')
    kept = []
    for c in s:
        index = avec_accent.find(c)
        if index >= 0:
            kept.append(sans_accent[index])
        # Compare the character itself against the ASCII ranges.  Testing
        # c.lower() instead lets through non-ASCII characters whose lowercase
        # form is an ASCII letter (e.g. U+212A KELVIN SIGN).
        elif u'a' <= c <= u'z' or u'A' <= c <= u'Z' or u'0' <= c <= u'9':
            kept.append(c)
        # Emit a separator only after real content and never twice in a row.
        elif kept and kept[-1] != replacement_char:
            kept.append(replacement_char)
    r = u''.join(kept).strip(replacement_char)
    return r.encode('utf-8')
47
48from wcs import publisher
49from wcs.formdef import FormDef
50from wcs.fields import TitleField, CommentField, TextField, \
51 StringField, ItemField, EmailField, \
52 DateField, FileField, BoolField
53
# Create the w.c.s publisher, then redirect its application directory to the
# sub-directory named after the requested site (VHOST).
# NOTE(review): presumably this scopes later lookups to that site's data --
# confirm against the wcs publisher API.
pub = publisher.WcsPublisher.create_publisher()
vhost_app_dir = os.path.join(pub.app_dir, VHOST)
pub.app_dir = vhost_app_dir

# Look up the form definition by the URL name given on the command line.
formdef = FormDef.get_by_urlname(FORM_NAME)
58
#
# Give every data field a unique name
#

# field id -> unique name derived from the field label.
field_names = {}
# base name -> last numeric suffix handed out for that base.
field_names_duplicates = {}
# Names already assigned, kept as a set for constant-time membership tests.
used_names = set()

# field-names.txt receives one "id:name:label" line per data field.
f = open(os.path.join(OUTPUT_DIRECTORY, 'field-names.txt'), 'w')
try:
    for field in formdef.fields:
        # Titles and comments are not data fields and get no name.
        if isinstance(field, (TitleField, CommentField)):
            continue
        base = cleanup(field.label, '_').lower()
        name = base
        # Keep suffixing until the candidate is really free: a generated
        # "base_N" may itself collide with a name produced from another
        # label (e.g. labels "nom 2", "nom", "nom").
        while name in used_names:
            field_names_duplicates[base] = field_names_duplicates.get(base, 1) + 1
            name = '%s_%d' % (base, field_names_duplicates[base])
        used_names.add(name)
        field_names[field.id] = name
        print >>f, "%s:%s:%s" % (field.id, name, field.label)
finally:
    # Close the file even if a field could not be processed.
    f.close()

# The same id -> name mapping, as JSON.
f = open('%s/field-names.json' % OUTPUT_DIRECTORY, 'wb')
try:
    f.write(json.dumps(field_names, ensure_ascii=False))
finally:
    f.close()
82
83#
84# extraction des données
85#
86
87for object in formdef.data_class().select():
88 result = {
89 'num_dossier': object.id,
90 'wcs_status': object.status,
91 'wcs_workflow_status': object.get_workflow_status().name,
92 'wcs_user_email': object.user.email,
93 'wcs_user_display_name': object.user.display_name,
94 #'wcs_last_modified': strftime(TIME_FORMAT_LONG, gmtime(object.last_modified())),
95 }
96 qfiles = { }
97 for field in formdef.fields:
98 field_id = str(field.id)
99 if not field_id in object.data:
100 continue
101 if isinstance(field, TitleField) or isinstance(field, CommentField):
102 continue
103 field_name = field_names[field_id]
104 data = object.data.get(field_id)
105 if isinstance(field, StringField) or isinstance(field, TextField) \
106 or isinstance(field, EmailField) or isinstance(field, ItemField):
107 result[field_name] = data
108 elif isinstance(field, BoolField):
109 result[field_name] = (data == 'True')
110 elif isinstance(field, DateField):
111 result[field_name] = strftime(TIME_FORMAT, data)
112 elif isinstance(field, FileField):
113 extension = data.orig_filename.rpartition('.')[2].lower()
114 result[field_name] = "%s.%s" % (field_name, extension)
115 qfiles[field_name] = data.qfilename
116 else:
117 print "WARNING: unknown field type '%s' for '%s'" % \
118 (field.__class__.__name__, field.label)
119 raise RuntimeError
120
121 num_dossier = result['num_dossier']
122 nom = cleanup('-'.join(result['nom'].split()).upper())
123 prenom = cleanup('-'.join(result['prenom'].split()).upper())
124 adel = result['adresse_electronique'].replace('@','-').lower()
125
126 filename = "%04d-%s-%s-%s" % (num_dossier, nom, prenom, adel)
127
128 print "Dossier '%s'..." % filename,
129
130 for f in qfiles:
131 result[f] = '%s_%s' % (filename, result[f])
132 src = '%s/uploads/%s' % (pub.app_dir, qfiles[f])
133 dst = '%s/%s' % (OUTPUT_DIRECTORY, result[f])
134 if not os.path.exists(dst) or os.path.getmtime(src) > os.path.getmtime(dst):
135 os.spawnl(os.P_WAIT, '/bin/cp', '-af', src, dst)
136 os.chmod(dst, 0644)
137
138 f = open('%s/%s.json' % (OUTPUT_DIRECTORY, filename), 'wb')
139 f.write(json.dumps(result, ensure_ascii=False))
140 f.close()
141
142 print "OK."
143
# Write the current UTC time, suffixed with " GMT", to last-run.txt in the
# output directory.
last_run_stamp = strftime(TIME_FORMAT_LONG, gmtime())
f = open('%s/last-run.txt' % OUTPUT_DIRECTORY, 'w')
f.write('%s GMT' % last_run_stamp)
f.close()