Commit | Line | Data |
---|---|---|
0cc5f772 | 1 | # -*- encoding: utf-8 -*- |
23b5b3d5 | 2 | import simplejson, re, datetime, operator, hashlib |
ecc62927 | 3 | from savoirs.globals import * |
d972b61d | 4 | from savoirs.models import Record, ListSet |
0cc5f772 | 5 | |
class SEPEncoder:
    """Encode and decode harvested metadata for database storage.

    Fields whose ``META`` entry has ``type == 'array'`` are stored as a
    single string with the items joined by :attr:`separator`; every other
    field is stored unchanged.
    """
    separator = ", "

    def propre(self, str):
        """Return a clean unicode version of the given string.

        Fixes encoding problems found in harvested data:

        * ``None`` becomes the empty unicode string;
        * byte strings are decoded as UTF-8;
        * the stray ``\\x92`` character is replaced by an acute accent.

        Raises ``UnicodeDecodeError`` if a byte string is not valid UTF-8.
        """
        if str is None:
            return u""

        text = str
        # On Python 2 ``bytes`` is ``str``, so this is the same test the
        # original name comparison performed, without breaking on Python 3.
        if isinstance(text, bytes):
            text = text.decode('utf-8')

        return text.replace(u"\x92", u"´")

    def encode(self, field, data):
        """Encode a harvested value for storage in the database.

        Values of ``'array'`` fields are joined with :attr:`separator`;
        anything else is returned unchanged.
        """
        if field in META and META[field]['type'] == 'array':
            return self.separator.join(data)
        return data

    def decode(self, field, data):
        """Decode a value read from the database.

        Values of ``'array'`` fields are split on :attr:`separator`;
        anything else is returned unchanged.
        """
        if field in META and META[field]['type'] == 'array':
            # ``"".split(sep)`` yields ``[""]``; return an empty list so
            # that an empty array survives an encode/decode round trip.
            if not data:
                return []
            return data.split(self.separator)
        return data

    def menage(self):
        """Apply :meth:`propre` to every ``META`` field of every record.

        Each record is saved after its fields have been cleaned.
        """
        for r in Record.objects.all():
            for k in META:
                setattr(r, k, self.propre(getattr(r, k)))
            r.save()
0cc5f772 CR |
45 | |
class SEP:
    """Store, update and search harvested resource metadata."""

    encoder = SEPEncoder()

    ############################################################################
    # INTERNAL METHODS
    ############################################################################

    def _load(self, id):
        """Return the metadata structure of the record with the given `id`.

        Only ``META`` keys that exist on the record and are not ``None`` are
        included; values are decoded with the encoder.  The lookup goes
        through ``Record.objects.get``, so a missing record raises that
        call's exception rather than returning ``None``.
        """
        r = Record.objects.get(id=id)
        meta = {}
        for k in META:
            if hasattr(r, k):
                v = getattr(r, k)
                if v is not None:
                    meta[k] = self.encoder.decode(k, v)
        return meta

    # Special handling for certain keys of the metadata structure.
    def listsets(self, record, value):
        """Attach to `record` every ``ListSet`` whose ``spec`` is in `value`."""
        # The record must have an id before multi-valued relations can be
        # created.
        record.save()

        for listset in ListSet.objects.all():
            if listset.spec in value:
                record.listsets.add(listset)

    def _checksum(self, metadata):
        """Return the MD5 hex digest of ``str(metadata)``.

        Used to detect whether harvested metadata changed since the last
        update.  The digest depends on the textual representation of the
        dictionary, exactly as it was computed before this helper existed.
        """
        raw = str(metadata)
        # hashlib needs bytes; on Python 2 ``str`` already is bytes.
        if not isinstance(raw, bytes):
            raw = raw.encode('utf-8')
        return hashlib.md5(raw).hexdigest()

    def _update_record(self, r, metadata):
        """Copy `metadata` onto record `r`, clean its strings and save it."""
        for k in metadata:
            # NOTE(review): any metadata key that matches an attribute of
            # this class is dispatched as a handler (``listsets`` is the
            # only one visible here) -- confirm that harvested keys can
            # never collide with other method names such as ``get``.
            if hasattr(self, k):
                method = getattr(self, k)
                method(r, metadata[k])
            else:
                setattr(r, k, self.encoder.encode(k, metadata[k]))

        r.last_checksum = self._checksum(metadata)
        r.last_update = datetime.datetime.today()

        # Store clean character strings in the database for everything that
        # came from harvested data.
        for k in META:
            setattr(r, k, self.encoder.propre(getattr(r, k)))

        r.save()

    def _save(self, metadata):
        """Create a new record from `metadata` and return its id."""
        r = Record()
        self._update_record(r, metadata)
        return r.id

    def _modify(self, id, metadata):
        """Update record `id` from `metadata` if the metadata changed.

        Returns ``False`` when the checksum matches the stored one (nothing
        is written), ``True`` after the record has been updated.
        """
        r = Record.objects.get(id=id)

        # Skip the update when the metadata has not been modified.
        if self._checksum(metadata) == r.last_checksum:
            return False

        self._update_record(r, metadata)
        return True

    def _combine(self, result_lists, op):
        """Merge several ``(id, score)`` result lists.

        `op` is ``"|"`` (union of the ids) or ``"&"`` (intersection of the
        ids); any other value yields an empty result.  The score of an id is
        the sum of its scores over all the lists.  Returns a list of
        ``(id, score)`` pairs in no particular order.
        """
        scores = {}
        id_sets = []

        for results in result_lists:
            id_sets.append(set(row[0] for row in results))
            for record_id, score in results:
                scores[record_id] = scores.get(record_id, 0) + score

        if not id_sets:
            return []

        if op == "|":
            matches = set().union(*id_sets)
        elif op == "&":
            # A real intersection: once it becomes empty it stays empty,
            # instead of restarting from the next set.
            matches = set(id_sets[0]).intersection(*id_sets[1:])
        else:
            matches = set()

        return [(x, scores[x]) for x in matches]

    def _text_search(self, q, fields=None):
        """Run a MySQL full-text search for `q` and return the raw rows.

        `fields` lists the columns to match against; it defaults to every
        ``META`` key flagged ``text_search``.  Boolean mode is used only when
        searching the single field ``"subject"``.  Only validated records
        belonging to validated list sets are returned, as ``(id, score)``
        rows ordered by decreasing score.  An empty list is returned when
        there is nothing to search for or no field to search in.
        """
        if fields is None:
            fields = [x for x in META if META[x].get("text_search", False)]

        splitter = re.compile(r'\W+', re.U)
        terms = " ".join(word for word in splitter.split(q) if word)
        if not fields or not terms:
            return []

        suffix = ""
        if len(fields) == 1 and fields[0] == "subject":
            suffix = " IN BOOLEAN MODE"

        # Column names cannot be bound as parameters; they come from the
        # application's own field constants.  The search terms are bound by
        # the database driver (the ``%%s`` becomes a ``%s`` placeholder).
        match_expr = "+".join(
            "MATCH(`%s`) AGAINST (%%s%s)" % (k, suffix) for k in fields)

        sql = ("SELECT r.id, (%s) AS score FROM savoirs_record AS r "
               "LEFT JOIN savoirs_record_listsets AS rl ON r.id = rl.record_id "
               "JOIN savoirs_listset AS l ON rl.listset_id = l.spec "
               "WHERE (%s) AND r.validated = 1 AND l.validated = 1 "
               "GROUP BY r.id "
               "HAVING score > 0 ORDER BY score DESC") % (match_expr, match_expr)
        # The match expression appears twice (SELECT and WHERE), with one
        # placeholder per field each time.
        params = [terms] * (2 * len(fields))

        from django.db import connection
        cursor = connection.cursor()
        cursor.execute(sql, params)
        return cursor.fetchall()

    ############################################################################
    # API
    ############################################################################

    def add(self, metadata):
        """Add the resource described by `metadata`.

        If a resource with the same URI already exists it is updated
        instead.

        Returns a dict with the keys ``record_id``, ``added`` and
        ``updated``.
        """
        added = updated = False
        exists = self.search(q={URI: metadata[URI]})
        if len(exists) > 0:
            # A URI search yields ``(id, score)`` pairs, hence the double
            # index.
            id = exists[0][0]
            updated = self.update(int(id), metadata)
        else:
            added = True
            id = self._save(metadata)
        return {'record_id': id, 'added': added, 'updated': updated}

    def delete(self, id):
        """Delete the resource identified by `id`."""
        r = Record.objects.get(id=id)
        r.delete()

    def update(self, id, metadata):
        """Update the resource identified by `id` with `metadata`.

        Returns ``True`` if the record was modified, ``False`` if the
        metadata was unchanged.  An exception is raised if the resource does
        not exist (in practice the one raised by the record lookup).
        """
        record_id = int(id)
        if self.get(record_id) is None:
            raise Exception("Objet inexistant")
        return self._modify(record_id, metadata)

    def get(self, id):
        """Return the metadata structure of the resource identified by `id`.

        `id` may be a list or tuple to fetch several resources at once, in
        which case a list of structures is returned.  Items of that list may
        themselves be rows (tuples or lists) whose first element is the id.
        """
        if isinstance(id, (tuple, list)):
            loaded = []
            for item in id:
                # Unwrap result rows only; indexing blindly would turn the
                # string id "12" into "1".
                if isinstance(item, (tuple, list)):
                    item = item[0]
                loaded.append(self._load(int(item)))
            return loaded
        return self._load(int(id))

    def ids(self):
        """Return the complete list of resource ids."""
        # Fetch only the id column instead of instantiating every record.
        return list(Record.objects.all().values_list('id', flat=True))

    def search(self, q):
        """Run a multi-criteria search described by the dictionary `q`.

        * ``q["q"]`` non-empty: simple full-text search;
        * otherwise, a ``URI`` key: case-insensitive lookup by URI;
        * otherwise: advanced search on ``creator``, ``title``,
          ``description`` and ``subject``, combined with ``q["operator"]``
          (``"|"`` by default) and sorted by decreasing score.

        Returns a list of ids; the data of each result must be loaded
        afterwards.  An empty `q` returns every id.

        NOTE(review): a URI search returns ``[(id, 1)]`` rather than
        ``[id]``; ``add`` relies on that shape, so it is preserved here.
        """
        if not q:
            return self.ids()

        rc = []
        sets = []

        # "Simple" search
        ww = q.get("q", "").strip()
        if len(ww) > 0:
            s = self._text_search(ww)
            if len(s) > 0:
                rc.extend(s)
        # URI search
        elif q.get(URI) is not None:
            try:
                record = Record.objects.get(uri__iexact=q.get(URI))
            except (Record.DoesNotExist, Record.MultipleObjectsReturned):
                # No usable match: leave the result empty, but let real
                # database errors propagate.
                pass
            else:
                rc.append([(record.id, 1)])
        # Advanced search
        else:
            creator = q.get("creator", "")
            title = q.get("title", "")
            description = q.get("description", "")
            subject = q.get("subject", "")

            if len(creator) > 0:
                sets.append(self._text_search(creator, [CREATOR, CONTRIBUTOR]))
            if len(title) > 0:
                sets.append(self._text_search(title, [TITLE, ALT_TITLE]))
            if len(description) > 0:
                sets.append(self._text_search(description, [DESCRIPTION, ABSTRACT]))
            if len(subject) > 0:
                sets.append(self._text_search(subject, [SUBJECT]))
            rc = self._combine(sets, q.get("operator", "|"))
            rc.sort(key=operator.itemgetter(1), reverse=True)

        # Keep only the first element of each result row.
        return [x[0] for x in rc]