Commit | Line | Data |
---|---|---|
0cc5f772 | 1 | # -*- encoding: utf-8 -*- |
23b5b3d5 | 2 | import simplejson, re, datetime, operator, hashlib |
ecc62927 | 3 | from savoirs.globals import * |
d972b61d | 4 | from savoirs.models import Record, ListSet |
0cc5f772 | 5 | |
class SEPEncoder:
    """Encode and decode harvested metadata values.

    A field declared in ``META`` with ``'type' == 'array'`` is encoded as a
    single string whose items are joined with ``separator`` and decoded by
    splitting on that same separator. Any other field is passed through
    unchanged in both directions.
    """

    # Delimiter placed between the items of an array-typed field.
    separator = ", "

    def _is_array(self, field):
        """Return True when `field` is declared in ``META`` as an array."""
        return field in META and META[field]['type'] == 'array'

    def encode(self, field, data):
        """Return the encoded form of `data` for `field`.

        Array fields are joined into one string; other values are returned
        as received.
        """
        if self._is_array(field):
            return self.separator.join(data)
        return data

    def decode(self, field, data):
        """Return the decoded form of `data` for `field`.

        Array fields are split back into a list of strings; other values
        are returned as received.
        """
        if self._is_array(field):
            return data.split(self.separator)
        return data

    # Disabled one-off migration sketch (JSON-encoded values -> this
    # encoder), kept for reference only:
    #
    #def migrate(self,):
    #    for r in Record.objects.all():
    #        for f in META.keys():
    #            json = getattr(r, f)
    #            if json is not None:
    #                normal = simplejson.loads(json)
    #                new = self.encode(f, normal)
    #                setattr(r, f, new)
    #        r.save()
0cc5f772 CR |
33 | |
class SEP:
    """Store, load, update, delete and search harvested records.

    Records are persisted through the ``Record`` model; the set of metadata
    fields is driven by the ``META`` mapping and values are converted with
    ``encoder`` on their way in and out.
    """

    # Converts metadata values to and from the form kept on ``Record``.
    encoder = SEPEncoder()

    ############################################################################
    # INTERNAL METHODS
    ############################################################################

    def _load(self, id):
        """Return the metadata dict of the record identified by `id`.

        Only the ``META`` fields that exist on the record and are not None
        are included. Whatever ``Record.objects.get`` raises for an unknown
        `id` is propagated.
        """
        r = Record.objects.get(id=id)
        meta = {}
        for k in META.keys():
            if hasattr(r, k):
                v = getattr(r, k)
                if v is not None:
                    meta[k] = self.encoder.decode(k, v)
        return meta

    # Special handling for some keys of the metadata structure: a metadata
    # key whose name matches an attribute of this class is dispatched to it
    # by `_update_record` instead of being stored as a plain attribute.
    def listsets(self, record, value):
        """Attach to `record` every ``ListSet`` whose ``spec`` is in `value`."""
        # The record is saved first so that it has an id before relations
        # are added to it.
        record.save()
        for listset in [ls for ls in ListSet.objects.all() if ls.spec in value]:
            record.listsets.add(listset)

    def _update_record(self, r, metadata):
        """Copy `metadata` onto record `r`, stamp it and save it.

        Keys that name an attribute of this object are handled by calling
        that attribute with ``(r, value)``; every other key is encoded and
        set directly on the record. The checksum of `metadata` and the
        current date are stored before saving.
        """
        for k in metadata.keys():
            if hasattr(self, k):
                method = getattr(self, k)
                method(r, metadata[k])
            else:
                setattr(r, k, self.encoder.encode(k, metadata[k]))

        r.last_checksum = hashlib.md5(str(metadata)).hexdigest()
        r.last_update = datetime.datetime.today()
        r.save()

    def _save(self, metadata):
        """Create a new record from `metadata` and return its id."""
        r = Record()
        self._update_record(r, metadata)
        return r.id

    def _modify(self, id, metadata):
        """Update record `id` with `metadata` if its content changed.

        Returns False when the checksum of `metadata` equals the one stored
        on the record (nothing is written), True after an actual update.
        """
        r = Record.objects.get(id=id)

        # Skip the write when the metadata has not changed.
        if hashlib.md5(str(metadata)).hexdigest() == r.last_checksum:
            return False

        self._update_record(r, metadata)
        return True

    def _combine(self, result_lists, op):
        """Merge several ``(id, score)`` result lists into one.

        `op` is ``"|"`` (ids present in any list) or ``"&"`` (ids present
        in every list); any other value yields an empty result. The score
        of an id is the sum of its scores over all the lists. The returned
        list of ``(id, score)`` pairs is in no particular order.
        """
        scores = {}
        simple_sets = []

        for results in result_lists:
            simple_sets.append(set([x[0] for x in results]))
            for (record_id, score) in results:
                scores[record_id] = scores.get(record_id, 0) + score

        matches = set()
        if op == "|":
            for s in simple_sets:
                matches |= s
        elif op == "&":
            # Seed with the first set explicitly: an intersection that has
            # become empty must stay empty instead of being restarted from
            # the next set.
            if simple_sets:
                matches = set(simple_sets[0])
                for s in simple_sets[1:]:
                    matches &= s

        return [(x, scores[x]) for x in matches]

    def _text_search(self, q, fields=None):
        """Run a full-text query for `q` and return ``(id, score)`` rows.

        `fields` lists the columns to match against; when None, every
        ``META`` field flagged ``text_search`` is used. Scores of the
        individual fields are added together, only validated records with
        a positive score are returned, best score first. Boolean mode is
        used when the only field searched is ``subject``.
        """
        if fields is None:
            fields = [x for x in META.keys() if META[x].get("text_search", False)]
        if not fields:
            # Nothing to match against: avoid sending malformed SQL.
            return ()

        # Keep only word characters from the query text.
        words = re.compile(r'\W+', re.U).split(q)
        terms = " ".join(words)

        suffix = ""
        if len(fields) == 1 and fields[0] == "subject":
            suffix = " IN BOOLEAN MODE"

        # Column names come from internal constants; the search text itself
        # is passed as a query parameter (the doubled %% leaves a %s
        # placeholder in place for the database driver).
        matches = ["MATCH(`%s`) AGAINST (%%s%s)" % (k, suffix) for k in fields]
        m = "+".join(matches)

        sql = ("SELECT id, (" + m + ") AS score FROM savoirs_record "
               "WHERE (" + m + ") AND validated = 1 "
               "HAVING score > 0 ORDER BY score DESC")
        # The match expression appears twice, so each field needs the
        # search text twice.
        params = [terms] * (2 * len(fields))

        from django.db import connection
        cursor = connection.cursor()
        cursor.execute(sql, params)
        return cursor.fetchall()

    ############################################################################
    # API
    ############################################################################

    def add(self, metadata):
        """Add the resource described by `metadata`, or update it.

        When a record with the same ``URI`` already exists it is updated
        instead of being created.

        Returns a dict with keys ``record_id`` (id of the created or
        existing record), ``added`` (True when a record was created) and
        ``updated`` (True when an existing record was actually modified).
        """
        added = updated = False
        exists = self.search(q={URI: metadata[URI]})
        if len(exists) > 0:
            # URI searches return (id, score) pairs, hence the double index.
            id = exists[0][0]
            updated = self.update(int(id), metadata)
        else:
            added = True
            id = self._save(metadata)
        return {'record_id': id, 'added': added, 'updated': updated}

    def delete(self, id):
        """Delete the resource identified by `id`."""
        r = Record.objects.get(id=id)
        r.delete()

    def update(self, id, metadata):
        """Update the resource identified by `id` with `metadata`.

        Returns True when the record was modified, False when `metadata`
        is unchanged. An exception is raised when the resource cannot be
        loaded.
        """
        if self.get(int(id)) is not None:
            return self._modify(int(id), metadata)
        else:
            raise Exception("Objet inexistant")

    def get(self, id):
        """Return the metadata structure of the resource identified by `id`.

        `id` may be a tuple or list, in which case a list of structures is
        returned. Elements that are themselves tuples or lists (for
        instance ``(id, score)`` rows) are reduced to their first item.
        """
        if isinstance(id, (tuple, list)):
            rc = []
            for i in id:
                # Only unwrap real rows: indexing a string id such as "12"
                # would otherwise silently turn it into "1".
                if isinstance(i, (tuple, list)):
                    i = i[0]
                rc.append(self._load(int(i)))
        else:
            rc = self._load(int(id))
        return rc

    def ids(self):
        """Return the ids of all the resources."""
        return [x.id for x in Record.objects.all()]

    def search(self, q):
        """Run a multi-criteria search described by the dict `q`.

        * ``q["q"]`` non-empty: simple full-text search on the default
          fields.
        * otherwise ``q[URI]`` set: case-insensitive lookup by URI.
        * otherwise: advanced search on ``creator``, ``title``,
          ``description`` and ``subject``, combined with ``q["operator"]``
          (``"|"`` by default) and ordered by decreasing score.
        * empty `q`: every id is returned.

        Returns a list of ids; the data of each result has to be loaded
        afterwards. NOTE: a URI lookup returns ``[(id, 1)]`` rather than
        ``[id]``; `add` relies on that shape, so it is kept as is.
        """
        rc = []
        sets = []

        if len(q) > 0:
            # "Simple" search
            ww = q.get("q", "").strip()
            if len(ww) > 0:
                s = self._text_search(ww)
                if len(s) > 0:
                    rc.extend(s)
            # URI search
            elif q.get(URI) is not None:
                try:
                    record = Record.objects.get(uri__iexact=q.get(URI))
                except (Record.DoesNotExist, Record.MultipleObjectsReturned):
                    # No single match: report no result.
                    pass
                else:
                    # Wrapped in a list so that the id extraction below
                    # leaves an (id, score) pair.
                    rc.append([(record.id, 1)])
            # Advanced search
            else:
                creator = q.get("creator", "")
                title = q.get("title", "")
                description = q.get("description", "")
                subject = q.get("subject", "")

                if len(creator) > 0:
                    sets.append(self._text_search(creator, [CREATOR, CONTRIBUTOR]))
                if len(title) > 0:
                    sets.append(self._text_search(title, [TITLE, ALT_TITLE]))
                if len(description) > 0:
                    sets.append(self._text_search(description, [DESCRIPTION, ABSTRACT]))
                if len(subject) > 0:
                    sets.append(self._text_search(subject, [SUBJECT]))
                rc = self._combine(sets, q.get("operator", "|"))
                rc.sort(key=operator.itemgetter(1), reverse=True)

            if len(rc) > 0:
                rc = [x[0] for x in rc]
        else:
            rc = self.ids()
        return rc