Commit | Line | Data |
---|---|---|
0cc5f772 | 1 | # -*- encoding: utf-8 -*- |
23b5b3d5 | 2 | import simplejson, re, datetime, operator, hashlib |
ecc62927 | 3 | from savoirs.globals import * |
23b5b3d5 | 4 | from savoirs.models import Record |
0cc5f772 | 5 | |
class SEPEncoder:
    """
    Encode and decode harvested metadata values for storage.

    Fields declared in ``META`` with ``'type': 'array'`` are stored as a
    single string whose items are joined with ``separator``; every other
    field is passed through untouched.
    """
    # Delimiter placed between the items of an array field once encoded.
    # NOTE(review): an item that itself contains ", " will be split in two
    # when decoded again -- confirm harvested values never contain it.
    separator = ", "

    def _is_array(self, field):
        """Return True when `field` is declared in META with type 'array'."""
        return field in META and META[field].get('type') == 'array'

    def encode(self, field, data):
        """Return the stored form of `data` for `field`.

        Array fields are joined into one string; `None` and non-array
        values are returned unchanged.
        """
        if data is not None and self._is_array(field):
            return self.separator.join(data)
        return data

    def decode(self, field, data):
        """Return the in-memory form of the stored value `data` for `field`.

        Array fields are split back into a list. An empty stored string
        decodes to an empty list (not ``['']``) so that
        ``decode(f, encode(f, []))`` round-trips. `None` and non-array
        values are returned unchanged.
        """
        if data is None or not self._is_array(field):
            return data
        if data == "":
            return []
        return data.split(self.separator)

    # Historical one-off migration sketch (JSON-encoded columns to the
    # separator-based encoding), left disabled as in the original:
    #
    #def migrate(self,):
    #    for r in Record.objects.all():
    #        for f in META.keys():
    #            json = getattr(r, f)
    #            if json is not None:
    #                normal = simplejson.loads(json)
    #                new = self.encode(f, normal)
    #                setattr(r, f, new)
    #        r.save()
0cc5f772 CR |
33 | |
class SEP:
    """
    Facade used to store, update, fetch and search harvested records.

    Records are persisted through the `Record` model; values are converted
    to and from their stored form by `encoder`.
    """

    encoder = SEPEncoder()

    ############################################################################
    # INTERNAL METHODS
    ############################################################################

    def _checksum(self, metadata):
        """Return the MD5 hex digest of ``str(metadata)``.

        The digest input is kept identical to the historical one so that
        checksums already stored on records remain comparable.
        NOTE(review): ``str(dict)`` depends on key order, so two equal
        dictionaries built in a different order may hash differently.
        """
        raw = str(metadata)
        if not isinstance(raw, bytes):
            # Text strings must be encoded before hashing (Python 3).
            raw = raw.encode("utf-8")
        return hashlib.md5(raw).hexdigest()

    def _load(self, id):
        """Return the metadata dictionary of the record identified by `id`.

        Only the META fields that exist on the record and are not None are
        included; each value is decoded by `encoder`. The model's
        "does not exist" error propagates when no record has this `id`.
        """
        record = Record.objects.get(id=id)
        meta = {}
        for key in META.keys():
            value = getattr(record, key, None)
            if value is not None:
                meta[key] = self.encoder.decode(key, value)
        return meta

    def _save(self, metadata):
        """Create a new record from `metadata` and return its id."""
        record = Record()
        for key in metadata.keys():
            setattr(record, key, self.encoder.encode(key, metadata[key]))
        record.last_checksum = self._checksum(metadata)
        record.last_update = datetime.datetime.today()
        record.save()
        return record.id

    def _modify(self, id, metadata):
        """Apply `metadata` to the record `id` if its content changed.

        Returns False (and writes nothing) when the checksum of `metadata`
        equals the one stored on the record, True after a successful
        update.
        """
        record = Record.objects.get(id=id)

        # Skip the write when the harvested content has not changed.
        checksum = self._checksum(metadata)
        if checksum == record.last_checksum:
            return False

        for key in metadata.keys():
            setattr(record, key, self.encoder.encode(key, metadata[key]))

        # Remember the new checksum; otherwise every later harvest of the
        # same content would be seen as a modification again.
        record.last_checksum = checksum
        record.last_update = datetime.datetime.today()
        record.save()
        return True

    def _combine(self, result_lists, op):
        """Merge several ``(id, score)`` result lists.

        `op` is "|" (ids present in any list) or "&" (ids present in every
        list); any other value yields an empty result. The score of an id
        is the sum of its scores over all lists. Returns an unordered list
        of ``(id, score)`` tuples.
        """
        scores = {}
        id_sets = []
        for results in result_lists:
            id_sets.append(set([row[0] for row in results]))
            for (record_id, score) in results:
                scores[record_id] = scores.get(record_id, 0) + score

        if not id_sets or op not in ("|", "&"):
            return []

        # Seed with the first set, then fold the others in. An empty
        # running intersection must stay empty instead of being re-seeded
        # by the next set.
        matches = set(id_sets[0])
        for ids in id_sets[1:]:
            if op == "|":
                matches |= ids
            else:
                matches &= ids

        return [(x, scores[x]) for x in matches]

    def _text_search(self, q, fields=None):
        """Run a MySQL full-text search for `q` and return the rows.

        `fields` lists the columns to match; by default every META field
        flagged ``text_search`` is used. Each returned row is
        ``(id, score)``, ordered by decreasing score, where the score is
        the sum of the per-column MATCH relevance values.
        """
        if fields is None:
            fields = [x for x in META.keys() if META[x].get("text_search", False)]
        if not fields:
            # Nothing to match against: the generated SQL would be invalid.
            return []

        # Reduce the query to its words (Unicode-aware), separated by spaces.
        splitter = re.compile(r'\W+', re.U)
        needle = " ".join(splitter.split(q))

        # A search restricted to the subject column uses boolean mode.
        suffix = ""
        if len(fields) == 1 and fields[0] == "subject":
            suffix = " IN BOOLEAN MODE"

        # Column names are internal constants; the search text is passed
        # as a bound parameter (the %s placeholder) rather than being
        # interpolated into the SQL.
        clauses = ["MATCH(`%s`) AGAINST (%%s%s)" % (k, suffix) for k in fields]
        score_expr = "+".join(clauses)

        sql = "SELECT id, (" + score_expr + ") AS score FROM savoirs_record WHERE (" \
                + score_expr + ") HAVING score > 0 ORDER BY score DESC"
        # The score expression appears twice, hence twice the parameters.
        params = [needle] * (2 * len(clauses))

        from django.db import connection
        cursor = connection.cursor()
        cursor.execute(sql, params)
        return cursor.fetchall()

    ############################################################################
    # API
    ############################################################################

    def add(self, metadata):
        """Add the resource described by `metadata`.

        When a record with the same URI already exists it is updated
        instead. `metadata` must contain the `URI` key.

        Returns a dictionary with the keys ``record_id`` (id of the record
        created or found), ``added`` (True when a record was created) and
        ``updated`` (True when an existing record was modified).
        """
        added = updated = False
        exists = self.search(q={URI: metadata[URI]})
        if len(exists) > 0:
            # A URI search yields (id, score) tuples; accept bare ids too.
            record_id = exists[0]
            if isinstance(record_id, (tuple, list)):
                record_id = record_id[0]
            updated = self.update(int(record_id), metadata)
        else:
            added = True
            record_id = self._save(metadata)
        return {'record_id': record_id, 'added': added, 'updated': updated}

    def delete(self, id):
        """Delete the resource identified by `id`."""
        Record.objects.get(id=id).delete()

    def update(self, id, metadata):
        """Update the resource identified by `id` with `metadata`.

        Returns True when the record was modified, False when `metadata`
        matches the stored checksum. The model's "does not exist" error is
        raised when no record has this `id`.
        """
        return self._modify(int(id), metadata)

    def get(self, id):
        """Return the metadata dictionary of the resource `id`.

        `id` may also be a list or tuple, in which case a list of
        dictionaries is returned. Items of that sequence may be bare ids
        or rows such as ``(id, score)``, whose first element is used.
        """
        if isinstance(id, (tuple, list)):
            loaded = []
            for item in id:
                # Unwrap result rows; leave scalar ids (including strings)
                # untouched so that "12" is not truncated to "1".
                if isinstance(item, (tuple, list)) and len(item) > 0:
                    item = item[0]
                loaded.append(self._load(int(item)))
            return loaded
        return self._load(int(id))

    def ids(self):
        """Return the complete list of resource ids."""
        # Fetch only the id column instead of instantiating every record.
        return list(Record.objects.values_list('id', flat=True))

    def search(self, q):
        """Run a multi-criteria search described by the dictionary `q`.

        * empty `q`: every id is returned;
        * ``q["q"]``: simple full-text search over all searchable fields;
        * ``q[URI]``: case-insensitive lookup of a single record by URI;
        * otherwise: advanced search on ``creator``, ``title``,
          ``description`` and ``subject``, combined with ``q["operator"]``
          ("|" by default, or "&").

        Returns a list of ids ordered by decreasing score; the data of
        each result has to be loaded afterwards (see `get`).
        NOTE: for historical reasons the URI lookup returns its single
        match as an ``(id, 1)`` tuple rather than a bare id. `add` and
        `get` accept both shapes, and the shape is kept for backward
        compatibility.
        """
        if not q:
            return self.ids()

        # Simple search
        simple = (q.get("q") or "").strip()
        if len(simple) > 0:
            rows = self._text_search(simple)
        # URI search
        elif q.get(URI) is not None:
            try:
                record = Record.objects.get(uri__iexact=q.get(URI))
            except (Record.DoesNotExist, Record.MultipleObjectsReturned):
                # Best effort: no unique match means no result.
                return []
            return [(record.id, 1)]
        # Advanced search
        else:
            creator = q.get("creator") or ""
            title = q.get("title") or ""
            description = q.get("description") or ""
            subject = q.get("subject") or ""

            sets = []
            if len(creator) > 0:
                sets.append(self._text_search(creator, [CREATOR, CONTRIBUTOR]))
            if len(title) > 0:
                sets.append(self._text_search(title, [TITLE, ALT_TITLE]))
            if len(description) > 0:
                sets.append(self._text_search(description, [DESCRIPTION, ABSTRACT]))
            if len(subject) > 0:
                sets.append(self._text_search(subject, [SUBJECT,]))
            rows = self._combine(sets, q.get("operator", "|"))
            rows.sort(key=operator.itemgetter(1), reverse=True)

        return [row[0] for row in rows]