# -*- encoding: utf-8 -*-
+import re
from django.contrib import admin
-from models import SourceActualite, Actualite, Discipline, Evenement, Record
+from django.utils.safestring import mark_safe
+from models import SourceActualite, Actualite, Discipline, Evenement, Record, HarvestLog
+from savoirs.globals import META
from savoirs.lib.backend import Backend
admin.site.register(Actualite)
admin.site.register(Discipline)
admin.site.register(Evenement)
+# Ces deux classes permettent d'implémenter la possibilité d'avoir un champ readonly_fields
+# dans l'administration.
+# Ce champ est devenu natif à partir de la version 1.2
+# http://docs.djangoproject.com/en/dev/ref/contrib/admin/#django.contrib.admin.ModelAdmin.readonly_fields
+from django import forms
+class ReadOnlyWidget(forms.Widget):
+    """Widget that renders a field as plain, non-editable text.
+
+    `original_value` is the value read from the model instance; it is also
+    what `value_from_datadict` returns, so whatever is posted for the field
+    is ignored.  `display_value`, when it is not None, is rendered in place
+    of `original_value`.
+    """
+
+    # Compiled once for all instances; used with match(), so the URL has to
+    # start at the beginning of the rendered text.
+    url_pattern = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
+
+    def __init__(self, original_value, display_value):
+        self.original_value = original_value
+        self.display_value = display_value
+
+        super(ReadOnlyWidget, self).__init__()
+
+    def render(self, name, value, attrs=None):
+        """Return the HTML shown for the field.
+
+        `name`, `value` and `attrs` are accepted for the widget API but are
+        not used: the text comes from the values given to the constructor.
+        Text starting with an http(s) URL is wrapped in a link that opens in
+        a new window.
+        """
+        # Function-scope import: `escape` is not imported at module level.
+        from django.utils.html import escape
+
+        if self.display_value is not None:
+            output = self.display_value
+        else:
+            output = unicode(self.original_value)
+
+        # Test for a URL on the raw text, then escape: the result is marked
+        # safe below, so any markup contained in the stored value must be
+        # neutralised first.
+        is_url = self.url_pattern.match(output)
+        output = escape(output)
+        if is_url:
+            output = "<a target='_blank' href='%s'>%s</a>" % (output, output)
+        return mark_safe(output)
+
+    def value_from_datadict(self, data, files, name):
+        """Ignore the submitted data and return the original value."""
+        return self.original_value
+
+class ReadOnlyAdminFields(object):
+    """Mixin that renders the fields named in `readonly_fields` read-only.
+
+    It relies on a later base class providing `get_form` (it is combined
+    with admin.ModelAdmin in this module, listed before it).
+    """
+
+    def get_form(self, request, obj=None, **kwargs):
+        """Build the form class, then swap in ReadOnlyWidget where needed.
+
+        Extra keyword arguments are forwarded untouched to the parent
+        `get_form`.  For every name in `readonly_fields` that is a field of
+        the form, the widget is replaced by a ReadOnlyWidget holding the
+        value read from `obj` and the field is made optional.
+        """
+        form = super(ReadOnlyAdminFields, self).get_form(request, obj, **kwargs)
+
+        for field_name in getattr(self, 'readonly_fields', ()):
+            if field_name not in form.base_fields:
+                continue
+
+            # Prefer the human-readable label when the instance offers a
+            # get_<field>_display method.
+            if hasattr(obj, 'get_%s_display' % field_name):
+                display_value = getattr(obj, 'get_%s_display' % field_name)()
+            else:
+                display_value = None
+
+            field = form.base_fields[field_name]
+            # `obj` is None when no instance is given; getattr then falls
+            # back to the empty string.
+            field.widget = ReadOnlyWidget(getattr(obj, field_name, ''), display_value)
+            field.required = False
+        return form
+
+
+class RecordAdmin(ReadOnlyAdminFields, admin.ModelAdmin):
+ # Admin configuration for Record. Every field named in META is made
+ # read-only and searchable (see __init__ below).
+ fields = [
+ 'server',
+ 'title',
+ 'creator',
+ 'description',
+ 'modified',
+ 'identifier',
+ 'uri',
+ 'source',
+ 'contributor',
+ 'publisher',
+ 'type',
+ 'format',
+ 'language',
+ 'disciplines',
+ 'thematiques',
+ ]
+
+ # Placeholders only: both attributes are replaced per instance in __init__.
+ search_fields = []
+ readonly_fields = []
-class RecordAdmin(admin.ModelAdmin):
list_filter = ('server',)
list_display = (
#OAI et extra AUF
#'source',
'modified',
'creator',
- 'contributor',
- 'language',
- 'publisher',
+ #'contributor',
+ #'language',
+ #'publisher',
'format',
'type',
#'orig_lang',
)
+ def __init__(self, *args, **kwargs):
+ """Override initialisation so that the search fields and the read-only
+ fields are both set dynamically to the keys of META."""
+ self.search_fields = META.keys()
+ self.readonly_fields = META.keys()
+ super(RecordAdmin, self).__init__(*args, **kwargs)
+
def _uri(self, obj):
""" """
+ # Returns an HTML link to obj.uri that opens in a new window.
+ # NOTE(review): obj.uri is interpolated without HTML escaping - confirm it can be trusted.
return "<a target='_blank' href='%s'>%s</a>" % (obj.uri, obj.uri)
def _description(self, obj):
""" """
- return "%s..." % obj.description[:140]
+ # Descriptions longer than `max` characters are cut and suffixed with
+ # "..."; None and shorter values are returned unchanged.
+ # NOTE(review): the local name `max` shadows the builtin inside this method.
+ max = 140
+ if obj.description is not None and len(obj.description) > max:
+ return "%s..." % obj.description[:max]
+ else:
+ return obj.description
admin.site.register(Record, RecordAdmin)
+class HarvestLogAdmin(ReadOnlyAdminFields, admin.ModelAdmin):
+    """Read-only admin view of the harvest log entries."""
+    fields = ['context', 'name', 'added', 'updated', 'record']
+    list_display = fields + ['date']
+    # `admin_order_fields` is not a ModelAdmin option and was silently
+    # ignored; `ordering` is the option that sorts the change list.
+    ordering = ['date']
+    # A bare foreign key cannot be used with the admin's text lookups, so
+    # the search goes through the title of the related record.
+    search_fields = ['context', 'name', 'added', 'updated', 'record__title']
+    # Separate list object, so that changing one option never alters another.
+    readonly_fields = list(fields)
+    list_filter = ('context',)
+
+admin.site.register(HarvestLog, HarvestLogAdmin)
#####
# Meta fields
SERVER = 'server'
+LAST_CHECKSUM = 'last_checksum'
+LAST_UPDATE = 'last_update'
TITLE = 'title'
ALT_TITLE = 'alt_title'
CREATOR = 'creator'
+++ /dev/null
-# -*- encoding: utf-8 -*-
-import simplejson, re, datetime, operator
-from savoirs.globals import *
-from savoirs.models import Record, HarvestLog
-
-
-class Backend:
- def close (self):
- pass
-
- def add (self, metadata):
- r = Record ()
- for k in metadata.keys ():
- setattr (r, k, simplejson.dumps(metadata[k]))
- r.save()
-
- def delete (self, id):
- r = Record.objects.get(id = id)
- r.delete()
-
- def update (self, id, metadata):
- r = Record.objects.get(id = id)
- for k in metadata.keys ():
- setattr (r, k, simplejson.dumps(metadata[k]))
- r.save()
-
- def get (self, id):
- r = Record.objects.get(id = id)
- meta = {}
- for k in META.keys ():
- if hasattr (r, k):
- v = getattr (r, k)
- if v is not None:
- meta[k] = simplejson.loads(v)
- return meta
-
- def ids (self):
- return [x.id for x in Record.objects.all()]
-
- def _text_search (self, q, fields = None):
- if fields is None:
- fields = [x for x in META.keys() if META[x].get("text_search", False)]
-
- w = re.compile (r'\W+', re.U)
- words = w.split (q)
-
- matches = []
- suffix = ""
- if len(fields)==1 and fields[0] == "subject":
- suffix = " IN BOOLEAN MODE"
-
- for k in fields:
- matches.append ("MATCH(`%s`) AGAINST ('%s'%s)" % (k, " ".join(words), suffix))
- m = "+".join (matches)
-
- q = "SELECT id, (" + m + ") AS score FROM savoirs_record WHERE (" \
- + m + ") HAVING score > 0 ORDER BY score DESC"
-
- from django.db import connection, transaction
- cursor = connection.cursor()
- cursor.execute(q)
- rc = cursor.fetchall()
- return rc
-
- def filter_string_contains (self, set, q, key):
- rc = []
- words = q.get (key)
- if words:
- r = re.compile (r'%s' % words, re.IGNORECASE)
- for k in set:
- str = self.get(k).get(key, "").encode("utf-8")
- if r.search (str) is not None:
- rc.append (k)
- else:
- rc = set
- return rc
-
- def filter_string_equals (self, q, key):
- rc = []
- keys = self.ids ()
- for k in keys:
- str = self.get(k).get(key, "")
- if str.lower() == q[key].lower():
- rc.append ((k, 1))
- return rc
-
- def _score (self, matches):
- rc = 0
- for i in matches:
- for j in i:
- if len (j.strip()) > 0:
- rc += 1
- return rc
-
- def _combine (self, result_lists, op):
- scores = {}
- simple_sets = []
-
- for list in result_lists:
- simple_sets.append (set([x[0] for x in list]))
- for (id, score) in list:
- if scores.get (id) is None:
- scores[id] = 0
- scores[id] += score
-
- matches = []
- for s in simple_sets:
- if op == "|":
- matches = set(matches) | s
- elif op == "&":
- if len (matches) == 0:
- matches = s
- else:
- matches = set(matches) & s
- #print "EE", matches
-
- return [(x, scores[x]) for x in matches]
-
- def search (self, q):
- rc = []
- sets = []
-
- if len (q) > 0:
- # Recherche "simple"
- ww = simplejson.dumps(q.get ("q", "").strip())[1:-1]
- if len (ww) > 0:
- s = self._text_search (ww)
- if len(s) > 0:
- rc.append (s)
- # Recherche URL
- elif q.get (URI) is not None:
- s = []
- try:
- s.append((Record.objects.get(uri__iexact = \
- "\"" + q.get(URI) + "\"").id, 1))
- rc.append(s)
- except: pass
- # Recherche avancée
- else:
- creator = simplejson.dumps(q.get ("creator", ""))[1:-1]
- title = simplejson.dumps(q.get ("title", ""))[1:-1]
- description = simplejson.dumps(q.get ("description", ""))[1:-1]
- subject = simplejson.dumps(q.get ("subject", ""))[1:-1]
-
- if len (creator) > 0:
- sets.append (self._text_search (creator, [CREATOR, CONTRIBUTOR]))
- if len (title) > 0:
- sets.append (self._text_search (title, [TITLE, ALT_TITLE]))
- if len (description) > 0:
- sets.append (self._text_search (description, [DESCRIPTION, ABSTRACT]))
- if len (subject) > 0:
- sets.append (self._text_search (subject, [SUBJECT,]))
- rc = self._combine (sets, q.get ("operator", "|"))
- rc.sort (key = operator.itemgetter(1), reverse = True)
-
- if len(rc) > 0:
- rc = [x[0] for x in rc]
-
- else:
- rc = self.ids()
-
- return rc
-
- def add_log (self, name, count):
- try:
- t = HarvestLog.objects.get(name = name)
- except:
- t = HarvestLog(name = name)
-
- t.count = count
- t.date = datetime.datetime.today()
- t.save()
-
- def logs (self):
- rc = {}
- tmp = HarvestLog.objects.all()
- for r in tmp:
- rc[r.name] = (r.date, r.count)
- return rc
-
-
-
# -*- encoding: utf-8 -*-
import sys, os, time, traceback
from auf_savoirs_en_partage.backend_config import RESOURCES
+from savoirs.models import HarvestLog
from sep import SEP
def import_all ():
traceback.print_exc(file=sys.stdout)
print '-'*60
nodes = []
- print "Ajout de", len(nodes), "references"
- print "S:", time.time ()
+
+ added = updated = 0
for node in nodes:
node['server'] = name
- sep.add (node)
- sep.add_log (name, len(nodes))
- print "F:", time.time ()
+ status = sep.add (node)
+
+ if status['added']:
+ added += 1
+ if status['updated']:
+ updated += 1
+ message = status
+ message.update({'context':'record', 'name':name})
+ HarvestLog.add(message)
+
+ message = {'context':'moisson', 'name':name, 'added':added, 'updated':updated}
+ HarvestLog.add(message)
del (sep)
# -*- encoding: utf-8 -*-
-from exceptions import Exception
-import sys, time
-
-from auf_savoirs_en_partage.backend_config import RESOURCES
+import simplejson, re, datetime, operator, hashlib
from savoirs.globals import *
-from backend import Backend
+from savoirs.models import Record
+class SEPEncoder:
+    """Encode and decode harvested metadata values for storage.
+
+    Fields declared in META with type 'array' are stored as one string, the
+    items joined with `separator`; every other value is passed through
+    unchanged.  An item that itself contains the separator cannot be
+    recovered as a single item by `decode`.
+    """
+    separator = ", "
+
+    def _is_array(self, field):
+        """Tell whether META declares `field` with the type 'array'."""
+        return field in META and META[field]['type'] == 'array'
+
+    def encode(self, field, data):
+        """Return the storable form of `data` for `field`.
+
+        Only real sequences are joined: None and plain strings are returned
+        as they are (joining a string would insert the separator between
+        each of its characters).
+        """
+        if self._is_array(field) and data is not None \
+                and not isinstance(data, basestring):
+            return self.separator.join(data)
+        return data
+
+    def decode(self, field, data):
+        """Return the value of `field` rebuilt from its stored form `data`.
+
+        For an 'array' field an empty stored value gives an empty list, so
+        that an empty list survives an encode/decode round trip.
+        """
+        if self._is_array(field):
+            if not data:
+                return []
+            return data.split(self.separator)
+        return data
+
+    # Commented-out migration code, kept as found.
+    #def migrate(self,):
+    #    for r in Record.objects.all():
+    #        for f in META.keys():
+    #            json = getattr(r, f)
+    #            if json is not None:
+    #                normal = simplejson.loads(json)
+    #                new = self.encode(f, normal)
+    #                setattr(r, f, new)
+    #        r.save()
class SEP:
"""
+ Classe utilisée pour manipuler les données moissonnées.
"""
- backend = None
- def __init__ (self):
- self.backend = Backend ()
-
- def __del__ (self):
- self.backend.close ()
+ encoder = SEPEncoder()
-#############
-# API public
- def search (self, q = {}):
- """Effectue une recherche multi-critères, en fonction du dictionnaire
- `q`. Retourne une list d'`id`s uniquement. Les données pour chaque
- résultat doivent être chargées ulterieurement.
- """
- return self.backend.search (q)
+ ############################################################################
+ # MÉTHODES INTERNES
+ ############################################################################
- def get (self, id):
- """Recupérer la structure de métadonnées pour la ressource identifiée
- par `id`. `id` peut être une liste si on veut les structures de
- plusieurs ressources.
- """
- if isinstance (id, tuple) or isinstance (id, list):
- rc = []
- for i in id:
- try:
- i = i[0]
- except: pass
- rc.append (self.backend.get (int(i)))
- else:
- rc = self.backend.get (int(id))
+    def _load (self, id):
+        """Return the metadata of the record whose primary key is `id`.
+
+        The result maps each META key that exists on the record with a
+        non-None value to that value, decoded by the encoder.
+        """
+        record = Record.objects.get(id = id)
+        decoded = {}
+        for field in META.keys ():
+            if not hasattr (record, field):
+                continue
+            raw = getattr (record, field)
+            if raw is None:
+                continue
+            decoded[field] = self.encoder.decode(field, raw)
+        return decoded
+
+    def _save (self, metadata):
+        """Create a record from `metadata` and return its id.
+
+        Each value is encoded before being assigned.  The record also gets
+        the MD5 hex digest of `str(metadata)` and the current date and time.
+        """
+        record = Record ()
+        for field, value in metadata.items ():
+            setattr (record, field, self.encoder.encode(field, value))
+        digest = hashlib.md5(str(metadata))
+        record.last_checksum = digest.hexdigest()
+        record.last_update = datetime.datetime.today()
+        record.save()
+        return record.id
+
+    def _modify (self, id, metadata):
+        """Rewrite the record `id` with `metadata` if the data changed.
+
+        The MD5 hex digest of `str(metadata)` is compared with the checksum
+        stored on the record.  Returns False, leaving the record untouched,
+        when they are equal; otherwise the fields are overwritten, the
+        checksum and update date are refreshed, and True is returned.
+        """
+        r = Record.objects.get(id = id)
+
+        # Same digest as the stored one: nothing to write.
+        checksum = hashlib.md5(str(metadata)).hexdigest()
+        if checksum == r.last_checksum:
+            return False
+
+        for k in metadata.keys ():
+            setattr (r, k, self.encoder.encode(k, metadata[k]))
+
+        # Store the new digest too; otherwise the next harvest of the very
+        # same data would again be reported as a modification.
+        r.last_checksum = checksum
+        r.last_update = datetime.datetime.today()
+        r.save()
+        return True
+
+    def _combine (self, result_lists, op):
+        """Merge several scored result lists into one.
+
+        `result_lists` is a sequence of lists of (id, score) pairs.  With
+        `op` equal to "|" the ids of all lists are kept (union); with "&"
+        only the ids present in every list are kept (intersection); any
+        other operator yields an empty result.  Returns a list of
+        (id, total score) pairs in no particular order, the total being the
+        sum of the scores an id received across all lists.
+        """
+        scores = {}
+        simple_sets = []
+
+        for results in result_lists:
+            simple_sets.append (set([x[0] for x in results]))
+            for (record_id, score) in results:
+                scores[record_id] = scores.get (record_id, 0) + score
+
+        matches = set()
+        for position, s in enumerate (simple_sets):
+            if op == "|":
+                matches = matches | s
+            elif op == "&":
+                # Only the first set seeds the intersection: an intersection
+                # that has become empty must stay empty instead of being
+                # replaced by the next set.
+                if position == 0:
+                    matches = s
+                else:
+                    matches = matches & s
+
+        return [(x, scores[x]) for x in matches]
+
+
+    def _text_search (self, q, fields = None):
+        """Run a MATCH ... AGAINST query on savoirs_record and return its rows.
+
+        `q` is split on runs of non-word characters and the pieces are
+        searched in each column of `fields`; when `fields` is None, the META
+        keys flagged "text_search" are used.  The per-column scores are
+        added up.  Rows are (id, score) with score > 0, best score first.
+        """
+        if fields is None:
+            fields = [name for name in META.keys() if META[name].get("text_search", False)]
+
+        splitter = re.compile (r'\W+', re.U)
+        terms = " ".join(splitter.split (q))
+
+        # Boolean mode is requested only when "subject" is the sole column.
+        mode = ""
+        if len(fields)==1 and fields[0] == "subject":
+            mode = " IN BOOLEAN MODE"
+
+        clauses = ["MATCH(`%s`) AGAINST ('%s'%s)" % (name, terms, mode) for name in fields]
+        score_expr = "+".join (clauses)
+
+        # The terms are placed directly in the SQL text; after the split
+        # above they consist of word characters and spaces.
+        sql = "SELECT id, (" + score_expr + ") AS score FROM savoirs_record WHERE (" \
+              + score_expr + ") HAVING score > 0 ORDER BY score DESC"
+
+        from django.db import connection, transaction
+        cursor = connection.cursor()
+        cursor.execute(sql)
+        rc = cursor.fetchall()
return rc
+ ############################################################################
+ # API
+ ############################################################################
+
def add (self, metadata):
"""Ajouter la ressource définie par `metadata`. Si on trouve une
ressource avec le même `identifier`, on le met a jour.
Retourne l'id de la ressource créée ou mise à jour.
"""
+ # NOTE: the value returned is a dict with the keys 'record_id', 'added'
+ # and 'updated' (see the return statement), not a bare id.
+ # The existing resource is looked up by its URI.
+ added = updated = False
exists = self.search (q = {URI: metadata[URI]})
if len (exists) > 0:
id = exists[0][0]
- return self.update (int(id), metadata)
+ # `updated` is whatever update() returns for the existing record.
+ updated = self.update (int(id), metadata)
else:
- return self.backend.add (metadata)
+ added = True
+ id = self._save (metadata)
+ return {'record_id': id, 'added':added, 'updated':updated}
+
+    def delete (self, id):
+        """Delete the resource identified by `id`.
+
+        The record is fetched first and then deleted.
+        """
+        Record.objects.get(id = id).delete()
def update (self, id, metadata):
"""Met a jour la ressource identifiée par `id`, avec les données de
`metadata`. Une exception est levée si elle n'existe pas.
"""
+ # When the resource is found, the result of _modify is returned as is.
if self.get (int(id)) is not None:
- self.backend.update (int(id), metadata)
+ return self._modify (int(id), metadata)
else:
raise Exception ("Objet inexistant")
+ # NOTE(review): both branches above return or raise, so the line below looks unreachable.
+ return False
- def delete (self, id):
- """Supprime la ressource identifiée par `id`.
+ def get (self, id):
+ """Fetch the metadata structure of the resource identified by `id`.
+ `id` may be a list or a tuple, in which case a list holding one
+ structure per element is returned.
"""
- self.backend.delete (int(id))
+ if isinstance (id, tuple) or isinstance (id, list):
+ rc = []
+ for i in id:
+ # An element may itself be indexable, e.g. an (id, score) tuple; its
+ # first item is then used as the id. Elements that cannot be indexed
+ # are used as they are.
+ # NOTE(review): a string element is indexable too, so "42" would become "4" - confirm what callers pass.
+ try:
+ i = i[0]
+ except: pass
+ rc.append (self._load (int(i)))
+ else:
+ rc = self._load (int(id))
+ return rc
- def add_log (self, name, count):
- if hasattr (self.backend, 'add_log'):
- self.backend.add_log (name, count)
+    def ids (self):
+        """Return the ids of all the stored resources, as a list."""
+        record_ids = []
+        for record in Record.objects.all():
+            record_ids.append (record.id)
+        return record_ids
- def logs (self):
- rc = {}
- if hasattr (self.backend, 'logs'):
- rc = self.backend.logs()
- return rc
+    def search (self, q):
+        """Run a multi-criteria search described by the dictionary `q`.
+
+        Three kinds of search are tried, in this order:
+        - simple: `q["q"]` is searched in the default text-search fields;
+        - URL: `q[URI]` is compared, ignoring case, with the record URIs;
+        - advanced: "creator", "title", "description" and "subject" are
+          searched separately and combined with `q["operator"]` ("|" by
+          default), best total score first.
+        An empty `q` returns the ids of all the records.  Only references to
+        the results are returned; their data has to be loaded afterwards.
+        """
+        rc = []
+        sets = []
+
+        if len (q) > 0:
+            # Simple search.  The text is used as typed: it is no longer
+            # JSON-encoded, so there are no surrounding quotes to strip and
+            # slicing it would cut off its first and last characters.
+            ww = q.get ("q", "").strip()
+            if len (ww) > 0:
+                s = self._text_search (ww)
+                if len(s) > 0:
+                    rc.append (s)
+            # URL search
+            elif q.get (URI) is not None:
+                s = []
+                try:
+                    s.append((Record.objects.get(uri__iexact = q.get(URI)).id, 1))
+                    rc.append(s)
+                except (Record.DoesNotExist, Record.MultipleObjectsReturned):
+                    # No single matching record: the result stays empty.
+                    pass
+            # Advanced search
+            else:
+                creator = q.get ("creator", "")
+                title = q.get ("title", "")
+                description = q.get ("description", "")
+                subject = q.get ("subject", "")
+
+                if len (creator) > 0:
+                    sets.append (self._text_search (creator, [CREATOR, CONTRIBUTOR]))
+                if len (title) > 0:
+                    sets.append (self._text_search (title, [TITLE, ALT_TITLE]))
+                if len (description) > 0:
+                    sets.append (self._text_search (description, [DESCRIPTION, ABSTRACT]))
+                if len (subject) > 0:
+                    sets.append (self._text_search (subject, [SUBJECT,]))
+                rc = self._combine (sets, q.get ("operator", "|"))
+                rc.sort (key = operator.itemgetter(1), reverse = True)
+
+            # Keep the first item of each entry, as before.
+            if len(rc) > 0:
+                rc = [x[0] for x in rc]
+
+        else:
+            rc = self.ids()
+
+        return rc
objects = ActiveManager()
class Record(models.Model):
-
- #OAI et extra AUF
+
+ # Internal bookkeeping.
+ # last_checksum and last_update are filled in by SEP._save: an MD5 hex
+ # digest of the harvested metadata and the date of the write.
+ # NOTE(review): last_update is a CharField although SEP assigns a datetime to it - confirm this is intended.
id = models.AutoField(primary_key = True)
- server = models.CharField(max_length = 255, editable=False)
- title = models.TextField(null = True, blank = True, editable=False)
- creator = models.TextField(null = True, blank = True, editable=False)
- description = models.TextField(null = True, blank = True, editable=False)
- modified = models.CharField(max_length = 255, null = True, blank = True, editable=False)
- identifier = models.CharField(max_length = 255, null = True, blank = True, unique = True, editable=False)
- uri = models.CharField(max_length = 255, null = True, blank = True, unique = True, editable=False)
- source = models.TextField(null = True, blank = True, editable=False)
- contributor = models.TextField(null = True, blank = True, editable=False)
- subject = models.TextField(null = True, blank = True, editable=False)
- publisher = models.TextField(null = True, blank = True, editable=False)
- type = models.TextField(null = True, blank = True, editable=False)
- format = models.TextField(null = True, blank = True, editable=False)
- language = models.TextField(null = True, blank = True, editable=False)
+ server = models.CharField(max_length = 255)
+ last_update = models.CharField(max_length = 255)
+ last_checksum = models.CharField(max_length = 255)
+
+ #OAI
+ title = models.TextField(null = True, blank = True)
+ creator = models.TextField(null = True, blank = True)
+ description = models.TextField(null = True, blank = True)
+ modified = models.CharField(max_length = 255, null = True, blank = True)
+ identifier = models.CharField(max_length = 255, null = True, blank = True, unique = True)
+ uri = models.CharField(max_length = 255, null = True, blank = True, unique = True)
+ source = models.TextField(null = True, blank = True)
+ contributor = models.TextField(null = True, blank = True)
+ subject = models.TextField(null = True, blank = True)
+ publisher = models.TextField(null = True, blank = True)
+ type = models.TextField(null = True, blank = True)
+ format = models.TextField(null = True, blank = True)
+ language = models.TextField(null = True, blank = True)
#SEP 2 (aucune données récoltées)
- alt_title = models.TextField(null = True, blank = True, editable=False)
- abstract = models.TextField(null = True, blank = True, editable=False)
- creation = models.CharField(max_length = 255, null = True, blank = True, editable=False)
- issued = models.CharField(max_length = 255, null = True, blank = True, editable=False)
- isbn = models.TextField(null = True, blank = True, editable=False)
- orig_lang = models.TextField(null = True, blank = True, editable=False)
+ alt_title = models.TextField(null = True, blank = True)
+ abstract = models.TextField(null = True, blank = True)
+ creation = models.CharField(max_length = 255, null = True, blank = True)
+ issued = models.CharField(max_length = 255, null = True, blank = True)
+ isbn = models.TextField(null = True, blank = True)
+ orig_lang = models.TextField(null = True, blank = True)
# Metadata AUF multivaluées
disciplines = models.ManyToManyField(Discipline)
thematiques = models.ManyToManyField(Thematique)
def __unicode__(self):
- return "%s" % self.title
+ # Label made of the record id and its title.
+ return "R[%s] %s" % (self.id, self.title)
# Ces fonctions sont utilisées pour travailler directement sur les données JSON enregistrées tel quel
# sur la base de données. Lorsque le modèle est initialisé, les fields sont décodés, et lorsque l'objet
# est sauvegardé, on s'assure de remettre les données encodées en JSON.
# TODO : a terme, les données ne seront plus stockées au format JSON dans la BD et ces fonctions seront
# donc obsolètes.
-
- def save(self, *args, **kwargs):
-
- for field_name in [f for f in self._meta.get_all_field_names() if f in META.keys()]:
- v = getattr (self, field_name, None)
- setattr(self, field_name, simplejson.dumps(v))
-
- super(Record, self).save(*args, **kwargs)
-
-def decode_json(instance, **kwargs):
- for field_name in [f for f in instance._meta.get_all_field_names() if f in META.keys()]:
- json = getattr(instance, field_name)
- data = "-"
- v = getattr (instance, field_name, None)
- if v is not None:
- data = simplejson.loads(v)
- if not isinstance(data, basestring):
- decoded_value = u",".join(data)
- else:
- decoded_value = data
- setattr(instance, field_name, decoded_value)
-
-models.signals.post_init.connect(decode_json, Record)
+#
+# def save(self, *args, **kwargs):
+#
+# for field_name in [f for f in self._meta.get_all_field_names() if f in META.keys()]:
+# v = getattr (self, field_name, None)
+# setattr(self, field_name, simplejson.dumps(v))
+#
+# super(Record, self).save(*args, **kwargs)
+#
+#def decode_json(instance, **kwargs):
+# for field_name in [f for f in instance._meta.get_all_field_names() if f in META.keys()]:
+# json = getattr(instance, field_name)
+# data = "-"
+# v = getattr (instance, field_name, None)
+# if v is not None:
+# data = simplejson.loads(v)
+# if not isinstance(data, basestring):
+# decoded_value = u",".join(data)
+# else:
+# decoded_value = data
+# setattr(instance, field_name, decoded_value)
+#
+#models.signals.post_init.connect(decode_json, Record)
class HarvestLog(models.Model):
- name = models.CharField(max_length = 255, primary_key = True)
+    # One row per logged event; `record` is optional.
+    context = models.CharField(max_length = 255)
+    name = models.CharField(max_length = 255)
date = models.DateTimeField(auto_now = True)
- count = models.IntegerField(null = True, blank = True)
+    added = models.IntegerField(null = True, blank = True)
+    updated = models.IntegerField(null = True, blank = True)
+    record = models.ForeignKey(Record, null = True, blank = True)
+
+    @staticmethod
+    def add(message):
+        """Save a new log entry built from the dictionary `message`.
+
+        Each key of `message` is set as an attribute of the entry.  The key
+        'record_id', when present, is replaced by 'record', holding the
+        Record with that id.  `message` itself is left unmodified.
+        """
+        # Work on a copy so that the caller's dictionary is not altered.
+        values = dict(message)
+        if 'record_id' in values:
+            values['record'] = Record.objects.get(id=values.pop('record_id'))
+
+        logger = HarvestLog()
+        for k, v in values.items():
+            setattr(logger, k, v)
+        logger.save()