Optimisation de la requête des ressources valides
[auf_savoirs_en_partage_django.git] / auf_savoirs_en_partage / savoirs / lib / harvest.py
CommitLineData
0cc5f772
CR
1# -*- encoding: utf-8 -*-
2import sys, os, time, traceback
0cc5f772 3from auf_savoirs_en_partage.backend_config import RESOURCES
3d2e04ed 4from savoirs.models import HarvestLog, Record
0cc5f772
CR
5from sep import SEP
6
class HarvestStats:
    """Build a fixed-width summary table of the configured harvest sources.

    One row is produced per entry of ``RESOURCES``. For every column named in
    ``libelles`` the cell is taken from the method of the same name on this
    class when there is one (``date``, ``ref_importees``), otherwise from the
    source's option dict, otherwise it is left empty.
    """

    # Column name -> column width, in characters. Columns are emitted in the
    # iteration order of this dict.
    libelles = {'site':50, 'url':50, 'type':10, 'acces':10, 'ref_importees':20, 'date':30, 'obs':10}
    # Fill character used to pad every cell to its column width.
    separateur = " "

    def dernier_logs_moisson(self, site):
        """Return the 'moisson' HarvestLog entries of `site`, newest first."""
        return HarvestLog.objects.filter(name=site, context='moisson').order_by('-date')

    def date(self, site):
        """Return the date of the latest harvest of `site` as a string.

        Returns "pas d'import" when no harvest has been logged for the site.
        """
        # Slice instead of len(): on a Django queryset this fetches a single
        # row rather than loading the whole log history.
        dernier = list(self.dernier_logs_moisson(site)[:1])
        if dernier:
            return str(dernier[0].date)
        return "pas d'import"

    def ref_importees(self, site):
        """Return, as a string, the number of records stored for `site`."""
        # count() lets the database do the counting instead of instantiating
        # every Record only to measure the length of the result.
        return str(Record.all_objects.filter(server=site).count())

    def stats(self,):
        """Return the table as a list of rows, each a list of padded cells.

        The first row holds the column labels wrapped in ``*``; one row per
        configured source follows.
        """
        colonnes = list(self.libelles.items())

        # Header row.
        stats = [[("*%s*" % nom).ljust(largeur, self.separateur)
                  for nom, largeur in colonnes]]

        # One row per source.
        for site, options in RESOURCES.items():
            # Work on a copy: the 'site' key is only needed to fill the table
            # and must not leak into the shared configuration.
            ligne = dict(options, site=site)
            ligne_ordonnee = []
            for nom, largeur in colonnes:
                method = getattr(self, nom, None)
                if method is not None:
                    value = method(site)
                elif nom in ligne:
                    value = ligne[nom]
                else:
                    value = ""
                # Configuration values are not guaranteed to be strings.
                if not hasattr(value, 'ljust'):
                    value = str(value)
                ligne_ordonnee.append(value.ljust(largeur, self.separateur))
            stats.append(ligne_ordonnee)
        return stats

    def wiki(self,):
        """Print the table on stdout, one ``|``-delimited line per row."""
        for s in self.stats():
            # Single-argument parenthesised form: same output under the
            # Python 2 print statement and the Python 3 print function.
            print("|%s|" % "|".join(s))
68cb6a91 59
0cc5f772
CR
def import_all():
    """Harvest the configured sources and store their records through SEP.

    Sources come from ``RESOURCES`` (auf_savoirs_en_partage.backend_config).
    With no command-line argument every source is harvested. With exactly one
    argument:

    * ``stats`` prints the HarvestStats table and exits with status 0;
    * the name of a source restricts the harvest to that source;
    * any other value prints an error and exits with status -1.

    For each source, the module ``harvesters.<type>.<acces>`` is imported and
    its ``harvest(options)`` function is called. A harvester that raises is
    reported on stderr and treated as having returned no node, so the other
    sources are still processed. Every node is passed to ``SEP.add`` and
    logged with context 'record'; a summary with context 'moisson' is logged
    once per source.
    """
    sep = SEP()

    resources = RESOURCES
    if len(sys.argv) == 2:
        name = sys.argv[1]

        if name == 'stats':
            HarvestStats().wiki()
            # Printing the statistics is a successful run.
            sys.exit(0)

        if RESOURCES.get(name) is None:
            print("Ressource %s non existante" % name)
            sys.exit(-1)
        resources = {name: RESOURCES[name]}

    for name, options in resources.items():
        print("Import: %s" % name.encode('utf-8'))
        # The option dict is handed to the harvester together with the name
        # of the source it belongs to.
        options['server'] = name

        module = 'harvesters.%s.%s' % (options['type'], options['acces'])
        __import__(module)
        harvester = sys.modules[module]
        try:
            nodes = harvester.harvest(options)
        except Exception:
            # Best effort: report the failure and carry on with the next
            # source. KeyboardInterrupt and SystemExit are not caught, so the
            # run can still be interrupted.
            sys.stderr.write("Exception:\n")
            sys.stderr.write('-' * 60 + "\n")
            traceback.print_exc(file=sys.stderr)
            sys.stderr.write('-' * 60 + "\n")
            nodes = []

        # Counted while iterating so that a harvester may return any
        # iterable, not only a sized sequence.
        added = updated = processed = 0
        for node in nodes:
            node['server'] = name
            status = sep.add(node)
            processed += 1

            if status['added']:
                added += 1
            if status['updated']:
                updated += 1
            # The status dict returned by SEP is completed in place and used
            # as the per-record log entry.
            status.update({'context': 'record', 'name': name, 'processed': 1})
            HarvestLog.add(status)

        message = {'context': 'moisson', 'name': name, 'added': added,
                   'updated': updated, 'processed': processed}
        HarvestLog.add(message)

    del sep
117
118if __name__ == '__main__':
119 import_all()