Outil pour générer les stats sur le wiki concernant les moissons
[auf_savoirs_en_partage_django.git] / auf_savoirs_en_partage / savoirs / lib / harvest.py
CommitLineData
0cc5f772
CR
1# -*- encoding: utf-8 -*-
2import sys, os, time, traceback
0cc5f772 3from auf_savoirs_en_partage.backend_config import RESOURCES
23b5b3d5 4from savoirs.models import HarvestLog
0cc5f772
CR
5from sep import SEP
6
class HarvestStats:
    """Print a wiki-formatted table describing the harvest state of each resource.

    One row is produced per entry of ``RESOURCES``. A column is filled either
    by the method of this class bearing the column's name (``date``,
    ``ref_importees``) or, failing that, by the resource option of that name.
    """

    # Column label -> width (in characters) of that column in the table.
    libelles = {'site': 50, 'url': 50, 'type': 10, 'acces': 10,
                'ref_importees': 20, 'date': 20, 'obs': 10}

    # Text shown in place of a value when a site has no harvest log.
    _PAS_D_IMPORT = "pas d'import"

    def dernier_logs_moisson(self, site):
        """Return the 'moisson' harvest logs of *site*, most recent first."""
        return HarvestLog.objects.filter(name=site, context='moisson').order_by('-date')

    def _dernier_log(self, site):
        """Return the most recent harvest log of *site*, or None if there is none."""
        # Slice before evaluating so only one row is loaded instead of the
        # whole history (presumably a Django QuerySet -- slicing maps to LIMIT).
        logs = list(self.dernier_logs_moisson(site)[:1])
        if logs:
            return logs[0]
        return None

    def date(self, site):
        """Return the date of the last harvest of *site* as a string.

        Returns "pas d'import" when the site has never been harvested.
        """
        log = self._dernier_log(site)
        if log is None:
            return self._PAS_D_IMPORT
        return str(log.date)

    def ref_importees(self, site):
        """Return the ``processed`` count of the last harvest of *site* as a string.

        Returns "pas d'import" when the site has never been harvested.
        """
        log = self._dernier_log(site)
        if log is None:
            return self._PAS_D_IMPORT
        return str(log.processed)

    def _cellule(self, ligne, libelle, largeur):
        """Return the text of column *libelle* for row *ligne*, padded to *largeur*.

        A method named like the column takes precedence and is called with the
        row's site name; otherwise the row's own value is used, or an empty
        string when the row has no such key.
        """
        method = getattr(self, libelle, None)
        if callable(method):
            value = method(ligne['site'])
        elif libelle in ligne:
            value = ligne[libelle]
        else:
            value = ""
        # Option values are not guaranteed to be strings (e.g. a port number);
        # anything without ljust() is converted so padding cannot fail.
        if not hasattr(value, 'ljust'):
            value = str(value)
        return value.ljust(largeur)

    def wiki(self,):
        """Print the statistics table to stdout, one ``|``-delimited line per row.

        The first line holds the column labels wrapped in ``*``; the following
        lines hold one resource each. Column order is the iteration order of
        ``libelles``.
        """
        # Freeze the column order once so the header and every row agree.
        colonnes = list(self.libelles.items())

        # Header line.
        entete = [("*%s*" % libelle).ljust(largeur) for libelle, largeur in colonnes]
        print("|%s|" % "|".join(entete))

        # One line per resource.
        for site, options in RESOURCES.items():
            # Work on a copy: the shared configuration must not gain a 'site' key.
            ligne = dict(options)
            ligne['site'] = site
            cellules = [self._cellule(ligne, libelle, largeur)
                        for libelle, largeur in colonnes]
            print("|%s|" % "|".join(cellules))
55
0cc5f772
CR
def import_all ():
    """Harvest the configured resources and store their records through SEP.

    The resources come from ``RESOURCES``; each harvested node is added to the
    storage system through SEP (:doc:`../sep/index`) and the outcome is
    recorded with ``HarvestLog.add``.

    The behaviour depends on the command line (``sys.argv``):

    * no argument: every resource of ``RESOURCES`` is harvested;
    * ``stats``: the wiki statistics table is printed and the process exits
      with status 0;
    * a resource name: only that resource is harvested; an unknown name
      prints an error and exits with status -1.

    A failure of a harvester is printed with its traceback and the resource
    is treated as having produced no node. A failure while storing one node
    is logged with context ``'error'`` and the next node is processed.
    """
    sep = SEP ()

    resources = RESOURCES
    if len(sys.argv) == 2:
        name = sys.argv[1]

        if name == 'stats':
            stats = HarvestStats()
            stats.wiki()
            # Printing the statistics succeeded: report success to the shell.
            sys.exit(0)

        if RESOURCES.get(name) is not None:
            resources = {name: RESOURCES.get(name)}
        else:
            print("Ressource %s non existante" % name)
            sys.exit(-1)

    for name in resources:
        print("Import: %s" % name)
        # Copy the options so the shared configuration is not modified when
        # the server name is attached for the harvester.
        options = dict(RESOURCES[name])
        options['server'] = name

        # The harvester module is selected by the resource's type and access
        # mode; __import__ returns the top-level package, hence sys.modules.
        module = 'harvesters.%s.%s' \
                % (options['type'], options['acces'])
        __import__ (module)
        harvester = sys.modules[module]
        try:
            nodes = harvester.harvest (options)
        except Exception:
            # Best effort: report the failure and carry on with the next
            # resource. KeyboardInterrupt/SystemExit are deliberately not
            # caught so the run can still be interrupted.
            print("Exception:")
            print('-'*60)
            traceback.print_exc(file=sys.stdout)
            print('-'*60)
            nodes = []

        added = updated = 0
        for node in nodes:
            node['server'] = name

            try:
                status = sep.add (node)
            except Exception:
                # Build a fresh message: no status exists for a node that
                # could not be stored, and reusing the previous node's
                # message would log stale data (or fail on the first node).
                message = {'context':'error', 'name':name, 'processed':0}
                HarvestLog.add(message)
                continue

            if status['added']:
                added += 1
            if status['updated']:
                updated += 1
            # Per-record log entry: the status returned by SEP, tagged with
            # the harvest context.
            message = status
            message.update({'context':'record', 'name':name, 'processed':1})
            HarvestLog.add(message)

        # Summary log entry for the whole resource.
        message = {'context':'moisson', 'name':name, 'added':added, 'updated':updated, 'processed':len(nodes)}
        HarvestLog.add(message)

    del (sep)
119
# Script entry point: run the import when this file is executed directly.
if __name__ == '__main__':
    import_all()