Corrections; added advanced search
[auf_savoirs_en_partage_django.git] auf_savoirs_en_partage_django/savoirs/recherche.py
# -*- encoding: utf-8 -*-
import urllib, httplib, time, simplejson, pprint, math, re
from django.conf import settings
from auf_savoirs_en_partage_backend.sep.io import SEP
from savoirs import configuration

def google_search (page, q, data):
    # Query the Google AJAX Search API and fill `data` in place.
    params = {'q': q,
              'rsz': 'large',
              'v': '1.0',
              'start': page * configuration['resultats_par_page'],
              }
    if not settings.DEBUG:
        # TODO: fix ts
        params['cref'] = "http://savoirsenpartage.auf.org/recherche.xml?%s" \
                % int (time.time ())

    url = "/ajax/services/search/web?" + urllib.urlencode (params)
    handle = httplib.HTTPConnection ('ajax.googleapis.com')
    handle.request ("GET", url)
    r = handle.getresponse ()
    response = simplejson.loads (r.read ())
    #print pprint.pformat (params)
    #print pprint.pformat (response)
    handle.close ()

    if response.get ('responseData') is None:
        # The API reports errors through a null responseData.
        return

    if len (response['responseData']['results']) > 0:
        # The cursor lists the available result pages; remember the
        # highest one so the paginator knows where to stop.
        for i in response['responseData']['cursor']['pages']:
            p = int (i['label']) - 1
            if p > data['last_page']:
                data['last_page'] = p

        for r in response['responseData']['results']:
            data['results'].append ({'uri': r['url'],
                                     'content': r['content'],
                                     'title': r['title']})

    data['more_link'] = response['responseData']['cursor'].get ('moreResultsUrl', '')

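# For reference (a sketch of the deprecated Google AJAX Search API, not part
# of the original module): the JSON answer consumed above is shaped roughly
# like
#
#   {"responseData": {"results": [{"url": ..., "title": ..., "content": ...}],
#                     "cursor": {"pages": [{"label": "1"}, ...],
#                                "moreResultsUrl": "..."}}}
#
# responseData is null when the API reports an error.
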
def sep_build_content (regexp, description):
    # Build a short excerpt (~200 characters) of `description`, centred
    # on the first occurrence of the search term, and highlight every
    # occurrence of the term.
    maxlen = 200
    content = description
    if len (description) > maxlen:
        start = 0
        loc = regexp.search (description)
        if loc:
            start = loc.start ()

        # Take maxlen/2 characters on each side of the match...
        f = start - (maxlen / 2)
        t = 0
        if f < 0:
            t = -f
            f = 0
        t += start + (maxlen / 2)
        # ...then extend both ends to the nearest sentence boundary.
        if f > 0:
            while f > 0 and description[f] != '.':
                f -= 1
            if f > 0:
                f += 1
        if t < len (description):
            while t < len (description) and description[t] != '.':
                t += 1
            t += 1
        content = description[f:t]
        if f > 0:
            content = "(...) " + content
        if t < (len (description) - 1):
            content = content + " (...)"
    content = regexp.sub (r'<b>\1</b>', content)
    return content

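# Illustration (a sketch, not part of the original module): with a pattern
# built the way sep_search builds it below, sep_build_content returns a
# bounded excerpt with the matched term wrapped in <b> tags.
# `long_description` here is a hypothetical placeholder:
#
#   rx = re.compile (r'(%s)' % re.escape ('partage'), re.IGNORECASE)
#   sep_build_content (rx, long_description)
#   # -> u"(...) les savoirs en <b>partage</b> de l'AUF. (...)"
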
def sep_search (page, q, data):
    # Search the SEP backend; `q` is a dict of criteria whose free-text
    # term, if any, lives under the 'q' key.
    f = page * configuration['resultats_par_page']
    t = f + configuration['resultats_par_page']
    s = SEP ()
    matches = s.search (q)
    nb_pages = math.ceil (float (len (matches)) /
                          float (configuration['resultats_par_page']))
    data['last_page'] = max (int (nb_pages) - 1, 0)
    records = s.get (matches[f:t])
    # Highlight the free-text term; escape it so regexp metacharacters
    # typed by the user are matched literally.
    terme = q.get ('q', '') if isinstance (q, dict) else q
    regexp = re.compile (r'(%s)' % re.escape (terme), re.IGNORECASE)
    for r in records:
        uri = r.get ("source", "")
        if len (uri) == 0:
            uri = r.get ("uri")
        title = regexp.sub (r'<b>\1</b>', r.get ("title", ""))
        content = sep_build_content (regexp, r.get ("description", ""))

        data['results'].append ({'uri': uri,
                                 'id': r.get ("uri"),
                                 'title': title,
                                 'content': content})

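# For reference (inferred from the calls above, not from SEP's own
# documentation): SEP().search() returns a list of match ids and
# SEP().get() resolves a slice of them to metadata records; the fields
# consumed here are "uri", "source", "title" and "description".
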
def cherche (page, q, engin=None):
    # Dispatch the query to the configured search engine and return a
    # dict with the results, the last page number and a "more" link.
    rc = {'results': [], 'last_page': 0, 'more_link': ''}

    if engin is None:
        engin = configuration['engin_recherche']

    if engin == 'google':
        google_search (page, q, rc)

    elif engin == 'sep':
        # Simple search: wrap the text query in a criteria dict.
        sep_search (page, {'q': q.encode ('utf-8')}, rc)

    elif engin == 'avancee':
        # Advanced search: `q` is already a dict of criteria.
        sep_search (page, q, rc)

    return rc
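
# Typical use from a view (a sketch, not part of the original module; the
# page number would normally be parsed from the request's query string,
# and `afficher` is a hypothetical helper):
#
#   resultats = cherche (0, u"mobilité")
#   for r in resultats['results']:
#       afficher (r['uri'], r['title'], r['content'])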