Corrections, ajout recherche avancée
[auf_savoirs_en_partage_django.git] / auf_savoirs_en_partage_django / savoirs / recherche.py
CommitLineData
d15017b2
CR
1# -*- encoding: utf-8 -*-
2import urllib, httplib, time, simplejson, pprint, math, re
40a5ebfb 3from django.conf import settings
d15017b2
CR
4from auf_savoirs_en_partage_backend.sep.io import SEP
5from savoirs import configuration
6
def google_search (page, q, data):
    """Query the Google AJAX Search API for `q` and append one page of
    results into `data`.

    Updates `data` in place:
      - data['results']: appends {'uri', 'content', 'title'} dicts
      - data['last_page']: highest page index reported by the cursor
      - data['more_link']: Google's "more results" URL

    `page` is 0-based; paging uses configuration['resultats_par_page'].
    """
    params = {'q': q,
              'rsz': 'large',
              'v': '1.0',
              'start': page * configuration['resultats_par_page'],
              }

    if not settings.DEBUG:
        #TODO: corriger ts
        params['cref'] = "http://savoirsenpartage.auf.org/recherche.xml?%s" \
                % int(time.time())

    url = "/ajax/services/search/web?" + urllib.urlencode (params)
    handle = httplib.HTTPConnection ('ajax.googleapis.com')
    try:
        # close the connection even if the request/read raises,
        # so we do not leak the socket
        handle.request ("GET", url)
        response = simplejson.loads (handle.getresponse ().read ())
    finally:
        handle.close ()

    # On error the AJAX API returns responseData = None (with a non-200
    # responseStatus); guard against it instead of crashing on subscript.
    response_data = response.get ('responseData') or {}
    results = response_data.get ('results') or []
    if len (results) > 0:
        cursor = response_data.get ('cursor') or {}
        for page_info in cursor.get ('pages', []):
            p = int (page_info['label']) - 1
            if p > data['last_page']:
                data['last_page'] = p

        for result in results:
            data['results'].append( {'uri': result['url'],
                                     'content': result['content'],
                                     'title': result['title']} )

        data['more_link'] = cursor.get ('moreResultsUrl', '')
d15017b2
CR
40
41
42def sep_build_content (regexp, description):
43 maxlen = 200
4013f53d 44 content = description
d15017b2 45 if len (description) > maxlen:
544b4522 46 start = 0
d15017b2 47 loc = regexp.search (description)
4013f53d 48 if loc:
544b4522
CR
49 start = loc.start ()
50
51 f = start - (maxlen / 2)
52 t = 0
53 if f < 0:
54 t = -f
55 f = 0
56 t += start + (maxlen / 2)
57 if f > 0:
58 while description[f] != '.' and f > 0:
59 f -= 1
d15017b2 60 if f > 0:
544b4522
CR
61 f += 1
62 if t < len (description):
63 while t < len (description) and description[t] != '.':
d15017b2 64 t += 1
544b4522
CR
65 t += 1
66 content = description[f:t]
67 if f > 0:
68 content = "(...) " + content
69 if t < (len (description) - 1):
70 content = content + " (...)"
d15017b2
CR
71 content = regexp.sub (r'<b>\1</b>', content)
72 return content
73
74
def sep_search (page, q, data):
    """Search the SEP backend and fill `data` with highlighted results.

    `q` is either the raw query string or a dict of search fields — the
    'sep' branch of cherche() passes {'q': <utf-8 text>}.  Updates
    `data` in place: appends {'uri', 'id', 'title', 'content'} dicts to
    data['results'] and sets data['last_page'].
    """
    f = page * configuration['resultats_par_page']
    # BUG FIX: the slice size was hard-coded to 8; use the configured
    # page size so pagination agrees with last_page computed below.
    t = f + configuration['resultats_par_page']
    s = SEP ()
    matches = s.search (q)
    data['last_page'] = math.ceil (float(len (matches)) / \
            float(configuration['resultats_par_page'])) - 1
    set = s.get (matches[f:t])
    # BUG FIX: `q` may be a dict, in which case the old %-formatting
    # compiled the dict repr into the pattern and never matched.  Build
    # the highlight pattern from the actual query text, escaped so
    # regex metacharacters typed by the user cannot break the pattern.
    if isinstance (q, dict):
        qtext = q.get ('q', '')
    else:
        qtext = q
    if qtext:
        regexp = re.compile (r'(%s)' % re.escape (qtext), re.IGNORECASE)
    else:
        # no query text: keep a one-group pattern that never matches,
        # so titles/descriptions pass through unhighlighted
        regexp = re.compile (r'(?!x)(x)')
    for r in set:
        uri = r.get ("source", "")
        if len (uri) == 0:
            uri = r.get ("uri")
        title = regexp.sub (r'<b>\1</b>', r.get ("title", ""))
        content = sep_build_content (regexp, r.get ("description", ""))

        data['results'].append ({'uri': uri, 'id': r.get("uri"), 'title': title, 'content': content})
d15017b2 92
d15017b2 93
a008c7e5 94def cherche (page, q, engin=None):
d15017b2 95 rc = {'results': [], 'last_page': 0, 'more_link': ''}
40a5ebfb 96
a008c7e5
CR
97 if engin is None:
98 engin = configuration['engin_recherche']
d15017b2
CR
99
100 if engin == 'google':
101 google_search (page, q, rc)
102
103 elif engin == 'sep':
544b4522
CR
104 sep_search (page, {'q': q.encode ('utf-8')}, rc)
105
106 elif engin == 'avancee':
d15017b2
CR
107 sep_search (page, q, rc)
108
109 return rc