# -*- encoding: utf-8 -*-
import urllib, httplib, time, simplejson, pprint, math, re
from django.conf import settings
from auf_savoirs_en_partage_backend.sep.io import SEP
from savoirs import configuration

def google_search(page, q, data):
    # Query the legacy Google AJAX Search API and accumulate results,
    # pagination info and the "more results" link in the `data` dict.
    params = {'q': q,
              'rsz': 'large',
              'v': '1.0',
              'start': page * configuration['resultats_par_page'],
              }
    if not settings.DEBUG:
        # TODO: fix the timestamp handling
        params['cref'] = "http://savoirsenpartage.auf.org/recherche.xml?%s" \
            % int(time.time())

    url = "/ajax/services/search/web?" + urllib.urlencode(params)
    handle = httplib.HTTPConnection('ajax.googleapis.com')
    handle.request("GET", url)
    r = handle.getresponse()
    response = simplejson.loads(r.read())
    #print pprint.pformat(params)
    #print pprint.pformat(response)
    handle.close()

    # The cursor labels its pages starting at 1; keep the highest
    # zero-based page index seen so far.
    for i in response['responseData']['cursor']['pages']:
        p = int(i['label']) - 1
        if p > data['last_page']:
            data['last_page'] = p

    for r in response['responseData']['results']:
        data['results'].append({'uri': r['url'],
                                'content': r['content'],
                                'title': r['title']})

    data['more_link'] = response['responseData']['cursor']['moreResultsUrl']

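# Illustrative call (the query string is hypothetical, and the AJAX
# Search API has long since been deprecated by Google); `google_search`
# fills the accumulator in place:
#
#   data = {'results': [], 'last_page': 0, 'more_link': ''}
#   google_search(0, u'migrations', data)
#   # data['results']   -> [{'uri': ..., 'title': ..., 'content': ...}, ...]
#   # data['last_page'] -> highest zero-based page index reported
#   # data['more_link'] -> URL of the full result page at Google
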
def sep_build_content(regexp, description):
    # Build a short excerpt (~200 characters) of `description` centred
    # on the first match of `regexp`, then highlight every match.
    maxlen = 200
    content = description
    if len(description) > maxlen:
        loc = regexp.search(description)
        if loc:
            # Start with a window of maxlen characters around the match.
            f = loc.start() - (maxlen / 2)
            t = 0
            if f < 0:
                t = -f
                f = 0
            t += loc.start() + (maxlen / 2)
            # Widen the window to the nearest sentence boundaries.
            if f > 0:
                while description[f] != '.' and f > 0:
                    f -= 1
                if f > 0:
                    f += 1
            if t < len(description):
                while t < len(description) and description[t] != '.':
                    t += 1
                t += 1
            content = description[f:t]
            # Mark truncation on either side.
            if f > 0:
                content = "(...) " + content
            if t < (len(description) - 1):
                content = content + " (...)"
    content = regexp.sub(r'<b>\1</b>', content)
    return content

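# Illustrative example of the excerpt logic above (the description text
# is hypothetical):
#
#   regexp = re.compile(r'(eau)', re.IGNORECASE)
#   sep_build_content(regexp, une_longue_description)
#
# For a description longer than 200 characters this returns a
# sentence-aligned window around the first match, e.g.
#   u"(...) La gestion de l'<b>eau</b> en milieu urbain. (...)"
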
def sep_search(page, q, data):
    # Slice one page out of the list of matching record ids.
    f = page * configuration['resultats_par_page']
    t = f + configuration['resultats_par_page']
    s = SEP()
    matches = s.search({'q': q.encode('utf-8')})
    data['last_page'] = int(math.ceil(float(len(matches)) /
                            float(configuration['resultats_par_page']))) - 1
    records = s.get(matches[f:t])
    # Escape the query so regex metacharacters cannot break highlighting.
    regexp = re.compile(r'(%s)' % re.escape(q), re.IGNORECASE)
    for r in records:
        uri = r.get("source", "")
        if len(uri) == 0:
            uri = r.get("uri")
        title = regexp.sub(r'<b>\1</b>', r.get("title", ""))
        content = sep_build_content(regexp, r.get("description", ""))

        data['results'].append({'uri': uri, 'id': r.get("uri"),
                                'title': title, 'content': content})

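# Note: records returned by SEP().get() are assumed to be dicts exposing
# .get() with at least a "uri" key and, optionally, "source", "title"
# and "description"; when "source" is empty the record's "uri" is used
# as the link target, and it is always kept as the result's "id".
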
def cherche(page, q):
    # Dispatch the query to the configured search engine and return the
    # accumulated results.
    rc = {'results': [], 'last_page': 0, 'more_link': ''}

    engin = configuration['engin_recherche']

    if engin == 'google':
        google_search(page, q, rc)

    elif engin == 'sep':
        sep_search(page, q, rc)

    return rc
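
# Minimal usage sketch from a Django view; the view function and the
# template name below are hypothetical, not part of this module:
#
#   from django.shortcuts import render_to_response
#
#   def recherche(request):
#       q = request.GET.get('q', '')
#       page = int(request.GET.get('page', 0))
#       return render_to_response('recherche.html', cherche(page, q))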