# -*- encoding: utf-8 -*-
import urllib, httplib, time, simplejson, pprint, math, re
from django.conf import settings
from auf_savoirs_en_partage_backend.sep.io import SEP
from savoirs import configuration

def google_search (page, q, data):
    # Query parameters for the Google AJAX Search API.  Only 'start' survives
    # in the source; 'q' and 'v' are reconstructed, since the API requires a
    # query string and a protocol version.
    params = {'q': q.encode ('utf-8'),
              'v': '1.0',
              'start': page * configuration['resultats_par_page'],
              }
    if not settings.DEBUG:
        # The continuation of this line is missing from the source; re-encoding
        # the query for the custom search engine URL is an assumption.
        params['cref'] = "http://savoirsenpartage.auf.org/recherche.xml?%s" \
                % urllib.urlencode ({'q': q.encode ('utf-8')})
    url = "/ajax/services/search/web?" + \
            urllib.urlencode (params)
    handle = httplib.HTTPConnection ('ajax.googleapis.com')
    handle.request ("GET", url)
    r = handle.getresponse ()
    response = simplejson.loads(r.read ())
    #print pprint.pformat (params)
    #print pprint.pformat (response)

    # Keep the highest page index advertised by the result cursor.
    for i in response['responseData']['cursor']['pages']:
        p = int (i['label']) - 1
        if p > data['last_page']:
            data['last_page'] = p

    for r in response['responseData']['results']:
        data['results'].append ({'uri': r['url'],
                                 'content': r['content'],
                                 'title': r['title']})

    data['more_link'] = response['responseData']['cursor']['moreResultsUrl']
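
# For reference, a sketch of the JSON shape consumed by google_search above.
# Field names follow the (deprecated) Google AJAX Search API; the values are
# illustrative, not taken from the original module:
#
#   {"responseData": {
#       "results": [{"url": "...", "title": "...", "content": "..."}],
#       "cursor": {"pages": [{"start": "0", "label": 1}],
#                  "moreResultsUrl": "http://www.google.com/search?..."}}}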

def sep_build_content (regexp, description):
    """Return a short excerpt of `description` centred on the first match of
    `regexp`, with every match wrapped in <b>...</b>."""
    maxlen = 200    # excerpt length; the actual value is missing from the source
    content = description
    if len (description) > maxlen:
        loc = regexp.search (description)
        if loc is not None:
            # Centre a window of about `maxlen` characters on the match, then
            # stretch both ends to the nearest sentence boundary.
            f = loc.start () - (maxlen / 2)
            t = 0
            if f < 0:
                t = -f
                f = 0
            t += loc.start () + (maxlen / 2)
            while description[f] != '.' and f > 0:
                f -= 1
            if f > 0:
                f += 1
            if t < len (description):
                while t < len (description) and description[t] != '.':
                    t += 1
            content = description[f:t]
            if f > 0:
                content = "(...) " + content
            if t < (len (description) - 1):
                content = content + " (...)"
    content = regexp.sub (r'<b>\1</b>', content)
    return content
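
# Usage sketch for sep_build_content (illustrative values, not from the
# original code): the same compiled pattern drives both the excerpt window
# and the <b>...</b> highlighting.
#
#   regexp = re.compile (r'(eau)', re.IGNORECASE)
#   sep_build_content (regexp, long_description)
#   -> "(...) La gestion de l'<b>eau</b> potable en milieu urbain (...)"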

def sep_search (page, q, data):
    f = page * configuration['resultats_par_page']
    t = f + configuration['resultats_par_page']    # slice upper bound; reconstructed
    s = SEP ()
    matches = s.search ({'q': q.encode ('utf-8')})
    data['last_page'] = math.ceil (float(len (matches)) / \
            float(configuration['resultats_par_page'])) - 1
    records = s.get (matches[f:t])

    # re.escape keeps queries containing regex metacharacters from breaking
    # the highlighting pattern.
    regexp = re.compile (r'(%s)' % re.escape (q), re.IGNORECASE)
    for r in records:
        uri = r.get ("source", "")
        title = regexp.sub (r'<b>\1</b>', r.get ("title", ""))
        content = sep_build_content (regexp, r.get ("description", ""))
        data['results'].append ({'uri': uri, 'id': r.get ("uri"),
                                 'title': title, 'content': content})

def cherche (page, q):
    rc = {'results': [], 'last_page': 0, 'more_link': ''}

    engin = configuration['engin_recherche']

    # The engine identifiers 'google' and 'sep' are assumed; the original
    # comparison lines are missing from the source.
    if engin == 'google':
        google_search (page, q, rc)

    elif engin == 'sep':
        sep_search (page, q, rc)

    return rc
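
# Minimal manual-test sketch (not part of the original module): it assumes
# Django settings and the `configuration` mapping are already set up, and
# simply prints the first page of results for a sample query.
if __name__ == "__main__":
    rc = cherche (0, u"développement durable")
    print "last_page:", rc['last_page']
    for item in rc['results']:
        print pprint.pformat (item)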