# -*- encoding: utf-8 -*-
import urllib, httplib, time, simplejson, pprint, math, re
from django.conf import settings
from auf_savoirs_en_partage_backend.sep.io import SEP
from auf_savoirs_en_partage_backend.sep.utils import smart_str
from savoirs import configuration


def google_search (page, q, data):
    # Query the Google AJAX Search API.  The head of the params dict was
    # lost; 'q' and the API version are reconstructed, 'start' is original.
    params = {'q': q,
              'v': '1.0',
              'start': page * configuration['resultats_par_page'],
              }
    if not settings.DEBUG:
        # The right-hand side of this continuation was lost; a timestamp
        # cache-buster is one plausible reconstruction, given the otherwise
        # unused time import.
        params['cref'] = "http://savoirsenpartage.auf.org/recherche.xml?%s" \
                % time.time()

    url = "/ajax/services/search/web?" + \
            urllib.urlencode (params)
    handle = httplib.HTTPConnection ('ajax.googleapis.com')
    handle.request ("GET", url)
    r = handle.getresponse ()
    response = simplejson.loads (r.read ())
    #print pprint.pformat (params)
    #print pprint.pformat (response)
    handle.close ()    # reconstructed: release the connection once read

    if len (response['responseData']['results']) > 0:
        for i in response['responseData']['cursor']['pages']:
            p = int (i['label']) - 1
            if p > data['last_page']:
                data['last_page'] = p

        for r in response['responseData']['results']:
            data['results'].append ({'uri': r['url'],
                                     'content': r['content'],
                                     'title': r['title']})

        data['more_link'] = response['responseData']['cursor']['moreResultsUrl']
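
# Usage sketch (assumptions: Django settings are loaded, the network is
# reachable, and configuration['resultats_par_page'] is set):
#
#   data = {'results': [], 'last_page': 0, 'more_link': ''}
#   google_search (0, 'savoirs en partage', data)
#   # data['results'] now holds {'uri', 'content', 'title'} dicts and
#   # data['last_page'] the index of the last result page Google reports.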


def sep_build_content (regexp, description):
    """Build a short excerpt of description centred on the first match of
    regexp, cut on sentence boundaries where possible."""
    maxlen = 200    # excerpt size; the original value was lost, 200 is a guess
    content = description
    if len (description) > maxlen:
        # Centre the excerpt window on the first matched word, if any.
        start = 0
        loc = regexp.search (description)
        if loc:
            start = loc.start ()

        f = start - (maxlen / 2)
        t = 0
        if f < 0:
            # Window overflows on the left: clamp it and carry the overshoot
            # over to the right bound (reconstructed logic).
            t = -f
            f = 0
        t += start + (maxlen / 2)

        # Snap the left bound back to the previous sentence boundary.
        while description[f] != '.' and f > 0:
            f -= 1
        if f > 0:
            f += 1
        if t < len (description):
            # Extend the right bound to the end of the current sentence.
            while t < len (description) and description[t] != '.':
                t += 1
            t += 1
        content = description[f:t]
        if f > 0:
            content = "(...) " + content
        if t < (len (description) - 1):
            content = content + " (...)"
    # The original ran regexp.sub (r'<b>\1</b>', content) here, but with the
    # three-group pattern built by make_regexp that replaces each match with
    # only its leading delimiter, destroying the matched word.  Highlighting
    # is instead done once by the caller, through hl ().
    return content
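
# Example (sketch): with regexp = make_regexp ({'q': u'partage'}) and a long
# description, sep_build_content returns roughly
#   u"(...) la phrase contenant partage. (...)"
# i.e. the sentence around the first match, flanked by ellipsis markers when
# text was cut on either side.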


def make_regexp (q):
    """Compile one regexp matching any word (longer than 2 characters) of
    the query dict q, with its surrounding non-word delimiters captured."""
    words = []
    w = re.compile (r'\W+', re.U)
    # Collect the words of every query field (loop header reconstructed).
    for k in q.keys ():
        words.extend (w.split (smart_str(q[k]).decode ("utf-8")))
    # Keep only words longer than 2 characters, sorted longest first so that
    # longer alternatives win in the generated pattern.
    words = filter (lambda x: len(x) > 2, words)
    words.sort (lambda x, y: len(y) - len(x))

    patt = "|".join (words)
    patt = r"([\W]{1})(" + patt + r")([\W]{1})"
    return re.compile (patt, re.I | re.U)


def hl (r, string):
    """Highlight every query word in string, or pass None through."""
    if string is not None:
        return r.sub (r'\1<b>\2</b>\3', string)
    return string
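
# Example (sketch): make_regexp ({'q': u'eau potable'}) compiles roughly
# (\W)(potable|eau)(\W) with re.I | re.U (longest word first), so that
#   hl (make_regexp ({'q': u'eau'}), u' eau du robinet ')
# returns u' <b>eau</b> du robinet ', keeping the delimiters around the word.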


def sep_search (page, q, data):
    f = page * configuration['resultats_par_page']
    # Slice bounds and backend handle (reconstructed): one page of matches.
    t = f + configuration['resultats_par_page']
    s = SEP ()
    matches = s.search (q)
    data['last_page'] = math.ceil (float (len (matches)) / \
            float (configuration['resultats_par_page'])) - 1
    records = s.get (matches[f:t])    # renamed from "set", which shadowed the builtin
    regexp = make_regexp (q)

    for r in records:
        uri = r.get ("source", "")
        # Title is highlighted through hl () below; the original also ran
        # regexp.sub (r'<b>\1</b>', ...) here, which mangled the match.
        title = r.get ("title", "")

        content = sep_build_content (regexp, r.get ("description", ""))

        contributeurs = r.get ('contributor')
        if contributeurs is not None:
            contributeurs = "; ".join (contributeurs)

        subject = r.get ('subject')
        if subject is not None:
            subject = ", ".join (subject)

        data['results'].append ({'uri': uri,
                                 'id': r.get ("uri"),
                                 'title': hl (regexp, title),
                                 'content': hl (regexp, content),
                                 'creator': hl (regexp, r.get ('creator')),
                                 'contributors': hl (regexp, contributeurs),
                                 'subject': hl (regexp, subject),
                                 'modified': r.get ('modified'),
                                 'isbn': r.get ('isbn'),
                                 })
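
# Usage sketch (assumes a reachable, populated SEP backend):
#
#   rc = {'results': [], 'last_page': 0, 'more_link': ''}
#   sep_search (0, {'q': 'eau'}, rc)
#   # rc['results'] rows share the 'uri'/'title'/'content' keys of the Google
#   # branch, plus SEP metadata ('creator', 'subject', 'isbn', ...), so the
#   # template layer can render both engines uniformly.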


def cherche (page, q, engin=None):
    rc = {'results': [], 'last_page': 0, 'more_link': ''}
    if engin is None:
        engin = configuration['engin_recherche']

    if engin == 'google':
        google_search (page, q, rc)

    elif engin == 'sep':    # branch label reconstructed: plain SEP full-text search
        sep_search (page, {'q': q.encode ('utf-8')}, rc)

    elif engin == 'avancee':
        sep_search (page, q, rc)

    return rc
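

# Minimal smoke test (sketch): only runs when the module is executed
# directly, and assumes Django settings, the savoirs configuration and the
# SEP backend are all available.
if __name__ == "__main__":
    rc = cherche (0, u"eau potable")
    print pprint.pformat ({'last_page': rc['last_page'],
                           'n_results': len (rc['results'])})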