# -*- encoding: utf-8 -*-
import urllib, httplib, time, simplejson, pprint, math, re

from django.core.urlresolvers import reverse
from django.conf import settings

from auf_savoirs_en_partage.backend_config import RESOURCES
# Assumed import: sep_search() below instantiates SEP(), but the line
# importing it is missing from this fragment.
from sep import SEP
from utils import smart_str
from savoirs.globals import configuration

def google_search (page, q):
    data = {'results': [], 'last_page': 0, 'more_link': ''}
    params = {'q': q,
              # 'v' and 'rsz' are assumed values for the Google AJAX Search
              # API; the lines defining them are missing from this fragment.
              'v': '1.0',
              'rsz': 'large',
              'start': page * configuration['resultats_par_page'],
              }

    params['cref'] = configuration['google_xml'] % time.time ()
    url = "/ajax/services/search/web?" + urllib.urlencode (params)

    handle = httplib.HTTPConnection ('ajax.googleapis.com')
    handle.request ("GET", url)
    r = handle.getresponse ()
    response = simplejson.loads (r.read ())
    #print pprint.pformat (params)
    #print pprint.pformat (response)
    handle.close ()

    if len (response['responseData']['results']) > 0:
        for i in response['responseData']['cursor']['pages']:
            p = int (i['label']) - 1
            if p > data['last_page']:
                data['last_page'] = p

        for r in response['responseData']['results']:
            data['results'].append ({'uri': r['url'],
                                     'content': r['content'],
                                     'title': r['title']})

        data['more_link'] = response['responseData']['cursor']['moreResultsUrl']

    return data
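
# Usage sketch (hypothetical query; assumes configuration['resultats_par_page']
# and configuration['google_xml'] are set as used above):
#
#   hits = google_search (0, 'climat')
#   for h in hits['results']:
#       print h['title'], h['uri']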

def sep_build_content (regexp, description):
    """Returns an excerpt of 'description' centred on the first keyword
    match, with every match wrapped in <b> tags."""
    # maxlen is assumed; the line defining it is missing from this fragment.
    maxlen = 200
    content = description
    if len (description) > maxlen:
        start = 0
        loc = regexp.search (description)
        if loc:
            start = loc.start ()

        f = start - (maxlen / 2)
        # Assumed initialisation: the lines here are missing from this
        # fragment, but the "t +=" below implies t was set earlier; if the
        # window is clipped on the left, the surplus is carried to the right.
        t = 0
        if f < 0:
            t = -f
            f = 0
        t += start + (maxlen / 2)

        # Snap both ends of the window to sentence boundaries.
        while description[f] != '.' and f > 0:
            f -= 1
        if f > 0:
            f += 1
        if t < len (description):
            while t < len (description) and description[t] != '.':
                t += 1
            t += 1

        content = description[f:t]
        if f > 0:
            content = "(...) " + content
        if t < (len (description) - 1):
            content = content + " (...)"

    content = regexp.sub (r'\1<b>\2</b>\3', content)
    return content
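
# Example sketch (hypothetical strings): with a regexp built for the word
# "eau", a long description is trimmed to roughly maxlen characters around
# the first match and highlighted:
#
#   sep_build_content (make_regexp ({'q': 'eau'}), description)
#   # -> u"(...) la gestion de l'<b>eau</b> en milieu urbain. (...)"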

def make_regexp (q):
    """Builds a case-insensitive regexp matching any search word in the
    query dict 'q', for highlighting purposes."""
    words = []
    w = re.compile (r'\W+', re.U)
    for k in q.keys ():
        words.extend (w.split (smart_str (q[k]).decode ("utf-8")))
    # Keep only words longer than 2 characters; sort longest first so the
    # alternation prefers the longest match.
    words = filter (lambda x: len (x) > 2, words)
    words.sort (lambda x, y: len (y) - len (x))

    patt = "|".join (words)
    patt = "([\W|-]{1})(" + patt + ")([\W|-]{1})"
    return re.compile (patt, re.I | re.U)
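
# Example: make_regexp ({'q': u'eau potable'}) compiles to
#   ([\W|-]{1})(potable|eau)([\W|-]{1})
# i.e. "potable" or "eau" surrounded by non-word characters, with the
# delimiters captured so hl() can re-insert them around the <b> tags.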

def hl (r, string):
    """Wraps the matches of regexp 'r' in <b> tags."""
    if string is not None:
        return r.sub (r'\1<b>\2</b>\3', string)
    return None
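
# Example: hl (make_regexp ({'q': 'eau'}), u"L'eau potable")
#   -> u"L'<b>eau</b> potable"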

def sep_search (page, q, data):
    f = page * configuration['resultats_par_page']
    t = f + configuration['resultats_par_page']
    # Assumed: the SEP backend is built with a plain constructor call; the
    # line instantiating it is missing from this fragment.
    s = SEP ()
    matches = s.search (q)
    data['last_page'] = math.ceil (float (len (matches)) /
                                   float (configuration['resultats_par_page'])) - 1
    records = s.get (matches[f:t])
    regexp = make_regexp (q)

    for r in records:
        uri = r.get ("uri", "")
        if len (uri) == 0:
            uri = r.get ("source")

        serveur = RESOURCES[r.get ('server')]['url']

        # Keep the source only if it is not a URL
        source = r.get ("source", None)
        if source is not None and source.startswith ('http'):
            source = None

        title = r.get ("title", "")
        content = sep_build_content (regexp, r.get ("description", ""))

        contributeurs = r.get ('contributor')
        if contributeurs is not None:
            contributeurs = "; ".join (contributeurs)

        subject = r.get ('subject')
        if subject is not None:
            subject = ", ".join (subject)

        data['results'].append ({
            'uri': uri,
            'getServeurURL': serveur,
            'source': source,
            'title': hl (regexp, title),
            'content': content,
            'creator': '; '.join ([hl (regexp, x) for x in r.get ('creator', [])]),
            'contributors': hl (regexp, contributeurs),
            'subject': hl (regexp, subject),
            'modified': r.get ('modified'),
            'isbn': r.get ('isbn'),
            'admin_url': reverse ('admin:savoirs_record_change', args=[r.get ('id')])
        })
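
# Usage sketch (assumes a SEP backend is reachable):
#
#   rc = {'results': [], 'last_page': 0, 'more_link': ''}
#   sep_search (0, {'q': u'eau'}, rc)
#   # rc['results'] now holds the highlighted records for page 0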

def cherche (page, q, engin=None):
    rc = {'results': [], 'last_page': 0, 'more_link': ''}

    if engin is None:
        engin = configuration['engin_recherche']

    if engin == 'google':
        rc = google_search (page, q)

    # 'sep' is the assumed name of the internal engine; this elif line is
    # missing from the fragment.
    elif engin == 'sep':
        sep_search (page, {'q': q}, rc)

    elif engin == 'avancee':
        sep_search (page, q, rc)

    return rc
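
# Dispatch sketch (engine names as in the branches above; 'sep' is assumed):
#
#   rc = cherche (0, u'eau')            # engine taken from configuration
#   rc = cherche (0, u'eau', 'google')  # force a specific engine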

def build_search_regexp (query):
    """Builds a regular expression that can be used to search for the
    keywords given in 'query'."""
    words = query.split ()
    if not words:
        return None
    parts = []
    for word in words:
        part = re.escape (word.lower ())
        # Regular expressions don't know the uppercase versions of
        # accented characters. :(
        part = part.replace (u'à', u'[àÀ]')
        part = part.replace (u'â', u'[âÂ]')
        part = part.replace (u'é', u'[éÉ]')
        part = part.replace (u'ê', u'[êÊ]')
        part = part.replace (u'î', u'[îÎ]')
        part = part.replace (u'ç', u'[çÇ]')

        # Do this after the accented characters have been handled...
        part = part.replace ('a', u'[aàâÀÂ]')
        part = part.replace ('e', u'[eéèëêÉÊ]')
        part = part.replace ('i', u'[iïîÎ]')
        part = part.replace ('o', u'[oô]')
        part = part.replace ('u', u'[uûüù]')
        part = part.replace ('c', u'[cç]')
        parts.append (part)

    return re.compile ('|'.join (parts), re.I)
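
# Example: plain letters expand to their accented variants, so
#   bool (build_search_regexp (u'ecole').search (u'École'))  # -> True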