Adapté les requêtes de l'indexeur sphinx à la nouvelle structure de la table
[auf_savoirs_en_partage_django.git] / auf_savoirs_en_partage / scripts / sphinx.conf.py.in
CommitLineData
5212238e
EMS
1#!/usr/bin/python
2# coding: utf-8
3#
4# Ce script compose dynamiquement une configuration pour Sphinx. Il suffit
5# que le fichier soit exécutable pour que Sphinx exécute le script plutôt
6# que de le lire comme un fichier statique.
7#
8# Le manuel de sphinx se trouve au http://sphinxsearch.com/docs/manual-0.9.9.html
9
10import glob
11import os
12import sys
13
14sys.path[0:0] = [
15 "%(directory)s",
16 "%(directory)s/auf_savoirs_en_partage",
17 ]
18os.environ['DJANGO_SETTINGS_MODULE'] = 'production'
19for d in glob.glob("%(directory)s/eggs/*"):
20 sys.path[0:0] = [d,]
21for d in glob.glob("%(directory)s/parts/*"):
22 sys.path[0:0] = [d,]
23
24from django.conf import settings
25
26SQL_HOST = settings.DATABASE_HOST
27SQL_USER = settings.DATABASE_USER
28SQL_PASS = settings.DATABASE_PASSWORD
29SQL_DB = settings.DATABASE_NAME
4134daa0 30DATA_DIR = '/var/lib/sphinxsearch/data'
5212238e
EMS
31
32def multiline(s):
33 """Place un marqueur de continuation avant chaque saut de ligne."""
34 return s.replace("\n", "\\\n")
35
acd5cd8f
EMS
36def emit_source(name, sql_query, sql_query_info=None, sql_attr_multi=None, sql_attr_uint=None,
37 sql_attr_str2ordinal=None):
5212238e
EMS
38 print '''
39source %%(name)s
40{
41 type = mysql
42 sql_host = %%(sql_host)s
43 sql_user = %%(sql_user)s
44 sql_pass = %%(sql_pass)s
45 sql_db = %%(sql_db)s
46 sql_query_pre = SET NAMES utf8
47 sql_query_pre = SET SESSION query_cache_type=OFF
48 sql_query = %%(sql_query)s
49''' %% dict(name=name, sql_host=SQL_HOST, sql_user=SQL_USER,
50 sql_pass=SQL_PASS, sql_db=SQL_DB, sql_query=multiline(sql_query))
51
52 if sql_query_info:
53 print ' sql_query_info = ' + sql_query_info
54
55 if sql_attr_multi:
56 for attr in sql_attr_multi:
57 print ' sql_attr_multi = uint %%s from field' %% attr
58
59 if sql_attr_uint:
60 for attr in sql_attr_uint:
61 print ' sql_attr_uint = ' + attr
62
acd5cd8f
EMS
63 if sql_attr_str2ordinal:
64 for attr in sql_attr_str2ordinal:
65 print ' sql_attr_str2ordinal = ' + attr
66
5212238e
EMS
67 print '}'
68
69def emit_index(name):
70 print '''
71index %%(name)s
72{
73 morphology = libstemmer_fr
74 charset_type = utf-8
75 charset_table = 0..9, A..Z->a..z, _, a..z, \\
76 U+C0->a, U+C2->a, U+E0->a, U+E2->a, \\
77 U+C7->c, U+E7->c, \\
78 U+C8->e, U+C9->e, U+CA->e, U+E8->e, U+E9->e, U+EA->e, U+EB->e, \\
79 U+CE->i, U+EE->i, U+EF->i, \\
80 U+D4->o, U+F4->o, \\
81 U+F9->u, U+FB->u, U+FC->u
82 source = %%(name)s
83 path = %%(path)s
42a890cf 84 stopwords = %(directory)s/auf_savoirs_en_partage/stopwords.txt
5212238e
EMS
85}''' %% dict(name=name, path=os.path.join(DATA_DIR, name))
86
4134daa0 87emit_source('savoirsenpartage_ressources',
5212238e
EMS
88 '''SELECT r.id AS id,
89 r.title AS title,
90 r.description AS description,
91 r.creator AS creator,
92 r.contributor AS contributor,
93 r.subject AS subject,
94 r.publisher AS publisher,
95 GROUP_CONCAT(DISTINCT d.nom_discipline) AS disciplines,
96 GROUP_CONCAT(DISTINCT d.id_discipline) AS discipline_ids,
97 GROUP_CONCAT(DISTINCT p.nom) AS pays,
98 GROUP_CONCAT(DISTINCT reg.nom) AS regions,
99 GROUP_CONCAT(DISTINCT reg.id) AS region_ids
100 FROM savoirs_record r
101 LEFT JOIN savoirs_record_disciplines rd ON rd.record_id = r.id
102 LEFT JOIN discipline d ON d.id_discipline = rd.discipline_id
103 LEFT JOIN savoirs_record_pays rp ON rp.record_id = r.id
104 LEFT JOIN ref_pays p ON p.id = rp.pays_id
105 LEFT JOIN savoirs_record_regions rr ON rr.record_id = r.id
106 LEFT JOIN ref_region reg ON reg.id = rr.region_id OR reg.id = p.region
107 LEFT JOIN savoirs_record_listsets rl ON rl.record_id = r.id
108 LEFT JOIN savoirs_listset l ON l.spec = rl.listset_id
109 WHERE r.validated AND (l.spec IS NULL OR l.validated)
110 GROUP BY r.id''',
111 sql_query_info='SELECT * from savoirs_record WHERE id=$id',
112 sql_attr_multi=['discipline_ids', 'region_ids']
113 )
114
4134daa0 115emit_source('savoirsenpartage_actualites',
5212238e
EMS
116 '''SELECT a.id_actualite AS id,
117 a.titre_actualite AS titre,
118 a.texte_actualite AS texte,
119 TO_DAYS(a.date_actualite) AS date,
120 GROUP_CONCAT(DISTINCT r.nom) AS regions,
121 GROUP_CONCAT(DISTINCT d.nom_discipline) AS disciplines
122 FROM actualite a
123 LEFT JOIN actualite_regions ar ON ar.actualite_id = a.id_actualite
124 LEFT JOIN ref_region r ON r.id = ar.region_id
125 LEFT JOIN actualite_disciplines ad ON ad.actualite_id = a.id_actualite
126 LEFT JOIN discipline d ON d.id_discipline = ad.discipline_id
127 WHERE a.visible_actualite
128 GROUP BY a.id_actualite''',
129 sql_query_info='SELECT * from actualite WHERE id_actualite=$id',
130 sql_attr_uint=['date']
131 )
132
4134daa0 133emit_source('savoirsenpartage_evenements',
5212238e
EMS
134 '''SELECT e.id AS id,
135 e.titre AS titre,
136 e.mots_cles AS mots_cles,
137 e.type AS type,
138 e.lieu AS lieu,
139 e.description AS description,
140 e.contact AS contact,
141 CONCAT_WS(',', d.nom_discipline, d2.nom_discipline) AS disciplines,
142 GROUP_CONCAT(DISTINCT r.nom) AS regions,
143 TO_DAYS(DATE(e.debut)) AS debut
144 FROM savoirs_evenement e
145 LEFT JOIN discipline d ON d.id_discipline = e.discipline_id
146 LEFT JOIN discipline d2 ON d2.id_discipline = e.discipline_secondaire_id
147 LEFT JOIN savoirs_evenement_regions er ON er.evenement_id = e.id
148 LEFT JOIN ref_region r ON r.id = er.region_id
149 WHERE e.approuve
150 GROUP BY e.id''',
151 sql_query_info='SELECT * from savoirs_evenement WHERE id=$id',
152 sql_attr_uint=['debut'])
153
4134daa0 154emit_source('savoirsenpartage_chercheurs',
edbcaa99 155 '''SELECT c.personne_ptr_id AS id,
5212238e
EMS
156 p.nom AS nom,
157 p.prenom AS prenom,
acd5cd8f 158 CONCAT(p.nom, ' ', p.prenom) AS nom_complet,
5212238e
EMS
159 c.theme_recherche AS theme_recherche,
160 c.groupe_recherche AS groupe_recherche,
161 c.mots_cles AS mots_cles,
edbcaa99
EMS
162 c.membre_instance_auf AS membre_instance_auf,
163 c.expert_oif AS expert_oif,
164 c.membre_association_francophone AS membre_association_francophone,
165 c.membre_reseau_institutionnel AS membre_reseau_institutionnel,
5212238e 166 IFNULL(et.nom, etablissement_autre_nom) AS etablissement,
acd5cd8f 167 IFNULL(et.nom, etablissement_autre_nom) AS etablissement_attr,
5212238e 168 pays.nom AS pays,
acd5cd8f 169 pays.nom AS pays_attr,
5212238e
EMS
170 pays.id AS pays_id,
171 r.nom AS region,
172 r.id AS region_id,
173 GROUP_CONCAT(DISTINCT d.nom_discipline) AS disciplines,
42a890cf 174 GROUP_CONCAT(DISTINCT pub.titre) AS publications,
5212238e
EMS
175 t.titre AS these,
176 GROUP_CONCAT(DISTINCT g.nom) AS groupes,
177 GROUP_CONCAT(DISTINCT ex.nom) AS expertises,
178 GROUP_CONCAT(DISTINCT g.id) AS groupe_ids,
179 TO_DAYS(c.date_modification) AS date_modification,
180 CASE pays WHEN 'Nord' THEN 1
181 WHEN 'Sud' THEN 2
182 END AS nord_sud,
183 CASE statut WHEN 'enseignant' THEN 1
184 WHEN 'etudiant' THEN 2
185 WHEN 'independant' THEN 3
186 END AS statut,
d9da735f 187 (ex.id IS NOT NULL) AS expert
5212238e 188 FROM chercheurs_chercheur c
edbcaa99 189 INNER JOIN chercheurs_personne p ON c.personne_ptr_id = p.id
5212238e 190 LEFT JOIN ref_etablissement et ON et.id = c.etablissement
acd5cd8f 191 LEFT JOIN ref_pays pays ON pays.code = IFNULL(et.pays, c.etablissement_autre_pays)
5212238e
EMS
192 LEFT JOIN ref_region r ON pays.region = r.id
193 LEFT JOIN discipline d ON d.id_discipline = c.discipline
edbcaa99
EMS
194 LEFT JOIN chercheurs_publication pub ON pub.chercheur_id = c.personne_ptr_id
195 LEFT JOIN chercheurs_these t ON t.chercheur_id = c.personne_ptr_id
196 LEFT JOIN chercheurs_chercheurgroupe cg ON cg.chercheur = c.personne_ptr_id
5212238e 197 LEFT JOIN chercheurs_groupe g ON g.id = cg.groupe
edbcaa99
EMS
198 LEFT JOIN chercheurs_expertise ex ON ex.chercheur_id = c.personne_ptr_id
199 GROUP BY c.personne_ptr_id''',
5212238e
EMS
200 sql_query_info='SELECT * from chercheurs_chercheur WHERE id=$id',
201 sql_attr_multi=['groupe_ids'],
edbcaa99
EMS
202 sql_attr_uint=['pays_id', 'region_id', 'nord_sud',
203 'date_modification', 'statut', 'expert',
204 'membre_instance_auf', 'expert_oif',
205 'membre_association_francophone',
206 'membre_reseau_institutionnel'],
acd5cd8f 207 sql_attr_str2ordinal=['nom_complet', 'etablissement_attr', 'pays_attr'])
5212238e 208
4134daa0 209emit_source('savoirsenpartage_sites',
5212238e
EMS
210 '''SELECT s.id AS id,
211 s.titre AS titre,
212 s.description AS description,
213 s.editeur AS editeur,
214 s.auteur AS auteur,
215 s.mots_cles AS mots_cles,
216 GROUP_CONCAT(DISTINCT d.nom_discipline) AS disciplines,
217 GROUP_CONCAT(DISTINCT p.nom) AS pays,
218 GROUP_CONCAT(DISTINCT p.id) AS pays_ids,
a611bc4a
EMS
219 GROUP_CONCAT(DISTINCT r.nom) AS regions,
220 TO_DAYS(s.date_maj) AS date_maj
5212238e
EMS
221 FROM sitotheque_site s
222 LEFT JOIN sitotheque_site_discipline sd ON sd.site_id = s.id
223 LEFT JOIN discipline d ON d.id_discipline = sd.discipline_id
224 LEFT JOIN ref_pays p ON p.id = s.pays
225 LEFT JOIN ref_region r ON r.id = p.region
226 GROUP BY s.id''',
227 'SELECT * FROM sitotheque_site WHERE id=$id',
a611bc4a
EMS
228 sql_attr_multi=['pays_ids'],
229 sql_attr_uint=['date_maj']
230 )
5212238e 231
4134daa0
EMS
232emit_index('savoirsenpartage_actualites')
233emit_index('savoirsenpartage_ressources')
234emit_index('savoirsenpartage_evenements')
235emit_index('savoirsenpartage_chercheurs')
236emit_index('savoirsenpartage_sites')