bug3
[aidenligne_francais_universite.git] / fluxbb / include / search_idx.php
1 <?php
2 /***********************************************************************
3
4 Copyright (C) 2002-2005 Rickard Andersson (rickard@punbb.org)
5
6 This file is part of PunBB.
7
8 PunBB is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published
10 by the Free Software Foundation; either version 2 of the License,
11 or (at your option) any later version.
12
13 PunBB is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 59 Temple Place, Suite 330, Boston,
21 MA 02111-1307 USA
22
23 ************************************************************************/
24
25
26 // The contents of this file are very much inspired by the file functions_search.php
27 // from the phpBB Group forum software phpBB2 (http://www.phpbb.com).
28
29
30 // Make sure no one attempts to run this script "directly"
if (!defined('PUN'))
{
	// Not loaded through the forum bootstrap (PUN is undefined): stop immediately
	exit;
}
33
34
//
// "Cleans up" a text string and returns an array of unique words
// This function depends on the current locale setting
//
// $text is the raw post or subject text. It is lowercased, HTML entities,
// URLs and tag-like residue are blanked out, punctuation is stripped and the
// remainder is split on whitespace. Leading/trailing dots are removed from
// every word; words shorter than 3 or longer than 20 characters (measured
// after the dots are removed) and words listed in the user's language
// stopwords.txt are dropped.
//
// Returns the surviving words without duplicates. Array keys are the word
// positions in the cleaned text and are NOT renumbered.
//
function split_words($text)
{
	global $pun_user;
	static $noise_match, $noise_replace, $stopwords;

	// The noise tables and the stopword list are built once per request
	if (empty($noise_match))
	{
		$noise_match = array('[quote', '[code', '[url', '[img', '[email', '[color', '[colour', 'quote]', 'code]', 'url]', 'img]', 'email]', 'color]', 'colour]', '^', '$', '&', '(', ')', '<', '>', '`', '\'', '"', '|', ',', '@', '_', '?', '%', '~', '+', '[', ']', '{', '}', ':', '\\', '/', '=', '#', ';', '!', '*');
		$noise_replace = array('', '', '', '', '', '', '', '', '', '', '', '', '', '', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '', '', ' ', ' ', ' ', ' ', '', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '' , ' ', ' ', ' ', ' ', ' ', ' ');

		// A missing or unreadable stopword file simply means "no stopwords"
		$stopwords_file = PUN_ROOT.'lang/'.$pun_user['language'].'/stopwords.txt';
		$stopword_lines = is_readable($stopwords_file) ? file($stopwords_file) : false;
		$stopwords = ($stopword_lines === false) ? array() : array_map('trim', $stopword_lines);
	}

	// Clean up: HTML entities, URLs and tags carrying a long alphanumeric id
	$patterns = array(
		'#&[\#a-z0-9]+?;#i',
		'#\b[\w]+:\/\/[a-z0-9\.\-]+(\/[a-z0-9\?\.%_\-\+=&\/~]+)?#',
		'#\[\/?[a-z\*=\+\-]+(\:?[0-9a-z]+)?:[a-z0-9]{10,}(\:[a-z0-9]+)?=?.*?\]#'
	);
	$text = preg_replace($patterns, ' ', ' '.strtolower($text).' ');

	// Filter out junk
	$text = str_replace($noise_match, $noise_replace, $text);

	// Strip out extra whitespace between words
	$text = trim(preg_replace('#\s+#', ' ', $text));

	// Validate every word in the form it will be stored in (dots trimmed),
	// so that e.g. "it." is measured as the 2-character word "it" and a
	// stopword followed by a period is still recognised as a stopword
	$words = array();
	foreach (explode(' ', $text) as $i => $word)
	{
		$word = trim($word, '.');
		$num_chars = pun_strlen($word);

		if ($num_chars < 3 || $num_chars > 20 || in_array($word, $stopwords, true))
			continue;

		$words[$i] = $word;
	}

	return array_unique($words);
}
82
83
//
// Updates the search index with the contents of $post_id (and $subject)
//
// $mode     'edit' compares the new words with the ones currently indexed
//           for the post and only adds/removes the difference; any other
//           value indexes every word as new
// $post_id  id of the post being indexed
// $message  post body
// $subject  topic subject, or null/empty when no subject is to be indexed
//
function update_search_index($mode, $post_id, $message, $subject = null)
{
	global $db_type, $db;

	// The id is concatenated into SQL below, so force it to an integer
	$post_id = intval($post_id);

	// Split old and new post/subject to obtain array of 'words'
	$words_message = split_words($message);
	$words_subject = ($subject) ? split_words($subject) : array();

	// word => word id for what is currently indexed (only filled when editing)
	$cur_words = array('post' => array(), 'subject' => array());

	if ($mode == 'edit')
	{
		$result = $db->query('SELECT w.id, w.word, m.subject_match FROM '.$db->prefix.'search_words AS w INNER JOIN '.$db->prefix.'search_matches AS m ON w.id=m.word_id WHERE m.post_id='.$post_id, true) or error('Unable to fetch search index words', __FILE__, __LINE__, $db->error());

		while ($row = $db->fetch_row($result))
		{
			$match_in = ($row[2]) ? 'subject' : 'post';
			$cur_words[$match_in][$row[1]] = $row[0];
		}

		$db->free_result($result);

		$words = array(
			'add' => array(
				'post'		=> array_diff($words_message, array_keys($cur_words['post'])),
				'subject'	=> array_diff($words_subject, array_keys($cur_words['subject']))
			),
			'del' => array(
				'post'		=> array_diff(array_keys($cur_words['post']), $words_message),
				'subject'	=> array_diff(array_keys($cur_words['subject']), $words_subject)
			)
		);
	}
	else
	{
		$words = array(
			'add' => array('post' => $words_message, 'subject' => $words_subject),
			'del' => array('post' => array(), 'subject' => array())
		);
	}

	unset($words_message, $words_subject);

	// Get unique words from the above arrays
	$unique_words = array_unique(array_merge($words['add']['post'], $words['add']['subject']));

	// Words are inlined into SQL as quoted literals. This relies on
	// split_words() having removed quotes and backslashes from every word.
	if (!empty($unique_words))
	{
		$result = $db->query('SELECT id, word FROM '.$db->prefix.'search_words WHERE word IN(\''.implode('\',\'', $unique_words).'\')', true) or error('Unable to fetch search index words', __FILE__, __LINE__, $db->error());

		$word_ids = array();
		while ($row = $db->fetch_row($result))
			$word_ids[$row[1]] = $row[0];

		$db->free_result($result);

		// Words that are not in the word table yet have to be created first
		$new_words = array_diff($unique_words, array_keys($word_ids));

		if (!empty($new_words))
		{
			switch ($db_type)
			{
				case 'mysql':
				case 'mysqli':
					// One multi-row INSERT
					$db->query('INSERT INTO '.$db->prefix.'search_words (word) VALUES(\''.implode('\'),(\'', $new_words).'\')') or error('Unable to insert search index words', __FILE__, __LINE__, $db->error());
					break;

				default:
					// One INSERT per word
					foreach ($new_words as $word)
						$db->query('INSERT INTO '.$db->prefix.'search_words (word) VALUES(\''.$word.'\')') or error('Unable to insert search index words', __FILE__, __LINE__, $db->error());
					break;
			}
		}
	}

	// Delete matches (only if editing a post)
	foreach ($words['del'] as $match_in => $wordlist)
	{
		if (empty($wordlist))
			continue;

		$subject_match = ($match_in == 'subject') ? 1 : 0;

		// Translate the words to delete back into their word ids
		$del_ids = array();
		foreach ($wordlist as $word)
			$del_ids[] = $cur_words[$match_in][$word];

		$db->query('DELETE FROM '.$db->prefix.'search_matches WHERE word_id IN('.implode(',', $del_ids).') AND post_id='.$post_id.' AND subject_match='.$subject_match) or error('Unable to delete search index word matches', __FILE__, __LINE__, $db->error());
	}

	// Add new matches
	foreach ($words['add'] as $match_in => $wordlist)
	{
		if (empty($wordlist))
			continue;

		$subject_match = ($match_in == 'subject') ? 1 : 0;

		$db->query('INSERT INTO '.$db->prefix.'search_matches (post_id, word_id, subject_match) SELECT '.$post_id.', id, '.$subject_match.' FROM '.$db->prefix.'search_words WHERE word IN(\''.implode('\',\'', $wordlist).'\')') or error('Unable to insert search index word matches', __FILE__, __LINE__, $db->error());
	}
}
188
189
//
// Strip search index of indexed words in $post_ids
//
// $post_ids is placed verbatim inside SQL IN(...) lists, so it has to be a
// comma separated list of post ids. Words whose only row in search_matches
// belongs to one of these posts are removed from search_words, after which
// all search_matches rows of the posts are removed.
//
function strip_search_index($post_ids)
{
	global $db_type, $db;

	if ($db_type == 'mysql' || $db_type == 'mysqli')
	{
		// Same selection as the nested query in the else branch, done in
		// separate steps (presumably for MySQL versions without subquery
		// support - confirm before merging the two paths)
		$result = $db->query('SELECT word_id FROM '.$db->prefix.'search_matches WHERE post_id IN('.$post_ids.') GROUP BY word_id') or error('Unable to fetch search index word match', __FILE__, __LINE__, $db->error());

		if ($db->num_rows($result))
		{
			// Every word used by the posts
			$candidate_ids = '';
			while ($row = $db->fetch_row($result))
			{
				if ($candidate_ids != '')
					$candidate_ids .= ',';

				$candidate_ids .= $row[0];
			}

			// Of those, keep the words that have exactly one match row
			$result = $db->query('SELECT word_id FROM '.$db->prefix.'search_matches WHERE word_id IN('.$candidate_ids.') GROUP BY word_id HAVING COUNT(word_id)=1') or error('Unable to fetch search index word match', __FILE__, __LINE__, $db->error());

			if ($db->num_rows($result))
			{
				$orphan_ids = '';
				while ($row = $db->fetch_row($result))
				{
					if ($orphan_ids != '')
						$orphan_ids .= ',';

					$orphan_ids .= $row[0];
				}

				$db->query('DELETE FROM '.$db->prefix.'search_words WHERE id IN('.$orphan_ids.')') or error('Unable to delete search index word', __FILE__, __LINE__, $db->error());
			}
		}
	}
	else
	{
		// Single statement with nested subqueries
		$db->query('DELETE FROM '.$db->prefix.'search_words WHERE id IN(SELECT word_id FROM '.$db->prefix.'search_matches WHERE word_id IN(SELECT word_id FROM '.$db->prefix.'search_matches WHERE post_id IN('.$post_ids.') GROUP BY word_id) GROUP BY word_id HAVING COUNT(word_id)=1)') or error('Unable to delete from search index', __FILE__, __LINE__, $db->error());
	}

	// Finally drop the word matches of the posts themselves
	$db->query('DELETE FROM '.$db->prefix.'search_matches WHERE post_id IN('.$post_ids.')') or error('Unable to delete search index word match', __FILE__, __LINE__, $db->error());
}