314c311a |
1 | <?php |
2 | /** |
3 | * Fichier de la classe XMLImport |
4 | * |
5 | * PHP versions 4 et 5 |
6 | * |
7 | * LODEL - Logiciel d'Edition ELectronique. |
8 | * |
9 | * Copyright (c) 2001-2002, Ghislain Picard, Marin Dacos |
10 | * Copyright (c) 2003, Ghislain Picard, Marin Dacos, Luc Santeramo, Nicolas Nutten, Anne Gentil-Beccot |
11 | * Copyright (c) 2004, Ghislain Picard, Marin Dacos, Luc Santeramo, Anne Gentil-Beccot, Bruno Cénou |
12 | * Copyright (c) 2005, Ghislain Picard, Marin Dacos, Luc Santeramo, Gautier Poupeau, Jean Lamy, Bruno Cénou |
13 | * Copyright (c) 2006, Marin Dacos, Luc Santeramo, Bruno Cénou, Jean Lamy, Mikaël Cixous, Sophie Malafosse |
14 | * Copyright (c) 2007, Marin Dacos, Bruno Cénou, Sophie Malafosse, Pierre-Alain Mignot |
15 | * Copyright (c) 2008, Marin Dacos, Bruno Cénou, Pierre-Alain Mignot, Inès Secondat de Montesquieu, Jean-François Rivière |
16 | * Copyright (c) 2009, Marin Dacos, Bruno Cénou, Pierre-Alain Mignot, Inès Secondat de Montesquieu, Jean-François Rivière |
17 | * |
18 | * Home page: http://www.lodel.org |
19 | * |
20 | * E-Mail: lodel@lodel.org |
21 | * |
22 | * All Rights Reserved |
23 | * |
24 | * This program is free software; you can redistribute it and/or modify |
25 | * it under the terms of the GNU General Public License as published by |
26 | * the Free Software Foundation; either version 2 of the License, or |
27 | * (at your option) any later version. |
28 | * |
29 | * This program is distributed in the hope that it will be useful, |
30 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
31 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
32 | * GNU General Public License for more details. |
33 | * |
34 | * You should have received a copy of the GNU General Public License |
35 | * along with this program; if not, write to the Free Software |
36 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
37 | * |
38 | * @author Ghislain Picard |
39 | * @author Jean Lamy |
40 | * @copyright 2001-2002, Ghislain Picard, Marin Dacos |
41 | * @copyright 2003, Ghislain Picard, Marin Dacos, Luc Santeramo, Nicolas Nutten, Anne Gentil-Beccot |
42 | * @copyright 2004, Ghislain Picard, Marin Dacos, Luc Santeramo, Anne Gentil-Beccot, Bruno Cénou |
43 | * @copyright 2005, Ghislain Picard, Marin Dacos, Luc Santeramo, Gautier Poupeau, Jean Lamy, Bruno Cénou |
44 | * @copyright 2006, Marin Dacos, Luc Santeramo, Bruno Cénou, Jean Lamy, Mikaël Cixous, Sophie Malafosse |
45 | * @copyright 2007, Marin Dacos, Bruno Cénou, Sophie Malafosse, Pierre-Alain Mignot |
46 | * @copyright 2008, Marin Dacos, Bruno Cénou, Pierre-Alain Mignot, Inès Secondat de Montesquieu, Jean-François Rivière |
47 | * @copyright 2009, Marin Dacos, Bruno Cénou, Pierre-Alain Mignot, Inès Secondat de Montesquieu, Jean-François Rivière |
48 | * @licence http://www.gnu.org/copyleft/gpl.html |
49 | * @version CVS:$Id: |
50 | * @package lodel |
51 | * @since Fichier ajouté depuis la version 0.8 |
52 | */ |
53 | |
54 | /** |
 * Class XMLImportParser
 *
 * Imports an XMLLodelBasic file into the database
58 | * |
59 | * @package lodel |
60 | * @author Ghislain Picard |
61 | * @author Jean Lamy |
62 | * @copyright 2005, Ghislain Picard, Marin Dacos, Luc Santeramo, Gautier Poupeau, Jean Lamy, Bruno Cénou |
63 | * @copyright 2006, Marin Dacos, Luc Santeramo, Bruno Cénou, Jean Lamy, Mikaël Cixous, Sophie Malafosse |
64 | * @copyright 2007, Marin Dacos, Bruno Cénou, Sophie Malafosse, Pierre-Alain Mignot |
65 | * @licence http://www.gnu.org/copyleft/gpl.html |
66 | * @since Classe ajoutée depuis la version 0.8 |
67 | */ |
68 | class XMLImportParser |
69 | { |
/**
 * Main styles: maps a normalised style name to the value object describing it
 * (filled by init() and _init_class()).
 * @var array
 */
var $commonstyles;

/**
 * Contextual styles: for each class name, maps a style name to the field
 * object valid in that context (filled by _init_class()).
 * @var array
 */
var $contextstyles;

/**
 * Current styles.
 * NOTE(review): not referenced by any method of this class; kept for
 * backward compatibility.
 * @var array
 */
var $cstyles;

/**
 * Class of the document being imported (set by init()).
 * @var string
 */
var $mainclass;

/**
 * Handler receiving the parser callbacks (set by parse()).
 * Declared explicitly so that parse() does not create it dynamically.
 * @var object
 */
var $handler;

/**
 * Counter reset by parse() and incremented each time an opening 'dc.title'
 * field is met while only the base class is on the class stack.
 * Declared explicitly so that parse() does not create it dynamically.
 * @var integer
 */
var $nbdoc;
93 | |
/**
 * Constructor (PHP 4 style: the method carries the name of the class).
 *
 * Does nothing; the parser is set up later through init().
 */
function XMLImportParser()
{
}
100 | |
/**
 * Parser initialisation.
 *
 * On first use, loads every active internal style and character style and
 * registers each of their (comma or semicolon separated) style names in
 * $this->commonstyles. Then records the PHP version in the global
 * $phpversion, loads the field styles of $class and remembers $class as the
 * main class.
 *
 * @param string $class the name of the class of the object (entity) imported
 */
function init($class)
{
    global $phpversion;

    if (!$this->commonstyles) {
        if (!defined('INC_FUNC')) {
            include 'func.php';
        }

        // internal styles first, then character styles: same treatment for both
        foreach (array ('internalstyles', 'characterstyles') as $table) {
            $records = DAO::getDAO($table)->findMany('status > 0');
            foreach ($records as $record) {
                // one record may declare several style names
                $names = preg_split("/[,;]/", $record->style);
                foreach ($names as $name) {
                    // normalises $name and updates the record (language, first synonym)
                    $this->_prepare_style($name, $record);
                    if (!$name) {
                        continue;
                    }
                    $this->commonstyles[$name] = $record;
                }
            }
        }
    }

    $phpversion = explode('.', PHP_VERSION);
    $this->_init_class($class);
    $this->mainclass = $class;
} // end of init()
140 | |
/**
 * Parses the XHTML content of the entity and sends the data to the $handler object.
 *
 * The string is split on the ServOO <soo:block> tags into a flat array read by
 * groups of three cells, then processed in successive passes:
 *  1. tag descriptions are replaced by style objects where possible (_objectize());
 *  2. internal styles are moved or wrapped according to their "surrounding" property;
 *  3. non-greedy internal styles are closed around block-level HTML elements
 *     (table, ul, ol, ...) and reopened after them;
 *  4. the handler callbacks are fired for every block and inline (<soo:inline>) tag.
 *
 * DOM was deliberately not used, to avoid the memory and processing cost of
 * building a tree (the original author was not sure this performs much better).
 *
 * @param string $string the string to parse
 * @param object &$handler the handler of the parser
 */
function parse($string, &$handler)
{
    $this->handler = &$handler; // non-reentrant: the handler is stored on the instance
    # ! Pay attention to the following line !
    $this->handler->commonstyles = $this->commonstyles; // give the handler the styles used to parse the doc
    $arr = preg_split("/<(\/?)soo:block(>|\s+class=\"[^\"]*\"\s*>)/", $string, -1, PREG_SPLIT_DELIM_CAPTURE); // split the string on the ServOO block tags <soo:block>
    // $arr now contains all the elements of the doc
    /* after _objectize() the array is laid out as follows
    [1] => '' (opening tag)
    [2] => tablefieldsvo Object or internalstylesvo Object
    [3] => content
    [4] => / (closing tag)
    [5] => tablefieldsvo Object or internalstylesvo Object
    which is why the loops below step through the array three cells at a time.
    */

    unset ($string); // save memory
    $this->_objectize($arr, true); // make objects wherever it is possible.

    // second pass
    // process the internalstyles
    // this is a hard piece of code that does not do much... but no better way was found.
    $isave = false; // index where the next tablefield opening must be inserted, false when none is pending
    $n = count($arr);
    for ($i = 1; $i < $n; $i += 3) { // for each block element
        // only opening tags that were resolved to an object are of interest
        if ($arr[$i] == "/" || !is_object($arr[$i +1]))
            continue;
        $obj = & $arr[$i +1];
        $class = strtolower(get_class($obj));
        if (!$isave && $class == 'internalstylesvo') {
            $forcenext = false;
            if ($obj->surrounding == "-*") {
                // check what the previous element is.
                // NOTE(review): when $i == 1 the cells $i-3 and $i-2 do not exist.
                if ($arr[$i -3] == "/" && strtolower(get_class($arr[$i -2])) == 'tablefieldsvo') {
                    // good: move the closing tag of the tablefield further
                    $closing = array_splice($arr, $i -3, 3);
                    $i += 3;
                    array_splice($arr, $i, 0, $closing); // put it after the closing internalstyles
                } else {
                    $forcenext = true;
                }
                continue;
            }
            // NOTE(review): $forcenext is reset at each iteration and the only branch
            // setting it to true ends with "continue", so it is always false here.
            if ($forcenext || $obj->surrounding == "*-") {
                $isave = $i; // where to insert the next opening
            } else {
                // surrounding is a proper tag: wrap the internal style in it
                $obj = & $this->commonstyles[$obj->surrounding];
                array_splice($arr, $i, 0, array ('', $obj, '')); // opening tag
                $i += 3;
                $n += 3;
                array_splice($arr, $i +6, 0, array ("/", $obj, '')); // closing tag after the closing internal tag
                $n += 3;
            }
        } else
            if ($class == 'tablefieldsvo' && $isave) {
                // put the opening at $isave
                $arr[$i -1] .= $arr[$i +2]; // append this field's text to the preceding text cell. Not the most efficient way, but the nicest one
                $arr[$i +2] = '';
                $opening = array_splice($arr, $i, 3);
                array_splice($arr, $isave, 0, $opening);
                $isave = false;
            }
    } // end for

    // deal with the non-greedy internalstyles
    // data using such a style and containing tables, lists, ... are modified: those elements are excluded
    // a little bit hard, isn't it ?
    for ($i = 1; $i < $n; $i += 3) {
        if (!is_object($arr[$i +1]))
            continue;
        $obj = & $arr[$i +1];
        $class = strtolower(get_class($obj));
        if ($class != 'internalstylesvo' || $obj->greedy)
            continue;
        if ($arr[$i] == "/") {
            // closing tag: nothing to do
        } else {
            // split the content on the opening/closing tags of block-level elements
            $arr2 = preg_split("/(<\/?)((?:table|ul|ol|dl|dd|pre|blockquote|object)\b[^>]*>)/", $arr[$i +2], -1, PREG_SPLIT_DELIM_CAPTURE);
            $arr[$i +2] = $arr2[0];
            $m = count($arr2);
            $blockel = 0; // nesting depth of the block-level elements
            for ($j = 1; $j < $m; $j += 3) {
                if ($arr2[$j] == '</') { // closing
                    $arr[$i +2] .= $arr2[$j].$arr2[$j +1];
                    $blockel --;
                    if ($blockel == 0) {
                        // back at depth 0: reopen the internal style after the element
                        array_splice($arr, $i +3, 0, array ("", $obj, "")); // opening tag
                        $i += 3;
                        $n += 3;
                    }
                } else {
                    // opening
                    $blockel ++;
                    if ($blockel == 1) {
                        // entering an outermost element: close the internal style before it
                        array_splice($arr, $i +3, 0, array ("/", $obj, '')); // closing tag
                        $i += 3;
                        $n += 3;
                    }
                    $arr[$i +2] .= $arr2[$j].$arr2[$j +1];
                }
                $arr[$i +2] .= $arr2[$j +2]; // text following the tag
            }
        } // opening
    }
    // proper parser. Launch the handlers
    $datastack = array ();
    $classstack = array (array ($this->mainclass, 'entities'));
    $handler->openClass($classstack[0]);
    $this->nbdoc = 0;

    for ($i = 1; $i < $n; $i += 3) {
        $this->_parseOneStep($arr, $i, $datastack, $classstack, 'block');

        // the text following a block tag may itself contain inline tags
        $larr = preg_split("/<(\/)?soo:inline(>|\s+class=\"[^\"]*\"\s*>)/", $arr[$i +2], -1, PREG_SPLIT_DELIM_CAPTURE);
        $nj = count($larr);
        $datastack[0] .= $larr[0];
        if ($nj > 1) {
            $this->_objectize($larr, false);
            for ($j = 1; $j < $nj; $j += 3) {
                $this->_parseOneStep($larr, $j, $datastack, $classstack, 'inline');
                $datastack[0] .= $larr[$j +2];
            }
        }
    }
    // close the last tags; the second argument is true when more than one document was counted
    while ($classstack) {
        $handler->closeClass(array_shift($classstack), $this->nbdoc > 1);
    }

} // end of function parse
282 | |
/**
 * Performs one step of the parser.
 *
 * 1/ calls the handler method matching the current style/tag/object
 * 2/ changes the context (class stack) if required
 * 3/ feeds the data stack
 *
 * A closing tag immediately followed by an identical opening tag (or an
 * opening tag immediately preceded by an identical closing tag) is skipped so
 * that both runs are merged, except for non-greedy internal styles.
 *
 * @param array &$arr flat array of (slash, style object or name, text) triples
 * @param integer $i index of the slash cell of the triple to process
 * @param array &$datastack stack of the data being collected, current level first
 * @param array &$classstack stack of the (class, classtype) contexts, current one first
 * @param string $level 'block' or 'inline'
 * @access private
 */
function _parseOneStep(& $arr, $i, & $datastack, & $classstack, $level)
{
    $opening = $arr[$i] != "/";
    $obj = & $arr[$i +1];

    // current closing equals next opening, or current opening equals last closing
    $adjacent = (!$opening && isset($arr[$i +4]) && $obj == $arr[$i +4])
        || ($opening && isset($arr[$i -2]) && $obj == $arr[$i -2]);
    // unknown styles are plain strings: test is_object() first so that
    // get_class() is never called on a non-object (TypeError on PHP 8)
    if ($adjacent && (!is_object($obj) || strtolower(get_class($obj)) != 'internalstylesvo' || $obj->greedy)) {
        return;
    }

    if (!isset($datastack[0])) {
        $datastack[0] = '';
    }

    if (!is_object($obj)) {
        // unknown style
        if ($opening) {
            if ($level == 'inline') {
                array_unshift($datastack, '');
            } else {
                $datastack[0] = "";
            }
        } elseif ($obj == 'documents') {
            // do nothing
        } else {
            if ($level == 'inline') {
                $data = array_shift($datastack);
                $datastack[0] .= $this->handler->unknownCharacterStyle($obj, $data);
            } else {
                // close up to the base
                while (count($classstack) > 1) {
                    $this->handler->closeClass($classstack[0]);
                    array_shift($classstack);
                }
                $datastack[0] = $this->handler->unknownParagraphStyle($obj, $datastack[0]);
            }
        }
        return;
    }

    $class = strtolower(get_class($obj));
    switch ($class) {
        case 'internalstylesvo' :
        case 'characterstylesvo' :
            if ($opening) {
                array_unshift($datastack, '');
            } else {
                // 'internalstylesvo' => processinternalstyles, 'characterstylesvo' => processcharacterstyles
                $call = 'process'. substr($class, 0, -2);
                $data = array_shift($datastack);
                $datastack[0] .= $this->handler->$call ($obj, $data); // call the method associated with the object class
            }
            break;
        case 'tablefieldsvo' :
            $cstyles = & $this->contextstyles[$classstack[0][0]];
            if (empty($cstyles[$obj->style])) { // context change ?
                // the style does not belong to the current context: leave it.
                // The style is expected to belong to the context below; this
                // is not checked.
                $this->handler->closeClass($classstack[0]);
                array_shift($classstack);
            }
            if ($opening) {
                if ($obj->g_name == 'dc.title' && count($classstack) == 1) {
                    // a title at base level starts a new document
                    $this->nbdoc++;
                    if ($this->nbdoc > 1) {
                        $this->handler->closeClass($classstack[0], true);
                        $this->handler->openClass($classstack[0], null, true);
                    }
                }
                array_unshift($datastack, '');
            } else {
                $data = array_shift($datastack);
                $datastack[0] .= $this->handler->processTableFields($obj, $data); // call the method associated with the object class
            }
            break;
        case 'entrytypesvo' :
        case 'persontypesvo' :
            if ($opening) { // opening. Switch the lowest context
                // close up to the base
                while (count($classstack) > 1) {
                    $this->handler->closeClass($classstack[0]);
                    array_shift($classstack);
                }
                array_unshift($datastack, "");

                // change the context
                $classtype = $class == 'entrytypesvo' ? 'entries' : 'persons';
                array_unshift($classstack, array ($obj->class, $classtype));
                $this->handler->openClass($classstack[0], $obj);
            } else {
                $call = 'process'. substr($class, 0, -2);
                $this->handler->$call ($obj, $datastack[0]); // call the method associated with the object class
                $datastack[0] = '';
            }
            break;
        default :
            trigger_error("ERROR: internal error in XMLImportParser::parse. Unknown class $class", E_USER_ERROR);
    }
} // end of _parseOneStep
396 | |
/**
 * Initialises a class context: gathers the active table fields of the class
 * to know what to do with each style.
 *
 * $class is the context name and $criteria the SQL condition selecting the
 * table fields. Fields of type 'entries' or 'persons' are replaced by their
 * index type object, whose own class is initialised recursively. Every style
 * name found is registered both in $this->contextstyles[$class] and in
 * $this->commonstyles.
 *
 * @param string $class the name of the class to init
 * @param string $criteria the possible SQL criteria (empty by default: class='$class')
 */
function _init_class($class, $criteria = '')
{
    if (isset($this->contextstyles[$class])) {
        return; // already done
    }
    // mark the class as initialised right away: a class without any styled
    // field is then not queried again, and a cycle between classes cannot
    // recurse endlessly
    $this->contextstyles[$class] = array ();

    // get all the information from the database for all the fields
    // NOTE(review): the criteria are built by string concatenation; $class is
    // assumed to be a trusted class name - confirm against the callers.
    $dao = DAO::getDAO('tablefields');
    if (!$criteria) {
        $criteria = "class='".$class."'";
    }
    $tfs = $dao->findMany("(".$criteria.") AND status>0");

    // create an assoc array style => tf information
    foreach ($tfs as $tf) {
        // is it an index ?
        if ($tf->type == 'entries' || $tf->type == 'persons') {
            // yes, it's an index. Get the index type object
            $typedao = DAO::getDAO($tf->type == 'entries' ? 'entrytypes' : 'persontypes');
            $type = $typedao->find("type='".$tf->name."'");
            if (!is_object($type)) {
                // no matching index type: nothing to clone or register for this field
                continue;
            }
            $tf = $type;
            $this->_init_class($tf->class, "class='".$tf->class."' OR class='entities_".$tf->class. "'");
        }
        // analyse the styles of the tablefields
        foreach (preg_split("/[,;]/", $tf->style) as $style) {
            // one copy per style name, since _prepare_style() modifies the object
            $tf2 = clone ($tf);
            $this->_prepare_style($style, $tf2);
            if ($style) {
                $this->commonstyles[$style] = $this->contextstyles[$class][$style] = $tf2;
            }
        }
    }
} // end of _init_class
441 | |
/**
 * Prepares a style for storage, detecting synonyms and language-specific styles.
 *
 * $style is trimmed and lower-cased. When it has the form "name:lang", it is
 * reduced to "name" and the object receives the language and that name.
 * Otherwise the object's own style list is reduced to its first entry
 * (everything from the first ':', ',' or ';' is dropped).
 *
 * @param string &$style the name of the style (normalised in place)
 * @param object &$obj the VO corresponding to the style (modified in place)
 */
function _prepare_style(& $style, & $obj)
{
    $style = strtolower(trim($style));

    // "name:lang" => name and language; the language part is optional
    $parts = explode(":", $style);
    $style = $parts[0];
    $lang = isset($parts[1]) ? $parts[1] : null;

    if ($lang) {
        $obj->lang = $lang;
        $obj->style = $style;
    } else {
        // synonym styles: keep the first one only
        $obj->style = preg_replace("/[:,;].*$/", '', $obj->style);
    }
}
462 | |
/**
 * Replaces styles by objects whenever it is possible.
 *
 * For each opening tag, the class attribute is turned into a style key
 * (makeSortKey(), then non-word characters removed; prefixed with "." for
 * inline styles). The tag cell receives the matching object of
 * $this->commonstyles, or the bare style name when the style is unknown.
 * Each closing tag cell receives the value of the opening tag it closes.
 *
 * @param array &$arr an array containing all the elements of a doc
 * @param boolean $blockstyle true if the style is a block style and false if not
 */
function _objectize(& $arr, $blockstyle)
{
    $stylesstack = array ();
    $n = count($arr);
    for ($i = 1; $i < $n; $i += 3) {
        if ($arr[$i] == "/") {
            // closing tag: same object (or name) as the matching opening tag
            $arr[$i +1] = array_pop($stylesstack);
            continue;
        }

        // opening tag
        if (!preg_match("/class=\"([^\"]*)\"/", $arr[$i +1], $result))
            trigger_error("ERROR: in _objectize", E_USER_ERROR);
        $name = preg_replace("/\W/", "", makeSortKey($result[1]));
        $key = $blockstyle ? $name : ".".$name;

        // plain lookup: binding a reference to a missing key would silently
        // add a null entry to $this->commonstyles for every unknown style
        if (!empty($this->commonstyles[$key])) {
            $arr[$i +1] = $this->commonstyles[$key];
        } else {
            $arr[$i +1] = $name;
        }
        array_push($stylesstack, $arr[$i +1]);
    }
    if ($stylesstack) {
        // some opening tags were never closed
        // NOTE(review): the two dumps below print the whole parsed document to the output.
        print_r($arr);
        print_r($stylesstack);
        trigger_error("ERROR: XML is likely invalid in XMLImportParser::_objectize", E_USER_ERROR);
    }
}
499 | } // end of class XMLImportParser |
500 | ?> |