Première version : mise en route du suivi.
[auf_bulletin.git] / lodel-0.9 / scripts / xmlimport.php
1 <?php
2 /**
3 * Fichier de la classe XMLImportParser
4 *
5 * PHP versions 4 et 5
6 *
7 * LODEL - Logiciel d'Edition ELectronique.
8 *
9 * Copyright (c) 2001-2002, Ghislain Picard, Marin Dacos
10 * Copyright (c) 2003, Ghislain Picard, Marin Dacos, Luc Santeramo, Nicolas Nutten, Anne Gentil-Beccot
11 * Copyright (c) 2004, Ghislain Picard, Marin Dacos, Luc Santeramo, Anne Gentil-Beccot, Bruno Cénou
12 * Copyright (c) 2005, Ghislain Picard, Marin Dacos, Luc Santeramo, Gautier Poupeau, Jean Lamy, Bruno Cénou
13 * Copyright (c) 2006, Marin Dacos, Luc Santeramo, Bruno Cénou, Jean Lamy, Mikaël Cixous, Sophie Malafosse
14 * Copyright (c) 2007, Marin Dacos, Bruno Cénou, Sophie Malafosse, Pierre-Alain Mignot
15 * Copyright (c) 2008, Marin Dacos, Bruno Cénou, Pierre-Alain Mignot, Inès Secondat de Montesquieu, Jean-François Rivière
16 * Copyright (c) 2009, Marin Dacos, Bruno Cénou, Pierre-Alain Mignot, Inès Secondat de Montesquieu, Jean-François Rivière
17 *
18 * Home page: http://www.lodel.org
19 *
20 * E-Mail: lodel@lodel.org
21 *
22 * All Rights Reserved
23 *
24 * This program is free software; you can redistribute it and/or modify
25 * it under the terms of the GNU General Public License as published by
26 * the Free Software Foundation; either version 2 of the License, or
27 * (at your option) any later version.
28 *
29 * This program is distributed in the hope that it will be useful,
30 * but WITHOUT ANY WARRANTY; without even the implied warranty of
31 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32 * GNU General Public License for more details.
33 *
34 * You should have received a copy of the GNU General Public License
35 * along with this program; if not, write to the Free Software
36 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
37 *
38 * @author Ghislain Picard
39 * @author Jean Lamy
40 * @copyright 2001-2002, Ghislain Picard, Marin Dacos
41 * @copyright 2003, Ghislain Picard, Marin Dacos, Luc Santeramo, Nicolas Nutten, Anne Gentil-Beccot
42 * @copyright 2004, Ghislain Picard, Marin Dacos, Luc Santeramo, Anne Gentil-Beccot, Bruno Cénou
43 * @copyright 2005, Ghislain Picard, Marin Dacos, Luc Santeramo, Gautier Poupeau, Jean Lamy, Bruno Cénou
44 * @copyright 2006, Marin Dacos, Luc Santeramo, Bruno Cénou, Jean Lamy, Mikaël Cixous, Sophie Malafosse
45 * @copyright 2007, Marin Dacos, Bruno Cénou, Sophie Malafosse, Pierre-Alain Mignot
46 * @copyright 2008, Marin Dacos, Bruno Cénou, Pierre-Alain Mignot, Inès Secondat de Montesquieu, Jean-François Rivière
47 * @copyright 2009, Marin Dacos, Bruno Cénou, Pierre-Alain Mignot, Inès Secondat de Montesquieu, Jean-François Rivière
48 * @license http://www.gnu.org/copyleft/gpl.html
49 * @version CVS:$Id:
50 * @package lodel
51 * @since Fichier ajouté depuis la version 0.8
52 */
53
54 /**
55 * Classe XMLImportParser
56 *
57 * Imports an XMLLodelBasic file into the database
58 *
59 * @package lodel
60 * @author Ghislain Picard
61 * @author Jean Lamy
62 * @copyright 2005, Ghislain Picard, Marin Dacos, Luc Santeramo, Gautier Poupeau, Jean Lamy, Bruno Cénou
63 * @copyright 2006, Marin Dacos, Luc Santeramo, Bruno Cénou, Jean Lamy, Mikaël Cixous, Sophie Malafosse
64 * @copyright 2007, Marin Dacos, Bruno Cénou, Sophie Malafosse, Pierre-Alain Mignot
65 * @license http://www.gnu.org/copyleft/gpl.html
66 * @since Classe ajoutée depuis la version 0.8
67 */
68 class XMLImportParser
69 {
70 /**
71 * Styles principaux
72 * @var array
73 */
74 var $commonstyles;
75
76 /**
77 * Styles contextuels
78 * @var array
79 */
80 var $contextstyles;
81
82 /**
83 * Styles courants
84 * @var array
85 */
86 var $cstyles;
87
88 /**
89 * Classe du document
90 * @var string
91 */
92 var $mainclass;
93
/**
 * Constructor.
 *
 * Does no work: the style tables are filled later by init().
 */
function __construct()
{
}

/**
 * Legacy PHP 4-style constructor.
 *
 * PHP 7.0 deprecated constructors named after their class and PHP 8.0 no
 * longer treats them as constructors, so the real constructor is
 * __construct(). This method is kept (declared after __construct()) so that
 * code calling it explicitly, e.g. a subclass, keeps working.
 */
function XMLImportParser()
{
    $this->__construct();
}
100
/**
 * Initialises the parser for one entity class.
 *
 * On the first call (while $this->commonstyles is still empty) every internal
 * style and every character style with "status > 0" is fetched through the
 * DAO layer and registered in $this->commonstyles under each of its
 * comma/semicolon separated style names, after normalisation by
 * _prepare_style() (synonyms and per-language variants).
 * The table-field styles of $class are then loaded by _init_class() and
 * $class is remembered as the main class of the document.
 *
 * Side effect: the global $phpversion is set to PHP_VERSION split on dots.
 *
 * @param string $class the name of the class of the object (entity) imported
 */
function init($class)
{
    global $phpversion;

    if (!$this->commonstyles) {
        defined('INC_FUNC') || include 'func.php';

        // Internal styles are loaded first, character styles second, so a
        // character style registered under an already used name wins.
        foreach (array('internalstyles', 'characterstyles') as $table) {
            $records = DAO::getDAO($table)->findMany('status > 0');
            foreach ($records as $vo) {
                // The names are split once, before _prepare_style() rewrites $vo->style.
                $names = preg_split("/[,;]/", $vo->style);
                foreach ($names as $name) {
                    $this->_prepare_style($name, $vo);
                    if ($name) {
                        $this->commonstyles[$name] = $vo;
                    }
                }
            }
        }
    }

    $phpversion = explode('.', PHP_VERSION);
    $this->_init_class($class);
    $this->mainclass = $class;
} // end of init()
140
/**
 * Parses the XHTML contents of an entity and sends the data to $handler.
 *
 * The string is not loaded into a DOM tree (the original author chose this to
 * limit memory use and processing). It is split on the ServOO block tags with
 * preg_split() and PREG_SPLIT_DELIM_CAPTURE, which yields a flat array laid
 * out in triples:
 *
 *   [0]     text before the first block tag
 *   [3k+1]  '' for an opening tag, '/' for a closing tag
 *   [3k+2]  the tag tail (class attribute); _objectize() replaces it with the
 *           matching style object, or with the bare style name when unknown
 *   [3k+3]  the text that follows the tag
 *
 * This is why every loop below starts at index 1 and advances by 3.
 *
 * Passes, in order:
 *   1. _objectize(): style names become objects where possible;
 *   2. internal styles are attached to a surrounding element according to
 *      their "surrounding" property;
 *   3. non-greedy internal styles are closed around table/list/pre/... markup
 *      and reopened after it;
 *   4. the handler callbacks are fired through _parseOneStep(), first for the
 *      block tags and then for the <soo:inline> tags found in each text slot.
 *
 * The method is not reentrant: it stores $handler in $this->handler and the
 * document counter in $this->nbdoc. The handler must provide openClass() and
 * closeClass() (called here) plus the methods called by _parseOneStep().
 *
 * @param string $string the string to parse
 * @param object &$handler the handler of the parser
 */
function parse($string, &$handler)
{
    $this->handler = &$handler; // non-reentrant
    // Pay attention to the following line: the handler receives the style
    // table that is used to parse the document.
    $this->handler->commonstyles = $this->commonstyles;

    // Split the string on the ServOO block tags <soo:block ...> / </soo:block>.
    $arr = preg_split("/<(\/?)soo:block(>|\s+class=\"[^\"]*\"\s*>)/", $string, -1, PREG_SPLIT_DELIM_CAPTURE);

    unset($string); // save memory
    $this->_objectize($arr, true); // make objects wherever it is possible

    // Second pass: process the internal styles.
    // $isave remembers the index of an internal style whose opening must be
    // preceded by the opening of the next table field (false when none pending).
    $isave = false;
    $n = count($arr);
    for ($i = 1; $i < $n; $i += 3) { // for each block element
        if ($arr[$i] == "/" || !is_object($arr[$i + 1])) {
            continue;
        }
        $obj = & $arr[$i + 1];
        $class = strtolower(get_class($obj));
        if (!$isave && $class == 'internalstylesvo') {
            $forcenext = false;
            if ($obj->surrounding == "-*") {
                // Is the previous triple the closing tag of a table field?
                // The slot may not exist (first block) or may hold a plain
                // string (unknown style), so check before calling get_class().
                $previousisfield = isset($arr[$i - 3])
                    && $arr[$i - 3] == "/"
                    && is_object($arr[$i - 2])
                    && strtolower(get_class($arr[$i - 2])) == 'tablefieldsvo';
                if ($previousisfield) {
                    // good, move that closing tag after the closing internal style
                    $closing = array_splice($arr, $i - 3, 3);
                    $i += 3;
                    array_splice($arr, $i, 0, $closing);
                } else {
                    // NOTE(review): $forcenext is set here but the continue
                    // below skips the only test that reads it, and it is reset
                    // on the next iteration, so it currently has no effect.
                    // Behaviour is deliberately left unchanged; confirm the
                    // intended handling before altering it.
                    $forcenext = true;
                }
                continue;
            }
            if ($forcenext || $obj->surrounding == "*-") {
                $isave = $i; // where to insert the next opening
            } else {
                // surrounding names a proper style: wrap the internal style in it
                $obj = & $this->commonstyles[$obj->surrounding];
                array_splice($arr, $i, 0, array('', $obj, '')); // opening tag
                $i += 3;
                $n += 3;
                array_splice($arr, $i + 6, 0, array("/", $obj, '')); // closing tag after the closing internal tag
                $n += 3;
            }
        } elseif ($class == 'tablefieldsvo' && $isave) {
            // Move this table field opening to $isave, in front of the pending
            // internal style. Its text is first appended to the preceding slot.
            $arr[$i - 1] .= $arr[$i + 2];
            $arr[$i + 2] = '';
            $opening = array_splice($arr, $i, 3);
            array_splice($arr, $isave, 0, $opening);
            $isave = false;
        }
    } // end for

    // Third pass: non-greedy internal styles. Their text is cut around
    // table/ul/ol/dl/dd/pre/blockquote/object elements: the style is closed
    // before the outermost such element and reopened after it.
    for ($i = 1; $i < $n; $i += 3) {
        if (!is_object($arr[$i + 1])) {
            continue;
        }
        $obj = & $arr[$i + 1];
        $class = strtolower(get_class($obj));
        if ($class != 'internalstylesvo' || $obj->greedy) {
            continue;
        }
        if ($arr[$i] == "/") {
            continue; // closing tag: nothing to do
        }
        // $arr2 is laid out in triples too: '<' or '</', tag tail, following text.
        $arr2 = preg_split("/(<\/?)((?:table|ul|ol|dl|dd|pre|blockquote|object)\b[^>]*>)/", $arr[$i + 2], -1, PREG_SPLIT_DELIM_CAPTURE);
        $arr[$i + 2] = $arr2[0];
        $m = count($arr2);
        $blockel = 0; // nesting depth of the excluded elements
        for ($j = 1; $j < $m; $j += 3) {
            if ($arr2[$j] == '</') { // closing
                $arr[$i + 2] .= $arr2[$j] . $arr2[$j + 1];
                $blockel--;
                if ($blockel == 0) {
                    array_splice($arr, $i + 3, 0, array("", $obj, "")); // opening tag
                    $i += 3;
                    $n += 3;
                }
            } else { // opening
                $blockel++;
                if ($blockel == 1) {
                    array_splice($arr, $i + 3, 0, array("/", $obj, '')); // closing tag
                    $i += 3;
                    $n += 3;
                }
                $arr[$i + 2] .= $arr2[$j] . $arr2[$j + 1];
            }
            $arr[$i + 2] .= $arr2[$j + 2];
        }
    }

    // Proper parser: launch the handlers.
    $datastack = array();
    $classstack = array(array($this->mainclass, 'entities'));
    $handler->openClass($classstack[0]);
    $this->nbdoc = 0;

    for ($i = 1; $i < $n; $i += 3) {
        $this->_parseOneStep($arr, $i, $datastack, $classstack, 'block');

        // The text following the block tag may itself contain inline tags.
        $larr = preg_split("/<(\/)?soo:inline(>|\s+class=\"[^\"]*\"\s*>)/", $arr[$i + 2], -1, PREG_SPLIT_DELIM_CAPTURE);
        $nj = count($larr);
        $datastack[0] .= $larr[0];
        if ($nj > 1) {
            $this->_objectize($larr, false);
            for ($j = 1; $j < $nj; $j += 3) {
                $this->_parseOneStep($larr, $j, $datastack, $classstack, 'inline');
                $datastack[0] .= $larr[$j + 2];
            }
        }
    }

    // close the last tags
    while ($classstack) {
        $handler->closeClass(array_shift($classstack), $this->nbdoc > 1);
    }
} // end of function parse
282
/**
 * Performs one step of the parser for the tag found at index $i.
 *
 * 1/ calls the handler method matching the current style/tag/object,
 * 2/ changes the class context when required,
 * 3/ feeds the data stack.
 *
 * $arr is a flat array laid out in triples as produced by preg_split() in
 * parse(): $arr[$i] is '/' for a closing tag (anything else means opening),
 * $arr[$i + 1] is the style object, or the bare style name when the style is
 * unknown. $datastack[0] is the buffer currently being filled; opening a
 * style pushes a new buffer and closing it pops the buffer, passes it to the
 * handler and appends the handler's return value to the buffer below.
 * $classstack[0] is the current context, an array(class name, class type).
 *
 * @param array &$arr the flat array of tag triples
 * @param integer $i index in $arr of the opening/closing marker to process
 * @param array &$datastack stack of data buffers
 * @param array &$classstack stack of the classes (contexts) in use
 * @param string $level 'block' or 'inline' (the two values passed by parse())
 * @access private
 */
function _parseOneStep(& $arr, $i, & $datastack, & $classstack, $level)
{
    $opening = $arr[$i] != "/";
    $obj = & $arr[$i + 1];

    // Merge adjacent identical elements: skip a closing tag that equals the
    // next opening one, and an opening tag that equals the last closing one.
    // Non-greedy internal styles are never merged. is_object() is tested
    // first because $obj is a plain string for unknown styles and get_class()
    // must not be called on a non-object.
    $sameasneighbour = (!$opening && isset($arr[$i + 4]) && $obj == $arr[$i + 4])
        || ($opening && isset($arr[$i - 2]) && $obj == $arr[$i - 2]);
    if ($sameasneighbour && (!is_object($obj) || strtolower(get_class($obj)) != 'internalstylesvo' || $obj->greedy)) {
        return;
    }

    if (!isset($datastack[0])) {
        $datastack[0] = '';
    }

    if (!is_object($obj)) {
        // unknown style: $obj is the style name
        if ($opening) {
            if ($level == 'inline') {
                array_unshift($datastack, '');
            } else {
                $datastack[0] = "";
            }
        } elseif ($obj == 'documents') {
            // do nothing
        } else {
            if ($level == 'inline') {
                $data = array_shift($datastack);
                $datastack[0] .= $this->handler->unknownCharacterStyle($obj, $data);
            } else {
                // close up to the base
                while (count($classstack) > 1) {
                    $this->handler->closeClass($classstack[0]);
                    array_shift($classstack);
                }
                $datastack[0] = $this->handler->unknownParagraphStyle($obj, $datastack[0]);
            }
        }
        return;
    }

    $class = strtolower(get_class($obj));
    switch ($class) {
        case 'internalstylesvo' :
        case 'characterstylesvo' :
            if ($opening) {
                array_unshift($datastack, '');
            } else {
                // handler method named after the class without its "vo"
                // suffix: processinternalstyles / processcharacterstyles
                $call = 'process' . substr($class, 0, -2);
                $data = array_shift($datastack);
                $datastack[0] .= $this->handler->$call($obj, $data);
            }
            break;
        case 'tablefieldsvo' :
            $cstyles = & $this->contextstyles[$classstack[0][0]];
            if (empty($cstyles[$obj->style])) { // context change ?
                // the style is not defined in the current context: leave it
                $this->handler->closeClass($classstack[0]);
                $cl = array_shift($classstack);
                if (empty($this->contextstyles[$cl[0]][$obj->style])) {
                    // must be in the context below
                    // if not... problem.
                }
                // new context
            }
            if ($opening) {
                // A 'dc.title' field at base level starts a new document; from
                // the second one on, the base class is closed and reopened.
                if ($obj->g_name == 'dc.title' && count($classstack) == 1) {
                    $this->nbdoc++;
                    if ($this->nbdoc > 1) {
                        $this->handler->closeClass($classstack[0], true);
                        $this->handler->openClass($classstack[0], null, true);
                    }
                }
                array_unshift($datastack, '');
            } else {
                $data = array_shift($datastack);
                $datastack[0] .= $this->handler->processTableFields($obj, $data);
            }
            break;
        case 'entrytypesvo' :
        case 'persontypesvo' :
            if ($opening) { // opening. Switch the lowest context
                // close up to the base
                while (count($classstack) > 1) {
                    $this->handler->closeClass($classstack[0]);
                    array_shift($classstack);
                }
                array_unshift($datastack, "");

                // change the context
                $classtype = $class == 'entrytypesvo' ? 'entries' : 'persons';
                array_unshift($classstack, array($obj->class, $classtype));
                $this->handler->openClass($classstack[0], $obj);
            } else {
                // processentrytypes / processpersontypes; the return value is
                // not used and the current buffer is emptied
                $call = 'process' . substr($class, 0, -2);
                $this->handler->$call($obj, $datastack[0]);
                $datastack[0] = '';
            }
            break;
        default :
            trigger_error("ERROR: internal error in XMLImportParser::parse. Unknown class $class", E_USER_ERROR);
    }
} // end of _parseOneStep
396
/**
 * Gathers from the table fields what has to be done with the various styles.
 *
 * Loads the table fields selected by $criteria (by default those of $class)
 * with status > 0 and registers each of their comma/semicolon separated style
 * names both in $this->contextstyles[$class] and in $this->commonstyles.
 * A field of type 'entries' or 'persons' is an index: it is replaced by its
 * entry type / person type object, and the class of that type is initialised
 * recursively (criteria: that class or "entities_" followed by that class).
 *
 * @param string $class the name of the class to init (the context)
 * @param string $criteria optional SQL criteria selecting the table fields
 */
function _init_class($class, $criteria = '')
{
    if (isset($this->contextstyles[$class])) {
        return; // already done
    }
    // Mark the class as done right away: a class without any styled field is
    // then not queried again, and index definitions that refer to each other
    // cannot recurse endlessly.
    $this->contextstyles[$class] = array();

    // get all the information from the database for all the fields
    $dao = DAO::getDAO('tablefields');
    if (!$criteria) {
        $criteria = "class='" . $class . "'";
    }
    $tfs = $dao->findMany("(" . $criteria . ") AND status>0");

    // create an assoc array style => tf information
    foreach ($tfs as $tf) {
        // is it an index ?
        if ($tf->type == 'entries' || $tf->type == 'persons') {
            // yes, it's an index. Get the type object
            $typedao = DAO::getDAO($tf->type == 'entries' ? 'entrytypes' : 'persontypes');
            $type = $typedao->find("type='" . $tf->name . "'");
            if (!is_object($type)) {
                // The field refers to a type that could not be found: there is
                // no style to register, and cloning a non-object would be fatal.
                continue;
            }
            $tf = $type;
            $this->_init_class($tf->class, "class='" . $tf->class . "' OR class='entities_" . $tf->class . "'");
        }
        // analyse the styles of the table field (or of the index type)
        foreach (preg_split("/[,;]/", $tf->style) as $style) {
            // each style name gets its own copy, because _prepare_style()
            // writes the style name and language into the object
            $tf2 = clone ($tf);
            $this->_prepare_style($style, $tf2);
            if ($style) {
                $this->commonstyles[$style] = $this->contextstyles[$class][$style] = $tf2;
            }
        }
    }
} // end of _init_class
441
/**
 * Prepares a style for storage, detecting synonyms and per-language styles.
 *
 * $style is trimmed and lower-cased, then split on ':'. The part before the
 * first colon becomes the style name (written back to $style).
 * - When a non-empty second part is present it is taken as a language:
 *   $obj->lang is set to it and $obj->style to the style name.
 * - Otherwise $obj->style is cut at its first ':', ',' or ';', i.e. only the
 *   first synonym is kept.
 *
 * @param string &$style the name of the style, normalised in place
 * @param object &$obj the VO corresponding to the style, updated in place
 */
function _prepare_style(& $style, & $obj)
{
    $parts = explode(":", strtolower(trim($style)));
    $style = $parts[0];
    // The language suffix is optional: test for it explicitly instead of
    // hiding the undefined offset behind the @ operator.
    $lang = isset($parts[1]) ? $parts[1] : '';

    if ($lang) {
        $obj->lang = $lang;
        $obj->style = $style;
    } else {
        // synonym styles: take the first one
        $obj->style = preg_replace("/[:,;].*$/", '', $obj->style);
    }
}
462
/**
 * Replaces the styles by objects whenever it is possible.
 *
 * $arr is a flat array laid out in triples (marker, tag tail, text) starting
 * at index 1, as built by preg_split() in parse(). For each opening tag the
 * class attribute is read from the tag tail, normalised with makeSortKey()
 * and stripped of non-word characters, then looked up in $this->commonstyles
 * (under the name itself for block styles, under "." followed by the name
 * otherwise). The tag tail is replaced by the object found, or by the
 * normalised name when the style is unknown. Each closing tag receives the
 * value stored for its matching opening tag, tracked with a stack.
 *
 * Triggers an E_USER_ERROR when an opening tag has no class attribute or when
 * some tags are left unclosed.
 *
 * @param array &$arr an array containing all the elements of a doc
 * @param boolean $blockstyle true if the style is a block style and false if not
 */
function _objectize(& $arr, $blockstyle)
{
    $stylesstack = array();
    $n = count($arr);
    for ($i = 1; $i < $n; $i += 3) {
        if ($arr[$i] == "/") {
            // closing tag: reuse what was stored for the matching opening tag
            $arr[$i + 1] = array_pop($stylesstack);
            continue;
        }

        // opening tag
        if (!preg_match("/class=\"([^\"]*)\"/", $arr[$i + 1], $result)) {
            trigger_error("ERROR: in _objectize", E_USER_ERROR);
        }
        // $result is empty if a custom error handler let execution continue
        $rawname = isset($result[1]) ? $result[1] : '';
        $name = preg_replace("/\W/", "", makeSortKey($rawname));
        $key = $blockstyle ? $name : "." . $name;

        // Plain lookup: taking a reference to $this->commonstyles[$key] would
        // create a null entry in the style table for every unknown style.
        if (!empty($this->commonstyles[$key])) {
            $arr[$i + 1] = $this->commonstyles[$key];
        } else {
            $arr[$i + 1] = $name;
        }
        array_push($stylesstack, $arr[$i + 1]);
    }

    if ($stylesstack) {
        // some opening tags were never closed: dump the state, then fail
        print_r($arr);
        print_r($stylesstack);
        trigger_error("ERROR: XML is likely invalid in XMLImportParser::_objectize", E_USER_ERROR);
    }
}
499 } // end of class XMLImportParser
500 ?>