d279cb0d3705dfd99324ec09a53a23019558fa94
2 // +-----------------------------------------------------------------------+
3 // | Copyright (c) 2002 Richard Heyes |
4 // | All rights reserved. |
6 // | Redistribution and use in source and binary forms, with or without |
7 // | modification, are permitted provided that the following conditions |
10 // | o Redistributions of source code must retain the above copyright |
11 // | notice, this list of conditions and the following disclaimer. |
12 // | o Redistributions in binary form must reproduce the above copyright |
13 // | notice, this list of conditions and the following disclaimer in the |
14 // | documentation and/or other materials provided with the distribution.|
15 // | o The names of the authors may not be used to endorse or promote |
16 // | products derived from this software without specific prior written |
19 // | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
20 // | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
21 // | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
22 // | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
23 // | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
24 // | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
25 // | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
26 // | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
27 // | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
28 // | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
29 // | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
31 // +-----------------------------------------------------------------------+
32 // | Author: Richard Heyes <richard@phpguru.org> |
33 // +-----------------------------------------------------------------------+
35 // path modified by Ghislain / Pierre-Alain
36 require_once 'PEAR/PEAR.php';
39 * +----------------------------- IMPORTANT ------------------------------+
40 * | Usage of this class compared to native php extensions such as |
41 * | mailparse or imap, is slow and may be feature deficient. If available|
42 * | you are STRONGLY recommended to use the php extensions. |
43 * +----------------------------------------------------------------------+
47 * This class will parse a raw mime email and return
48 * the structure. Returned structure is similar to
49 * that returned by imap_fetchstructure().
51 * USAGE: (assume $input is your raw email)
53 * $decode = new Mail_mimeDecode($input, "\r\n");
54 * $structure = $decode->decode();
55 * print_r($structure);
59 * $params['input'] = $input;
60 * $structure = Mail_mimeDecode::decode($params);
61 * print_r($structure);
64 * - Implement further content types, eg. multipart/parallel,
65 * perhaps even message/partial.
67 * @author Richard Heyes <richard@phpguru.org>
68 * @version $Revision: 4409 $
72 class Mail_mimeDecode
extends PEAR
76 * The raw email to decode
82 * The header part of the input
88 * The body part of the input
94 * If an error occurs, this is used to store the message
100 * Flag to determine whether to include bodies in the
104 var $_include_bodies;
107 * Flag to determine whether to decode bodies
113 * Flag to determine whether to decode headers
116 var $_decode_headers;
119 * If invoked from a class, $this will be set. This has problematic
120 * connotations for calling decode() statically. Hence this variable
121 * is used to determine if we are indeed being called statically or
/**
 * Constructor.
 *
 * Stores the raw message, splits it into its header and body sections
 * and initialises the decoding flags.
 *
 * @param string The input to decode
 * @access public
 */
function __construct($input)
{
    // _splitBodyHeader() returns false (and records the reason in
    // $this->_error) when the input cannot be split; keep both sections
    // null in that case instead of destructuring a boolean.
    $sections = $this->_splitBodyHeader($input);
    if (is_array($sections)) {
        list($header, $body) = $sections;
    } else {
        $header = null;
        $body   = null;
    }

    $this->_input          = $input;
    $this->_header         = $header;
    $this->_body           = $body;
    $this->_decode_bodies  = false;
    $this->_include_bodies = true;
    $this->_decode_headers = false;

    // Marker read by decode() to tell an instance call from a static one.
    $this->mailMimeDecode  = true;
}

/**
 * PHP 4 style constructor, kept so that code calling it by name
 * (for example a subclass) keeps working. New code should rely on
 * __construct(), which PHP 8 requires for "new" to initialise the object.
 *
 * @param string The input to decode
 * @access public
 */
function Mail_mimeDecode($input)
{
    $this->__construct($input);
}
/**
 * Begins the decoding process. If called statically it will create an
 * object from $params['input'] and call decode() on that object.
 *
 * When called on an object, each option that is missing from $params is
 * treated as false (including include_bodies).
 *
 * @param array An array of various parameters that determine
 *              various things:
 *              include_bodies - Whether to include the body in the returned
 *                               object.
 *              decode_bodies  - Whether to decode the bodies
 *                               of the parts. (Transfer encoding)
 *              decode_headers - Whether to decode headers
 *              input          - If called statically, this will be treated
 *                               as the input
 * @return object Decoded results, or a PEAR error object on failure
 * @access public
 */
function decode($params = null)
{
    // The constructor sets $this->mailMimeDecode, so its absence means
    // there is no instance behind this call.
    $calledStatically = !isset($this->mailMimeDecode);

    if ($calledStatically) {
        // Called statically but no input
        if (!isset($params['input'])) {
            return PEAR::raiseError('Called statically and no input given');
        }

        // Create an object and pass details to that.
        $obj = new Mail_mimeDecode($params['input']);
        return $obj->decode($params);
    }

    // Called via an object
    $this->_include_bodies = isset($params['include_bodies']) ? $params['include_bodies'] : false;
    $this->_decode_bodies  = isset($params['decode_bodies'])  ? $params['decode_bodies']  : false;
    $this->_decode_headers = isset($params['decode_headers']) ? $params['decode_headers'] : false;

    $structure = $this->_decode($this->_header, $this->_body);
    if ($structure === false) {
        $structure = $this->raiseError($this->_error);
    }

    return $structure;
}
/**
 * Performs the decoding. Decodes the header and body strings passed to
 * it. For multipart and message/rfc822 content types it recurses into
 * the contained parts.
 *
 * The returned object carries: headers (lower-cased name => value, or a
 * list of values when a header is repeated), ctype_primary,
 * ctype_secondary, ctype_parameters, disposition, d_parameters, and
 * either body or parts, depending on what the headers contain.
 *
 * @param string Header section
 * @param string Body section
 * @param string Content type assumed when there is no Content-Type header
 * @return object|false Results of decoding process; false (with
 *                      $this->_error set) when a multipart entity has
 *                      no boundary parameter
 * @access private
 */
function _decode($headers, $body, $default_ctype = 'text/plain')
{
    $return  = new stdClass;
    $headers = $this->_parseHeaders($headers);

    // Collect headers by lower-cased name; a repeated header becomes a list.
    foreach ($headers as $header) {
        $name = strtolower($header['name']);
        if (!isset($return->headers[$name])) {
            $return->headers[$name] = $header['value'];
        } else {
            if (!is_array($return->headers[$name])) {
                $return->headers[$name] = array($return->headers[$name]);
            }
            $return->headers[$name][] = $header['value'];
        }
    }

    // Pick out the headers that drive the decoding.
    foreach ($headers as $header) {
        switch (strtolower($header['name'])) {
            case 'content-type':
                $content_type = $this->_parseHeaderValue($header['value']);

                if (preg_match('/([0-9a-z+.-]+)\/([0-9a-z+.-]+)/i', $content_type['value'], $regs)) {
                    $return->ctype_primary   = $regs[1];
                    $return->ctype_secondary = $regs[2];
                }

                if (isset($content_type['other'])) {
                    foreach ($content_type['other'] as $p_name => $p_value) {
                        $return->ctype_parameters[$p_name] = $p_value;
                    }
                }
                break;

            case 'content-disposition':
                $content_disposition = $this->_parseHeaderValue($header['value']);
                $return->disposition = $content_disposition['value'];
                if (isset($content_disposition['other'])) {
                    foreach ($content_disposition['other'] as $p_name => $p_value) {
                        $return->d_parameters[$p_name] = $p_value;
                    }
                }
                break;

            case 'content-transfer-encoding':
                $content_transfer_encoding = $this->_parseHeaderValue($header['value']);
                break;
        }
    }

    // No Content-Type header: fall back to the default type.
    if (!isset($content_type)) {
        $ctype = explode('/', $default_ctype);
        $return->ctype_primary   = $ctype[0];
        $return->ctype_secondary = $ctype[1];
        if ($this->_include_bodies) {
            $return->body = $this->_decode_bodies ? $this->_decodeBody($body) : $body;
        }

        return $return;
    }

    switch (strtolower($content_type['value'])) {
        case 'multipart/parallel':
        case 'multipart/report': // RFC1892
        case 'multipart/signed': // PGP
        case 'multipart/digest':
        case 'multipart/alternative':
        case 'multipart/related':
        case 'multipart/mixed':
            if (!isset($content_type['other']['boundary'])) {
                $this->_error = 'No boundary found for ' . $content_type['value'] . ' part';
                return false;
            }

            // Parts of a digest default to message/rfc822, all others to text/plain.
            $default_ctype = (strtolower($content_type['value']) === 'multipart/digest') ? 'message/rfc822' : 'text/plain';

            $parts = $this->_boundarySplit($body, $content_type['other']['boundary']);
            if (!is_array($parts)) {
                // Nothing between the boundaries: no parts to decode.
                $parts = array();
            }

            foreach ($parts as $raw_part) {
                // A part that cannot be split is decoded with empty
                // header and body sections.
                $sections = $this->_splitBodyHeader($raw_part);
                if (is_array($sections)) {
                    list($part_header, $part_body) = $sections;
                } else {
                    $part_header = null;
                    $part_body   = null;
                }

                $part = $this->_decode($part_header, $part_body, $default_ctype);
                if ($part === false) {
                    $part = $this->raiseError($this->_error);
                }
                $return->parts[] = $part;
            }
            break;

        case 'message/rfc822':
            // The embedded message is decoded with the same options as
            // its container, so bodies and headers are treated
            // consistently at every nesting level.
            $obj = new Mail_mimeDecode($body);
            $return->parts[] = $obj->decode(array(
                'include_bodies' => $this->_include_bodies,
                'decode_bodies'  => $this->_decode_bodies,
                'decode_headers' => $this->_decode_headers
            ));
            unset($obj);
            break;

        default:
            // text/plain, text/html and every other single-part type.
            $encoding = isset($content_transfer_encoding['value']) ? $content_transfer_encoding['value'] : '7bit';
            if ($this->_include_bodies) {
                $return->body = $this->_decode_bodies ? $this->_decodeBody($body, $encoding) : $body;
            }
            break;
    }

    return $return;
}
/**
 * Given the output of decode(), this will return an array of references
 * to the parts, indexed by mime number. Every visited part also gets a
 * mime_id property holding its number.
 *
 * @param  object $structure   The structure to go through
 * @param  bool   $no_refs     When true, leaf parts are stored as ''
 *                             instead of a reference to the part
 * @param  string $mime_number Internal use only.
 * @param  string $prepend     Internal use only.
 * @return array               Mime numbers
 * @access public
 */
function &getMimeNumbers(&$structure, $no_refs = false, $mime_number = '', $prepend = '')
{
    $return = array();

    if (!empty($structure->parts)) {
        if ($mime_number != '') {
            $structure->mime_id = $prepend . $mime_number;
            $return[$prepend . $mime_number] = &$structure;
        }

        // A repeated Content-Type header is stored as a list; only a
        // plain string can be inspected for the "message/" prefix.
        $is_message = !empty($structure->headers['content-type'])
            && is_string($structure->headers['content-type'])
            && substr(strtolower($structure->headers['content-type']), 0, 8) == 'message/';

        for ($i = 0; $i < count($structure->parts); $i++) {
            if ($is_message) {
                // Parts inside an embedded message start a new numbering
                // level below the message's own number.
                $prepend      = $prepend . $mime_number . '.';
                $_mime_number = '';
            } else {
                $_mime_number = ($mime_number == '' ? $i + 1 : sprintf('%s.%s', $mime_number, $i + 1));
            }

            $arr = &Mail_mimeDecode::getMimeNumbers($structure->parts[$i], $no_refs, $_mime_number, $prepend);
            foreach ($arr as $key => $val) {
                if ($no_refs) {
                    $return[$key] = '';
                } else {
                    $return[$key] = &$arr[$key];
                }
            }
        }
    } else {
        // A message without parts is numbered "1".
        if ($mime_number == '') {
            $mime_number = '1';
        }
        $structure->mime_id = $prepend . $mime_number;
        if ($no_refs) {
            $return[$prepend . $mime_number] = '';
        } else {
            $return[$prepend . $mime_number] = &$structure;
        }
    }

    return $return;
}
/**
 * Given a string containing a header and body section, this function
 * will split them (at the first blank line) and return them.
 *
 * @param string Input to split apart
 * @return array|false Contains header and body section, in that order;
 *                     false (with $this->_error set) when the input
 *                     contains no blank line
 * @access private
 */
function _splitBodyHeader($input)
{
    // Lazy match: everything up to the first blank line is the header,
    // the remainder (which may itself contain blank lines) is the body.
    if (preg_match("/^(.*?)\r?\n\r?\n(.*)/s", $input, $match)) {
        return array($match[1], $match[2]);
    }

    $this->_error = 'Could not split header and body';
    return false;
}
/**
 * Parse headers given in $input and return them as an array.
 *
 * Folded (continuation) lines are unfolded first. Lines that contain no
 * colon are not "Name: value" headers and are skipped. Header values
 * are passed through _decodeHeader() when header decoding is enabled.
 *
 * @param string Headers to parse
 * @return array Contains parsed headers, each an array with the keys
 *               'name' and 'value'
 * @access private
 */
function _parseHeaders($input)
{
    $return = array();

    if (!is_string($input) || $input === '') {
        return $return;
    }

    // Normalise line endings, then unfold continuation lines.
    $input = preg_replace("/\r?\n/", "\r\n", $input);
    $input = preg_replace("/\r\n(\t| )+/", ' ', $input);

    foreach (explode("\r\n", trim($input)) as $line) {
        $pos = strpos($line, ':');
        if ($pos === false) {
            continue;
        }

        $hdr_name  = substr($line, 0, $pos);
        // substr() returns false on older PHP versions when the colon is
        // the last character; normalise to an empty string.
        $hdr_value = (string) substr($line, $pos + 1);
        if ($hdr_value !== '' && $hdr_value[0] == ' ') {
            $hdr_value = (string) substr($hdr_value, 1);
        }

        $return[] = array(
            'name'  => $hdr_name,
            'value' => $this->_decode_headers ? $this->_decodeHeader($hdr_value) : $hdr_value
        );
    }

    return $return;
}
/**
 * Function to parse a header value, extract first part, and any
 * secondary parts (after ;). This function is not as robust as it could
 * be. Eg. header comments in the wrong place will probably break it.
 *
 * Each parameter is stored under its original name and under its
 * lower-cased name. Empty parameters (for example after a trailing
 * semi-colon) and parameters without "=" are ignored.
 *
 * @param string Header value to parse
 * @return array Contains parsed result: 'value' holds the first part,
 *               'other' (only present when parameters were found) maps
 *               parameter names to values
 * @access private
 */
function _parseHeaderValue($input)
{
    $return = array();

    $pos = strpos($input, ';');
    if ($pos === false) {
        $return['value'] = trim($input);
        return $return;
    }

    $return['value'] = trim(substr($input, 0, $pos));
    $input = trim((string) substr($input, $pos + 1));
    if ($input === '') {
        return $return;
    }

    // This splits on a semi-colon, if there's no preceeding backslash.
    // Can't handle if it's in double quotes however.
    $parameters = preg_split('/\s*(?<!\\\\);\s*/i', $input);

    foreach ($parameters as $parameter) {
        $eq = strpos($parameter, '=');
        if ($eq === false) {
            continue;
        }

        $param_name  = substr($parameter, 0, $eq);
        $param_value = (string) substr($parameter, $eq + 1);
        if ($param_value !== '' && $param_value[0] == '"') {
            // Drop the surrounding quotes.
            $param_value = (string) substr($param_value, 1, -1);
        }

        $return['other'][$param_name] = $param_value;
        $return['other'][strtolower($param_name)] = $param_value;
    }

    return $return;
}
/**
 * This function splits the input based on the given boundary.
 *
 * The text before the first boundary and the text after the last
 * boundary are discarded; everything in between is returned as parts.
 *
 * @param string Input to parse
 * @param string Boundary string, without the leading "--"
 * @return array Contains array of resulting mime parts (empty when the
 *               boundary does not occur at least twice)
 * @access private
 */
function _boundarySplit($input, $boundary)
{
    $parts = array();
    $tmp   = explode('--' . $boundary, $input);

    // Skip the first and the last piece.
    $last = count($tmp) - 1;
    for ($i = 1; $i < $last; $i++) {
        $parts[] = $tmp[$i];
    }

    return $parts;
}
/**
 * Given a header, this function will decode it according to RFC2047.
 * Probably not *exactly* conformant, but it does pass all the given
 * examples (in RFC2047).
 *
 * The charset named in an encoded-word is not used: the decoded bytes
 * are returned as they are.
 *
 * @param string Input header value to decode
 * @return string Decoded header value
 * @access private
 */
function _decodeHeader($input)
{
    // Remove white space between encoded-words. The look-ahead leaves the
    // following "=?" unconsumed so a run of three or more adjacent
    // encoded-words is joined completely.
    $input = preg_replace('/(=\?[^?]+\?(q|b)\?[^?]*\?=)\s+(?==\?)/i', '\1', $input);

    // Decode every encoded-word in one pass, so decoded text is never
    // scanned (and decoded) a second time.
    return preg_replace_callback(
        '/=\?([^?]+)\?(q|b)\?([^?]*)\?=/i',
        function ($word) {
            $text = $word[3];

            if (strtolower($word[2]) === 'b') {
                return base64_decode($text);
            }

            // "Q" encoding: "_" stands for a space, "=XX" for a byte.
            $text = str_replace('_', ' ', $text);
            return preg_replace_callback(
                '/=([a-f0-9]{2})/i',
                function ($hex) {
                    return chr(hexdec($hex[1]));
                },
                $text
            );
        },
        $input
    );
}
/**
 * Given a body string and an encoding type, this function will decode
 * and return it.
 *
 * The encoding name is compared case-insensitively. Any encoding other
 * than quoted-printable and base64 returns the input unchanged.
 *
 * @param  string Input body to decode
 * @param  string Encoding type to use.
 * @return string Decoded body
 * @access private
 */
function _decodeBody($input, $encoding = '7bit')
{
    switch (strtolower(trim($encoding))) {
        case 'quoted-printable':
            return $this->_quotedPrintableDecode($input);

        case 'base64':
            return base64_decode($input);

        case '7bit':
        default:
            return $input;
    }
}
/**
 * Given a quoted-printable string, this function will decode and
 * return it.
 *
 * @param  string Input body to decode
 * @return string Decoded body
 * @access private
 */
function _quotedPrintableDecode($input)
{
    // Remove soft line breaks
    $input = preg_replace("/=\r?\n/", '', $input);

    // Replace encoded characters. A callback is used because the "e"
    // (eval) pattern modifier is no longer available in PHP 7 and later.
    $input = preg_replace_callback(
        '/=([a-f0-9]{2})/i',
        function ($hex) {
            return chr(hexdec($hex[1]));
        },
        $input
    );

    return $input;
}
/**
 * Checks the input for uuencoded files and returns an array of them.
 * The documented static usage is:
 *
 *  $files =& Mail_mimeDecode::uudecode($some_text);
 *
 * It will check for the begin 666 ... end syntax however and won't just
 * blindly decode whatever you pass it.
 *
 * @param  string Input body to look for attachments in
 * @return array  Decoded bodies, filenames and permissions; one entry
 *                per file with the keys 'filename', 'fileperm' and
 *                'filedata' (empty when nothing was found)
 * @access public
 */
function &uudecode($input)
{
    $files = array();

    // Find all uuencoded sections
    preg_match_all("/begin ([0-7]{3}) (.+)\r?\n(.+)\r?\nend/Us", $input, $matches);

    for ($j = 0; $j < count($matches[3]); $j++) {
        $filename = $matches[2][$j];
        $fileperm = $matches[1][$j];
        $lines    = preg_split("/\r?\n/", trim($matches[3][$j]));
        $file     = '';

        foreach ($lines as $line) {
            $line_length = strlen($line);
            $pos = 1; // offset of the next encoded character
            $d   = 0; // number of bytes decoded from this line

            // The first character of a line encodes its decoded length.
            $len = (ord(substr($line, 0, 1)) - 32) & 077;

            // Full groups: four characters carry three bytes. Each
            // character contributes six bits (flip 0x20, keep low six).
            while (($d + 3 <= $len) && ($pos + 4 <= $line_length)) {
                $c0 = (ord($line[$pos])     ^ 0x20) & 077;
                $c1 = (ord($line[$pos + 1]) ^ 0x20) & 077;
                $c2 = (ord($line[$pos + 2]) ^ 0x20) & 077;
                $c3 = (ord($line[$pos + 3]) ^ 0x20) & 077;

                $file .= chr((($c0 << 2) | ($c1 >> 4)) & 0xFF);
                $file .= chr((($c1 << 4) | ($c2 >> 2)) & 0xFF);
                $file .= chr((($c2 << 6) | $c3) & 0xFF);

                $pos += 4;
                $d   += 3;
            }

            // Trailing group carrying two bytes.
            if (($d + 2 <= $len) && ($pos + 3 <= $line_length)) {
                $c0 = (ord($line[$pos])     ^ 0x20) & 077;
                $c1 = (ord($line[$pos + 1]) ^ 0x20) & 077;
                $c2 = (ord($line[$pos + 2]) ^ 0x20) & 077;

                $file .= chr((($c0 << 2) | ($c1 >> 4)) & 0xFF);
                $file .= chr((($c1 << 4) | ($c2 >> 2)) & 0xFF);

                $pos += 3;
                $d   += 2;
            }

            // Trailing group carrying one byte.
            if (($d + 1 <= $len) && ($pos + 2 <= $line_length)) {
                $c0 = (ord($line[$pos])     ^ 0x20) & 077;
                $c1 = (ord($line[$pos + 1]) ^ 0x20) & 077;

                $file .= chr((($c0 << 2) | ($c1 >> 4)) & 0xFF);
            }
        }

        $files[] = array('filename' => $filename, 'fileperm' => $fileperm, 'filedata' => $file);
    }

    return $files;
}
/**
 * getSendArray() returns the arguments required for Mail::send()
 * used to build the arguments for a mail::send() call
 *
 * Usage:
 * $mailtext = Full email (for example generated by a template)
 * $decoder = new Mail_mimeDecode($mailtext);
 * $parts = $decoder->getSendArray();
 * if (!PEAR::isError($parts)) {
 *     list($recipents,$headers,$body) = $parts;
 *     $mail = Mail::factory('smtp');
 *     $mail->send($recipents,$headers,$body);
 * } else {
 *     echo $parts->message;
 * }
 *
 * The recipient string is the comma-separated concatenation of the
 * values of all To, Cc and Bcc headers, in the order they appear.
 *
 * @return mixed   array of recipient, headers, body or Pear_Error
 * @access public
 * @author Alan Knowles <alan@akbkhome.com>
 */
function getSendArray()
{
    // prevent warning if this is not set
    $this->_decode_headers = FALSE;
    $headerlist = $this->_parseHeaders($this->_header);
    if (!$headerlist) {
        return $this->raiseError("Message did not contain headers");
    }

    $to     = "";
    $header = array();
    foreach ($headerlist as $item) {
        $header[$item['name']] = $item['value'];
        switch (strtolower($item['name'])) {
            case "to":
            case "cc":
            case "bcc":
                // Append, so that every recipient header contributes.
                $to .= "," . $item['value'];
                break;
        }
    }

    if ($to == "") {
        return $this->raiseError("Message did not contain any recipents");
    }

    // Drop the leading comma.
    $to = substr($to, 1);

    return array($to, $header, $this->_body);
}
/**
 * Returns a xml copy of the output of Mail_mimeDecode::decode. Pass the
 * output in as the argument. The documented usage is:
 *
 * $output = $obj->decode();
 * $xml    = Mail_mimeDecode::getXML($output);
 *
 * The DTD used for this should have been in the package. Or
 * alternatively you can get it from cvs, or here:
 * http://www.phpguru.org/xmail/xmail.dtd.
 *
 * @param  object Input to convert to xml. This should be the
 *                output of the Mail_mimeDecode::decode function
 * @return string XML version of input
 * @access public
 */
function getXML($input)
{
    $crlf = "\r\n";

    $output = '<?xml version=\'1.0\'?>' . $crlf .
              '<!DOCTYPE email SYSTEM "http://www.phpguru.org/xmail/xmail.dtd">' . $crlf .
              '<email>' . $crlf .
              Mail_mimeDecode::_getXML($input) .
              '</email>';

    return $output;
}
/**
 * Function that does the actual conversion to xml. Does a single
 * mimepart at a time.
 *
 * Headers are written first, followed by either the nested parts (each
 * wrapped in a <mimepart> element) or the body in a CDATA section.
 *
 * @param  object  Input to convert to xml. This is a mimepart object.
 *                 It may or may not contain subparts.
 * @param  integer Number of tabs to indent
 * @return string  XML version of input
 * @access private
 */
function _getXML($input, $indent = 1)
{
    $htab    = "\t";
    $crlf    = "\r\n";
    $output  = '';
    $headers = isset($input->headers) ? (array) $input->headers : array();

    foreach ($headers as $hdr_name => $hdr_value) {
        if (is_array($hdr_value)) {
            // Multiple headers with this name
            foreach ($hdr_value as $single_value) {
                $output .= Mail_mimeDecode::_getXML_helper($hdr_name, $single_value, $indent);
            }
        } else {
            // Only one header of this sort
            $output .= Mail_mimeDecode::_getXML_helper($hdr_name, $hdr_value, $indent);
        }
    }

    if (!empty($input->parts)) {
        for ($i = 0; $i < count($input->parts); $i++) {
            $output .= $crlf . str_repeat($htab, $indent) . '<mimepart>' . $crlf .
                       Mail_mimeDecode::_getXML($input->parts[$i], $indent + 1) .
                       str_repeat($htab, $indent) . '</mimepart>' . $crlf;
        }
    } elseif (isset($input->body)) {
        // "]]>" would end the CDATA section early, so it is split across
        // two adjacent CDATA sections.
        $body = str_replace(']]>', ']]]]><![CDATA[>', $input->body);

        $output .= $crlf . str_repeat($htab, $indent) . '<body><![CDATA[' .
                   $body . ']]></body>' . $crlf;
    }

    return $output;
}
/**
 * Helper function to _getXML(). Returns xml of a header.
 *
 * Every header except "received" is run through _parseHeaderValue() so
 * that its parameters are written as separate <parameter> elements.
 *
 * @param  string  Name of header
 * @param  string  Value of header
 * @param  integer Number of tabs to indent
 * @return string  XML version of input
 * @access private
 */
function _getXML_helper($hdr_name, $hdr_value, $indent)
{
    $htab = "\t";
    $crlf = "\r\n";
    $pad  = str_repeat($htab, $indent);

    $new_hdr_value = ($hdr_name != 'received') ? Mail_mimeDecode::_parseHeaderValue($hdr_value) : array('value' => $hdr_value);
    // "content-type" becomes "Content-Type"
    $new_hdr_name  = str_replace(' ', '-', ucwords(str_replace('-', ' ', $hdr_name)));

    // Sort out any parameters
    $params = '';
    if (!empty($new_hdr_value['other'])) {
        foreach ($new_hdr_value['other'] as $paramname => $paramvalue) {
            $params .= $pad . $htab . '<parameter>' . $crlf .
                       $pad . $htab . $htab . '<paramname>' . htmlspecialchars($paramname) . '</paramname>' . $crlf .
                       $pad . $htab . $htab . '<paramvalue>' . htmlspecialchars($paramvalue) . '</paramvalue>' . $crlf .
                       $pad . $htab . '</parameter>' . $crlf;
        }
    }

    $return = $pad . '<header>' . $crlf .
              $pad . $htab . '<headername>' . htmlspecialchars($new_hdr_name) . '</headername>' . $crlf .
              $pad . $htab . '<headervalue>' . htmlspecialchars($new_hdr_value['value']) . '</headervalue>' . $crlf .
              $params .
              $pad . '</header>' . $crlf;

    return $return;
}