1 """Some rfc822 functions taken from the new (python2.3) "email" module.
3 __docformat__
= 'restructuredtext'
6 from string
import letters
, digits
7 from binascii
import b2a_base64
, a2b_base64
11 (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
13 (?P<encoding>[qb]) # either a "q" or a "b", case insensitive
15 (?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string
17 ''', re
.VERBOSE | re
.IGNORECASE
)
# Matches a value made up solely of printable ASCII (space through "~").
# The class is written as one explicit range: the earlier spelling reached
# exactly the same set only through accidental ranges ("A-z" also spans
# [ \ ] ^ _ ` and ",-." is a range, not three literals) and a doubled "%".
hqre = re.compile(r'^[\x20-\x7e]+$')
def base64_decode(s, convert_eols=None):
    """Decode a raw base64 string.

    If convert_eols is set to a string value, all canonical email linefeeds,
    e.g. "\\r\\n", in the decoded text will be converted to the value of
    convert_eols.  os.linesep is a good choice for convert_eols if you are
    decoding a text attachment.

    This function does not parse a full MIME header value encoded with
    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
    level email.Header class for that functionality.

    :param s: the base64 text to decode; empty input is returned unchanged.
    :param convert_eols: replacement for CRLF in the decoded text, or a
        false value to leave line endings alone.
    :return: the decoded data.

    Taken from 'email' module
    """
    # Nothing to decode: hand empty input straight back.
    if not s:
        return s

    dec = a2b_base64(s)
    if not convert_eols:
        # No line-ending conversion requested.
        return dec
    return dec.replace(CRLF, convert_eols)
def unquote_match(match):
    """Turn a match in the form ``=AB`` to the ASCII character with value
    0xAB.

    :param match: regular-expression match object whose matched text is
        ``=`` followed by two hexadecimal digits.
    :return: the single character whose code is that hexadecimal value.
    :raises ValueError: if the two characters after ``=`` are not
        hexadecimal digits.

    Taken from 'email' module
    """
    # group(0) is the whole matched text, e.g. "=E2"; skip the leading "=".
    s = match.group(0)
    return chr(int(s[1:3], 16))
def qp_decode(s):
    """Decode a string encoded with RFC 2045 MIME header 'Q' encoding.

    This function does not parse a full MIME header value encoded with
    quoted-printable (like =?iso-8859-1?q?Hello_World?=) -- please use
    the high level email.Header class for that functionality.

    :param s: the encoded text (the part between ``?q?`` and ``?=``).
    :return: the text with ``_`` turned into spaces and every ``=XX``
        escape replaced by the character with hexadecimal code XX.

    Taken from 'email' module
    """
    # In 'Q' encoding an underscore stands for a space.
    s = s.replace('_', ' ')
    # Only "=" followed by two hex digits is an escape.  Matching "\w{2}"
    # would also pick up pairs such as "=zz" and make the hex conversion
    # raise ValueError; anything that is not a valid escape is left as is.
    return re.sub(r'=[0-9A-Fa-f]{2}', unquote_match, s)
66 def _decode_header(header
):
67 """Decode a message header value without converting charset.
69 Returns a list of (decoded_string, charset) pairs containing each of the
70 decoded parts of the header. Charset is None for non-encoded parts of the
71 header, otherwise a lower-case string containing the name of the character
72 set specified in the encoded string.
74 Taken from 'email' module
76 # If no encoding, just return the header
78 if not ecre
.search(header
):
79 return [(header
, None)]
83 for line
in header
.splitlines():
84 # This line might not have an encoding in it
85 if not ecre
.search(line
):
86 decoded
.append((line
, None))
89 parts
= ecre
.split(line
)
94 decoded
.append((unenc
, None))
96 charset
, encoding
= [s
.lower() for s
in parts
[0:2]]
100 dec
= qp_decode(encoded
)
101 elif encoding
== 'b':
102 dec
= base64_decode(encoded
)
106 if decoded
and decoded
[-1][1] == charset
:
107 decoded
[-1] = (decoded
[-1][0] + dec
, decoded
[-1][1])
109 decoded
.append((dec
, charset
))
113 def decode_header(hdr
):
114 """ Decodes rfc2822 encoded header and return utf-8 encoded string
119 for section
in _decode_header(hdr
):
120 charset
= unaliasCharset(section
[1])
121 outs
+= unicode(section
[0], charset
or 'iso-8859-1', 'replace')
122 return outs
.encode('utf-8')
def encode_header(header, charset='utf-8'):
    """Encode a header as a quoted-printable ('Q') encoded word if needed.

    The header is returned unchanged when it is empty or when ``hqre``
    accepts it as a whole; otherwise the entire value is wrapped as
    ``=?charset?q?...?=``.

    :param header: the header value.
        NOTE(review): presumably a byte string already encoded in
        ``charset``, so that every character's code fits in two hex
        digits -- confirm against callers.
    :param charset: charset name written into the encoded word; the value
        itself is not transcoded here.
    :return: the original header or its encoded-word form.
    """
    # Return empty headers unchanged
    if not header:
        return header

    # Return the plain header if it contains no non-ASCII characters
    if hqre.match(header):
        return header

    # NOTE: the result is not split to respect the maximum encoded-word
    # length; the whole header becomes a single encoded word.
    quoted = []
    for c in header:
        # Space may be represented as _ instead of =20 for readability
        if c == ' ':
            quoted.append('_')
        # These characters can be included verbatim
        elif hqre.match(c) and c not in '_=?':
            quoted.append(c)
        # Otherwise, replace with hex value like =E2
        else:
            quoted.append("=%02X" % ord(c))

    return '=?%s?q?%s?=' % (charset, ''.join(quoted))
def unaliasCharset(charset):
    """Normalise a charset name taken from an encoded header.

    The name is lower-cased and every ``windows-`` in it is rewritten to
    ``cp`` (so ``Windows-1251`` becomes ``cp1251``).

    :param charset: charset name, or None/empty for an unencoded section.
    :return: the normalised name, or None when no charset was given.
    """
    # Unencoded header sections carry no charset; lower() on None would
    # raise AttributeError, so pass "no charset" through as None.
    if not charset:
        return None
    return charset.lower().replace("windows-", 'cp')
160 print encode_header("Contrary, Mary")
161 #print unaliasCharset('Windows-1251')
163 if __name__
== '__main__':