Lines Matching full:charset
20 from email.charset import Charset
30 USASCII = Charset('us-ascii')
31 UTF8 = Charset('utf-8')
33 # Match encoded-word strings in the form =?charset?q?Hello_World?=
36 (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
62 """Decode a message header value without converting charset.
64 Returns a list of (decoded_string, charset) pairs containing each of the
65 decoded parts of the header. Charset is None for non-encoded parts of the
93 charset, encoding = [s.lower() for s in parts[0:2]]
112 if decoded and decoded[-1][1] == charset:
115 decoded.append((dec, charset))
126 pairs of the format (decoded_string, charset) where charset is the string
135 for s, charset in decoded_seq:
137 if charset is not None and not isinstance(charset, Charset):
138 charset = Charset(charset)
139 h.append(s, charset)
145 def __init__(self, s=None, charset=None, argument
155 Optional charset serves two purposes: it has the same meaning as the
156 charset argument to the .append() method. It also sets the default
157 character set for all subsequent .append() calls that omit the charset
158 argument. If charset is not provided in the constructor, the us-ascii
159 charset is used both as s's initial charset and as the default for
173 if charset is None:
174 charset = USASCII
175 if not isinstance(charset, Charset):
176 charset = Charset(charset)
177 self._charset = charset
183 self.append(s, charset, errors)
206 for s, charset in self._chunks:
209 # from a charset to None/us-ascii, or from None/us-ascii to a
210 # charset. Only do this for the second and subsequent chunks.
211 nextcs = charset
220 uchunks.append(unicode(s, str(charset)))
233 def append(self, s, charset=None, errors='strict'): argument
236 Optional charset, if given, should be a Charset instance or the name
237 of a character set (which will be converted to a Charset instance). A
238 value of None (the default) means that the charset given in the
242 (i.e. isinstance(s, str) is true), then charset is the encoding of
244 cannot be decoded with that charset. If s is a Unicode string, then
245 charset is a hint specifying the character set of the characters in
248 following charsets in order: us-ascii, the charset hint, utf-8. The
254 if charset is None:
255 charset = self._charset
256 elif not isinstance(charset, Charset):
257 charset = Charset(charset)
258 # If the charset is our faux 8bit charset, leave the string unchanged
259 if charset != '8bit':
262 # charset.
265 # converted to a unicode with the input codec of the charset.
266 incodec = charset.input_codec or 'us-ascii'
271 outcodec = charset.output_codec or 'us-ascii'
277 for charset in USASCII, charset, UTF8:
279 outcodec = charset.output_codec or 'us-ascii'
286 self._chunks.append((s, charset))
288 def _split(self, s, charset, maxlinelen, splitchars): argument
290 splittable = charset.to_splittable(s)
291 encoded = charset.from_splittable(splittable, True)
292 elen = charset.encoded_header_len(encoded)
295 return [(encoded, charset)]
302 if charset == '8bit':
303 return [(s, charset)]
312 # For now, I can only imagine doing this when the charset is us-ascii,
315 elif charset == 'us-ascii':
316 return self._split_ascii(s, charset, maxlinelen, splitchars)
322 first = charset.from_splittable(splittable[:splitpnt], False)
323 last = charset.from_splittable(splittable[splitpnt:], False)
326 first, last = _binsplit(splittable, charset, maxlinelen)
329 fsplittable = charset.to_splittable(first)
330 fencoded = charset.from_splittable(fsplittable, True)
331 chunk = [(fencoded, charset)]
332 return chunk + self._split(last, charset, self._maxlinelen, splitchars)
334 def _split_ascii(self, s, charset, firstlen, splitchars): argument
337 return zip(chunks, [charset]*len(chunks))
342 # Given a list of pairs (string, charset), return a MIME-encoded
358 for header, charset in newchunks:
361 if charset is None or charset.header_encoding is None:
364 s = charset.header_encode(header)
389 If the given charset is not known or an error occurs during
399 for s, charset in self._chunks:
404 if targetlen < charset.encoded_header_len(''):
407 newchunks += self._split(s, charset, targetlen, splitchars)
488 def _binsplit(splittable, charset, maxlinelen): argument
501 chunk = charset.from_splittable(splittable[:m], True)
502 chunklen = charset.encoded_header_len(chunk)
512 first = charset.from_splittable(splittable[:i], False)
513 last = charset.from_splittable(splittable[i:], False)