Lines Matching full:charset
20 from email import charset as _charset
21 Charset = _charset.Charset variable
31 USASCII = Charset('us-ascii')
32 UTF8 = Charset('utf-8')
34 # Match encoded-word strings in the form =?charset?q?Hello_World?=
37 (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
62 """Decode a message header value without converting charset.
64 Returns a list of (string, charset) pairs containing each of the decoded
65 parts of the header. Charset is None for non-encoded parts of the header,
77 return [(_charset._encode(string, str(charset)), str(charset))
78 for string, charset in header._chunks]
79 # If no encoding, just return the header with no charset.
83 # (encoded_string, encoding, charset). For unencoded strings, the last
97 charset = parts.pop(0).lower()
100 words.append((encoded, encoding, charset))
112 # form (decoded_word, charset).
114 for encoded_string, encoding, charset in words:
117 decoded_words.append((encoded_string, charset))
120 decoded_words.append((word, charset))
130 decoded_words.append((word, charset))
137 for word, charset in decoded_words:
142 last_charset = charset
143 elif charset != last_charset:
146 last_charset = charset
161 pairs of the format (decoded_string, charset) where charset is the string
170 for s, charset in decoded_seq:
172 if charset is not None and not isinstance(charset, Charset):
173 charset = Charset(charset)
174 h.append(s, charset)
180 def __init__(self, s=None, charset=None, argument
190 Optional charset serves two purposes: it has the same meaning as the
191 charset argument to the .append() method. It also sets the default
192 character set for all subsequent .append() calls that omit the charset
193 argument. If charset is not provided in the constructor, the us-ascii
194 charset is used both as s's initial charset and as the default for
209 if charset is None:
210 charset = USASCII
211 elif not isinstance(charset, Charset):
212 charset = Charset(charset)
213 self._charset = charset
217 self.append(s, charset, errors)
233 for string, charset in self._chunks:
236 # from a charset to None/us-ascii, or from None/us-ascii to a
237 # charset. Only do this for the second and subsequent chunks.
240 nextcs = charset
265 def append(self, s, charset=None, errors='strict'): argument
268 Optional charset, if given, should be a Charset instance or the name
269 of a character set (which will be converted to a Charset instance). A
270 value of None (the default) means that the charset given in the
274 (i.e. isinstance(s, str) is false), then charset is the encoding of
276 cannot be decoded with that charset. If s is a Unicode string, then
277 charset is a hint specifying the character set of the characters in
280 output codec of the charset. If the string cannot be encoded to the
286 if charset is None:
287 charset = self._charset
288 elif not isinstance(charset, Charset):
289 charset = Charset(charset)
291 input_charset = charset.input_codec or 'us-ascii'
298 output_charset = charset.output_codec or 'us-ascii'
305 charset = UTF8
306 self._chunks.append((s, charset))
358 for string, charset in self._chunks:
362 if not hasspace or charset not in (None, 'us-ascii'):
364 elif charset not in (None, 'us-ascii') and not lastspace:
367 lastcs = charset
371 formatter.feed('', lines[0], charset)
373 formatter.feed('', '', charset)
376 if charset.header_encoding is not None:
378 charset)
382 formatter.feed(fws, sline, charset)
399 for string, charset in self._chunks:
400 if charset == last_charset:
406 last_charset = charset
443 def feed(self, fws, string, charset): argument
444 # If the charset has no header encoding (i.e. it is an ASCII encoding)
449 if charset.header_encoding is None:
459 encoded_lines = charset.header_encode_lines(string, self._maxlengths())