diff -rN -u old-quixote/__init__.py new-quixote/__init__.py --- old-quixote/__init__.py 2005-04-11 09:15:30.000000000 -0600 +++ new-quixote/__init__.py 2005-08-29 13:24:08.000000000 -0600 @@ -13,6 +13,16 @@ get_session, get_session_manager, get_user, get_field, get_cookie +# This is the default charset used by the HTTPRequest and HTTPResponse +# classes. +DEFAULT_HTTP_CHARSET = 'iso-8859-1' + +# Setting the following variable to 'ascii' is recommended. The default is +# 'iso-8859-1' for backwards compatibility reasons. If you change it, you +# will need to use unicode strings to store non-ASCII character data (as +# is recommended practice). +DEFAULT_STR_CHARSET = 'iso-8859-1' + def enable_ptl(): """ Installs the import hooks needed to import PTL modules. This must @@ -23,4 +33,4 @@ that, if you use ZODB, you must import ZODB before calling this function. """ - import quixote.ptl.install + import quixote.ptl.install diff -rN -u old-quixote/http_request.py new-quixote/http_request.py --- old-quixote/http_request.py 2005-05-18 17:42:59.000000000 -0600 +++ new-quixote/http_request.py 2005-08-29 12:52:22.000000000 -0600 @@ -13,6 +13,7 @@ import rfc822 from cStringIO import StringIO +from quixote import DEFAULT_HTTP_CHARSET, DEFAULT_STR_CHARSET from quixote.http_response import HTTPResponse from quixote.errors import RequestError @@ -48,7 +49,11 @@ return None def _decode_string(s, charset): - if charset == 'iso-8859-1': + if charset == 'iso-8859-1' == DEFAULT_STR_CHARSET: + # To avoid breaking applications that are not Unicode-safe, return + # a str instance. Applications that change any of DEFAULT_CHARSET, + # DEFAULT_HTTP_CHARSET, or DEFAULT_STR_CHARSET have to be prepared + # for unicode strings. return s try: return s.decode(charset) @@ -139,13 +144,14 @@ when handling an exception. 
""" - DEFAULT_CHARSET = 'iso-8859-1' + DEFAULT_CHARSET = None # defaults to DEFAULT_HTTP_CHARSET def __init__(self, stdin, environ): self.stdin = stdin self.environ = environ self.form = {} self.session = None + self.charset = self.DEFAULT_CHARSET or DEFAULT_HTTP_CHARSET self.response = HTTPResponse() # The strange treatment of SERVER_PORT_SECURE is because IIS @@ -179,7 +185,7 @@ def process_inputs(self): query = self.get_query() if query: - self.form.update(parse_query(query, self.DEFAULT_CHARSET)) + self.form.update(parse_query(query, self.charset)) length = self.environ.get('CONTENT_LENGTH') or "0" try: length = int(length) @@ -197,7 +203,9 @@ query = self.stdin.read(length) if len(query) != length: raise RequestError('unexpected end of request body') - charset = params.get('charset', self.DEFAULT_CHARSET) + # Use the declared charset if it's provided (most browsers don't + # provide it to avoid breaking old HTTP servers). + charset = params.get('charset', self.charset) self.form.update(parse_query(query, charset)) def _process_multipart(self, length, params): @@ -244,8 +252,7 @@ upload.receive(lines) _add_field_value(self.form, name, upload) else: - value = _decode_string(''.join(lines), - charset or self.DEFAULT_CHARSET) + value = _decode_string(''.join(lines), charset or self.charset) _add_field_value(self.form, name, value) def get_header(self, name, default=None): diff -rN -u old-quixote/http_response.py new-quixote/http_response.py --- old-quixote/http_response.py 2005-05-18 17:42:44.000000000 -0600 +++ new-quixote/http_response.py 2005-08-29 13:20:19.000000000 -0600 @@ -13,6 +13,7 @@ pass import struct from rfc822 import formatdate +import quixote from quixote.html import stringify status_reasons = { @@ -95,8 +96,9 @@ content_type : string the MIME content type of the response (does not include extra params like charset) - charset : string - the character encoding of the the response + charset : string | None + the character encoding of the 
response. If None, the 'charset' + parameter of the Content-Type header will not be included. status_code : int HTTP response status code (integer between 100 and 599) reason_phrase : string @@ -134,14 +136,17 @@ """ DEFAULT_CONTENT_TYPE = 'text/html' - DEFAULT_CHARSET = 'iso-8859-1' + DEFAULT_CHARSET = None # defaults to quixote.DEFAULT_HTTP_CHARSET + def __init__(self, status=200, body=None, content_type=None, charset=None): """ Creates a new HTTP response. """ self.content_type = content_type or self.DEFAULT_CONTENT_TYPE - self.charset = charset or self.DEFAULT_CHARSET + self.charset = (charset or + self.DEFAULT_CHARSET or + quixote.DEFAULT_HTTP_CHARSET) self.set_status(status) self.headers = {} @@ -155,17 +160,21 @@ self.buffered = True self.javascript_code = None - def set_content_type(self, content_type, charset='iso-8859-1'): - """(content_type : string, charset : string = 'iso-8859-1') + def set_content_type(self, content_type, charset=None): + """(content_type : string, charset : string = None) Set the content type of the response to the MIME type specified by - 'content_type'. Also sets the charset, defaulting to 'iso-8859-1'. + 'content_type'. If 'charset' is not provided, the charset parameter + for the Content-Type header will not be set. 
""" self.charset = charset self.content_type = content_type def set_charset(self, charset): - self.charset = str(charset).lower() + if not charset: + self.charset = None + else: + self.charset = str(charset).lower() def set_status(self, status, reason=None): """set_status(status : int, reason : string = None) @@ -220,10 +229,22 @@ def _encode_chunk(self, chunk): """(chunk : str | unicode) -> str """ - if self.charset == 'iso-8859-1' and isinstance(chunk, str): - return chunk # non-ASCII chars are okay + if isinstance(chunk, unicode): + if self.charset is None: + # iso-8859-1 is the default for the HTTP protocol if charset + # is not provided + chunk = chunk.encode('iso-8859-1') + else: + chunk = chunk.encode(self.charset) else: - return chunk.encode(self.charset) + if self.charset is None: + # we assume that the str is in the correct encoding or does + # not contain character data + pass + elif self.charset != quixote.DEFAULT_STR_CHARSET: + s = chunk.decode(quixote.DEFAULT_STR_CHARSET) + chunk = s.encode(self.charset) + return chunk def _compress_body(self, body): """(body: str) -> str @@ -401,9 +422,11 @@ # Content-type if "content-type" not in self.headers: - headers.append(('Content-Type', - '%s; charset=%s' % (self.content_type, - self.charset))) + if self.charset is not None: + value = '%s; charset=%s' % (self.content_type, self.charset) + else: + value = '%s' % self.content_type + headers.append(('Content-Type', value)) # Content-Length if "content-length" not in self.headers: