[Twisted-Python] HTTP versions
Andrew Dalke
twisted-python@twistedmatrix.com
Mon, 2 Jun 2003 03:13:48 -0600
--Apple-Mail-6--1071383028
Content-Transfer-Encoding: 7bit
Content-Type: text/plain;
charset=US-ASCII;
format=flowed
Itamar Shtull-Trauring
> I'd have to see the code before deciding.
I've attached a sketch, as it were, of how the new code would look.
I've not tested it at all. The goal is to have something more concrete
to talk about. The major change is in the HTTPChannel, with a few
changes to how the Request constructor is called & works.
I've also reviewed the code and commented on a few sections which
might also be cleaned up. These comments are marked with "APD". (Some of
the suggestions require Python 2.3 modules; others don't.)
Oh, and the requestDone method in http.py currently has
if request != self.requests[0]: raise TypeError
That "!=" should be an "is not".
Andrew
dalke@dalkescientific.com
--Apple-Mail-6--1071383028
Content-Disposition: attachment;
filename=http2.py
Content-Transfer-Encoding: 7bit
Content-Type: application/octet-stream;
x-unix-mode=0644;
name="http2.py"
# -*- test-case-name: twisted.test.test_http -*-
# Twisted, the Framework of Your Internet
# Copyright (C) 2001 Matthew W. Lefkowitz
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of version 2.1 of the GNU Lesser General Public
# License as published by the Free Software Foundation.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""HyperText Transfer Protocol implementation.
This is used by twisted.web.
API Stability: Server HTTP support is semi-stable, client HTTP is unstable.
Future Plans:
- HTTP client support will at some point be refactored to support HTTP/1.1.
- Accept chunked data from clients in server.
- Other missing HTTP features from the RFC.
Maintainer: U{Itamar Shtull-Trauring<mailto:twisted@itamarst.org>}
"""
# system imports
import BaseHTTPServer
import base64
import calendar
import cgi
import datetime
import math
import os
import rfc822
import socket
import tempfile
import time
import warnings
from cStringIO import StringIO
# sibling imports
import basic
# twisted imports
from twisted.internet import interfaces, reactor, protocol
from twisted.python import log
# HTTP version string used by this module; _ParseHTTPHeaders re-exports it as
# its own protocol_version class attribute.
protocol_version = "HTTP/1.1"

# HTTP status codes, grouped by class.
# 1xx
_CONTINUE = 100
SWITCHING = 101
# 2xx
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
# 3xx
MULTIPLE_CHOICE = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307
# 4xx
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTH_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
# 5xx
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE_SPACE = 507
NOT_EXTENDED = 510

## APD - could get this from BaseHTTPServer
# Maps every status code above to the reason phrase sent on the status line.
# Reason phrases carry no trailing punctuation.
RESPONSES = {
    # 100
    _CONTINUE: "Continue",
    SWITCHING: "Switching Protocols",

    # 200
    OK: "OK",
    CREATED: "Created",
    ACCEPTED: "Accepted",
    NON_AUTHORITATIVE_INFORMATION: "Non-Authoritative Information",
    NO_CONTENT: "No Content",
    RESET_CONTENT: "Reset Content",
    PARTIAL_CONTENT: "Partial Content",
    MULTI_STATUS: "Multi-Status",

    # 300
    MULTIPLE_CHOICE: "Multiple Choices",
    MOVED_PERMANENTLY: "Moved Permanently",
    FOUND: "Found",
    SEE_OTHER: "See Other",
    NOT_MODIFIED: "Not Modified",
    USE_PROXY: "Use Proxy",
    # (no entry for 306)
    TEMPORARY_REDIRECT: "Temporary Redirect",

    # 400
    BAD_REQUEST: "Bad Request",
    UNAUTHORIZED: "Unauthorized",
    PAYMENT_REQUIRED: "Payment Required",
    FORBIDDEN: "Forbidden",
    NOT_FOUND: "Not Found",
    NOT_ALLOWED: "Method Not Allowed",
    NOT_ACCEPTABLE: "Not Acceptable",
    PROXY_AUTH_REQUIRED: "Proxy Authentication Required",
    REQUEST_TIMEOUT: "Request Time-out",
    CONFLICT: "Conflict",
    GONE: "Gone",
    LENGTH_REQUIRED: "Length Required",
    PRECONDITION_FAILED: "Precondition Failed",
    REQUEST_ENTITY_TOO_LARGE: "Request Entity Too Large",
    REQUEST_URI_TOO_LONG: "Request-URI Too Long",
    UNSUPPORTED_MEDIA_TYPE: "Unsupported Media Type",
    REQUESTED_RANGE_NOT_SATISFIABLE: "Requested Range not satisfiable",
    EXPECTATION_FAILED: "Expectation Failed",

    # 500
    INTERNAL_SERVER_ERROR: "Internal Server Error",
    NOT_IMPLEMENTED: "Not Implemented",
    BAD_GATEWAY: "Bad Gateway",
    SERVICE_UNAVAILABLE: "Service Unavailable",
    GATEWAY_TIMEOUT: "Gateway Time-out",
    HTTP_VERSION_NOT_SUPPORTED: "HTTP Version not supported",
    INSUFFICIENT_STORAGE_SPACE: "Insufficient Storage Space",
    NOT_EXTENDED: "Not Extended",
}

CACHED = """Magic constant returned by http.Request methods to set cache
validation headers when the request is conditional and the value fails
the condition."""

# backwards compatibility: lower-case alias for the same dictionary object
responses = RESPONSES
## APD -- this code is essentially identical to the BaseHTTPServer.py
## implementation and basically the same as email.Utils.formatdate
## and rfc822.formatdate. ... and logging.handlers.SMTPHandler.date_time
## We can use rfc822 just fine
# datetimeToString is therefore a plain alias: calling it is calling
# rfc822.formatdate with the same arguments.
datetimeToString = rfc822.formatdate
## APD -- Why is this needed? Is it because of the hit of
## calling time.time or of formatting the string? If it's
## formatting the string, then do the following
##
##   _last_time_info = [None, None]
##   def getDateTime():
##       last_it, last_s = _last_time_info
##       t = int(time.time())
##       it = int(t)
##       if last_it == it:
##           return last_s
##       s = datetimeToString(t)
##       _last_time_info[:] = [it, s]
##       return s
##
## If it's calling time.time .. prove it. My Mac (a 1GHz
## machine) can do 348,000 calls per second through Python.
# a hack so we don't need to recalculate log datetime every hit,
# at the price of a small, unimportant, inaccuracy.
# _logDateTime: the cached timestamp string; written by _resetLogDateTime
# and interpolated into each line by HTTPFactory.log.
_logDateTime = None
# _logDateTimeUsers: usage counter, incremented by _logDateTimeStart and
# decremented by _logDateTimeStop.
_logDateTimeUsers = 0
# _resetLogDateTimeID: whatever reactor.callLater returned for the pending
# refresh; _logDateTimeStop calls .cancel() on it.
_resetLogDateTimeID = None
def _resetLogDateTime():
    """Refresh the cached log timestamp and schedule the next refresh.

    Stores the result of C{datetimeToLogString()} in the module-level
    C{_logDateTime}, then asks the reactor to call this function again in one
    second. The object returned by C{reactor.callLater} is kept in
    C{_resetLogDateTimeID} so that C{_logDateTimeStop} can cancel it.
    """
    # NOTE(review): datetimeToLogString is not defined anywhere in the visible
    # source -- confirm it exists before relying on this function.
    # Only the two names assigned below need a global declaration; the
    # function's own name is merely read.
    global _logDateTime
    global _resetLogDateTimeID
    _logDateTime = datetimeToLogString()
    _resetLogDateTimeID = reactor.callLater(1, _resetLogDateTime)
def _logDateTimeStart():
    """Register one more user of the cached log timestamp.

    When the usage counter is still zero the timestamp refresh is kicked off
    via C{_resetLogDateTime()} before the counter is bumped.
    """
    global _logDateTimeUsers
    nobodyYet = not _logDateTimeUsers
    if nobodyYet:
        _resetLogDateTime()
    _logDateTimeUsers = _logDateTimeUsers + 1
def _logDateTimeStop():
    """Unregister one user of the cached log timestamp.

    Decrements the usage counter; once it reaches zero, the pending refresh
    stored in C{_resetLogDateTimeID} (if any) is cancelled.
    """
    global _logDateTimeUsers
    _logDateTimeUsers = _logDateTimeUsers - 1
    if _logDateTimeUsers:
        # somebody else still needs the timestamp
        return
    if _resetLogDateTimeID:
        _resetLogDateTimeID.cancel()
## APD - this rewrite uses datetime from Python 2.3
_epoch = datetime.datetime(1970, 1, 1, 0, 0, 0)
_SECONDS_PER_DAY = 24 * 60 * 60


def timegm(year, month, day, hour, minute, second):
    """Convert time tuple in GMT to seconds since epoch, GMT

    @param year, month, day, hour, minute, second: integer calendar fields,
        passed unchanged to C{datetime.datetime}.
    @return: whole seconds between 1970-01-01 00:00:00 and the given moment.
    @raise ValueError: if C{datetime.datetime} rejects the fields.
    """
    delta = datetime.datetime(year, month, day, hour, minute, second) - _epoch
    # A timedelta stores whole days and the leftover seconds separately.
    return delta.days * _SECONDS_PER_DAY + delta.seconds
## APD - I'm not touching this one. rfc822 has code to parse
## a timestamp which could be used, but it handles timezone, which
## this one doesn't. I don't have the spec with me to figure out
## which is correct. Also, note that this can raise unexpected
## exceptions based on external input. The 'modified-since' code
## which calls this should handle errors by assuming that it can't
## use the cached value.

# Month abbreviations indexed by month number. Slot 0 is a placeholder so
# that monthname.index('Jan') == 1.
monthname = [None,
             'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
             'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']


def stringToDatetime(dateString):
    """Convert an HTTP date string to seconds since epoch.

    The string is split on single spaces and read positionally: field 1 is
    the day, field 2 the month abbreviation, field 3 the year and field 4 the
    C{hh:mm:ss} time. Any timezone field is ignored.

    @param dateString: e.g. C{'Sun, 06 Nov 1994 08:49:37 GMT'}.
    @return: integer seconds since the epoch, as computed by C{timegm}.
    @raise ValueError: on a non-numeric field, an unknown month abbreviation
        or a time that is not three colon-separated integers.
    @raise IndexError: when the string has fewer than five fields.
    """
    parts = dateString.split(' ')
    day = int(parts[1])
    month = monthname.index(parts[2])
    year = int(parts[3])
    hour, minute, second = map(int, parts[4].split(':'))
    return int(timegm(year, month, day, hour, minute, second))
def toChunk(data):
    """Frame a string as a single chunk.

    The result is the payload length in lower-case hex, CRLF, the payload
    itself and a closing CRLF.
    """
    size = len(data)
    return "%x\r\n%s\r\n" % (size, data)
## APD -- not actually used.
def fromChunk(data):
    """Convert chunk to string.

    @param data: a string that starts with one chunk: the payload length in
        hex, CRLF, the payload, CRLF. Anything after that is left untouched.
    @return: tuple C{(result, remaining)} -- the payload and the rest of
        C{data} following the chunk's closing CRLF.
    @raise ValueError: if there is no CRLF after the length, the length is
        not a hexadecimal number, the length is negative, or the payload is
        not followed by CRLF.
    """
    prefix, rest = data.split('\r\n', 1)
    length = int(prefix, 16)
    # int() happily parses "-3"; a negative length would make the slices
    # below wrap around from the end of the string.
    if length < 0:
        raise ValueError("chunk length must not be negative")
    if rest[length:length + 2] != '\r\n':
        raise ValueError("chunk must end with CRLF")
    return rest[:length], rest[length + 2:]
## APD -- not actually used. The code assumes the input is in the right
## format and will raise exceptions if given invalid input (eg, from
## attack code)
def parseContentRange(header):
    """Parse a content-range header into (start, end, realLength).

    realLength might be None if real length is not known ('*').

    @param header: the header value, e.g. C{'bytes 0-499/1234'}.
    @return: tuple C{(start, end, realLength)} of integers, with
        C{realLength} replaced by C{None} when the header gives C{'*'}.
    @raise ValueError: if the unit is not C{bytes} or the value does not have
        the C{unit start-end/length} shape with integer fields.
    """
    # split() with no argument already ignores leading/trailing whitespace.
    kind, other = header.split()
    if kind.lower() != "bytes":
        raise ValueError("a range of type %r is not supported" % (kind,))
    # The range and the total length are separated by a slash.
    startend, realLength = other.split("/")
    start, end = map(int, startend.split("-"))
    if realLength == "*":
        realLength = None
    else:
        realLength = int(realLength)
    return (start, end, realLength)
class StringTransport:
    """
    An in-memory stand-in for a transport: everything written is collected in
    a StringIO buffer kept in C{self.s}. Besides the StringIO methods it
    offers `writeSequence'.
    """

    def __init__(self):
        self.s = StringIO()

    def writeSequence(self, seq):
        """Write all the strings in C{seq} as one concatenated string."""
        joined = ''.join(seq)
        self.s.write(joined)

    def __getattr__(self, attr):
        """Forward any attribute not found on the wrapper to the buffer."""
        # __getattr__ only runs after normal lookup has failed. The buffer is
        # fetched straight from the instance dictionary, so a missing 's'
        # raises KeyError here rather than re-entering __getattr__.
        buf = self.__dict__['s']
        return getattr(buf, attr)
class HTTPClient(basic.LineReceiver):
    """A client for HTTP 1.0

    Notes:
    You probably want to send a 'Host' header with the name of
    the site you're connecting to, in order to not break name
    based virtual hosting.

    The response is read in two phases: status line and headers arrive
    through C{lineReceived}; after the blank line the protocol switches to
    raw mode and the body arrives through C{rawDataReceived}. Body data is
    accumulated and handed to C{self.handleResponse} in one piece.
    NOTE(review): C{handleResponse} is not defined in this class as shown;
    presumably subclasses provide it -- confirm.
    """
    # Body bytes still expected according to Content-Length; None when the
    # response did not announce a length.
    length = None
    # True until the status line has been consumed.
    firstLine = 1
    # Body data collected so far.
    __buffer = ''

    def sendCommand(self, command, path):
        """Write the request line for C{command} and C{path}."""
        self.transport.write('%s %s HTTP/1.0\r\n' % (command, path))

    def sendHeader(self, name, value):
        """Write one request header line."""
        self.transport.write('%s: %s\r\n' % (name, value))

    def endHeaders(self):
        """Write the blank line that terminates the request headers."""
        self.transport.write('\r\n')

    def lineReceived(self, line):
        """Handle the status line, one header line, or the end of headers."""
        if self.firstLine:
            self.firstLine = 0
            try:
                version, status, message = line.split(None, 2)
            except ValueError:
                # sometimes there is no message
                version, status = line.split(None, 1)
                message = ""
            self.handleStatus(version, status, message)
            return
        if line:
            # The whitespace after the colon is optional, so split on the
            # colon alone and drop whatever blanks precede the value.
            key, val = line.split(':', 1)
            val = val.lstrip()
            self.handleHeader(key, val)
            if key.lower() == 'content-length':
                self.length = int(val)
        else:
            self.handleEndHeaders()
            self.setRawMode()

    def connectionLost(self, reason):
        """Flush whatever body has been collected when the connection ends."""
        self.handleResponseEnd()

    def handleResponseEnd(self):
        """Deliver the buffered body, if any, to C{handleResponse}."""
        if self.__buffer:
            b = self.__buffer
            # Clear first so that a second call delivers nothing.
            self.__buffer = ''
            self.handleResponse(b)

    def handleResponsePart(self, data):
        """Append a piece of the body to the buffer."""
        self.__buffer += data

    def connectionMade(self):
        pass

    # Default no-op hooks; override to observe status line and headers.
    handleStatus = handleHeader = handleEndHeaders = lambda *args: None

    def rawDataReceived(self, data):
        """Consume body data, honouring Content-Length when it is known."""
        if self.length is not None:
            # Anything beyond the announced length is not part of the body.
            data, rest = data[:self.length], data[self.length:]
            self.length -= len(data)
        else:
            rest = ''
        self.handleResponsePart(data)
        if self.length == 0:
            self.handleResponseEnd()
            self.setLineMode(rest)
# response codes that must have empty bodies
# (204 is NO_CONTENT and 304 is NOT_MODIFIED in this module's status table)
NO_BODY_CODES = (204, 304)
class Request:
    """A single HTTP request as seen by the server.

    @cvar method: The HTTP method that was used.
    @cvar uri: The full URI that was requested (includes arguments).
    @ivar path: The path only (arguments not included).
    @ivar args: All of the arguments, including URL and POST arguments.
    @type args: A mapping of strings (the argument names) to lists of values.
                i.e., ?foo=bar&foo=baz&quux=spam results in
                {'foo': ['bar', 'baz'], 'quux': ['spam']}.
    """

    __implements__ = interfaces.IConsumer

    # placeholders shown until the real request line is known
    method = "(no method yet)"
    clientproto = "(no clientproto yet)"
    uri = "(no uri yet)"

    # response status
    code = OK
    code_message = RESPONSES[OK]

    # response progress
    producer = None
    finished = 0
    startedWriting = 0
    chunked = 0
    sentLength = 0 # content-length of response, or total bytes sent via chunking

    # cache validators
    etag = None
    lastModified = None

    def __init__(self, channel, queued, length, received_headers):
        """
        @param channel: the channel we're connected to.
        @param queued: are we in the request queue, or can we start writing to
            the transport?
        @param length: handed to C{self.gotLength}.
        @param received_headers: the incoming headers; kept as
            C{self.received_headers} before C{self.parseCookies} is called.
        """
        self.channel = channel
        self.queued = queued

        # incoming side
        self.received_headers = received_headers
        self.received_cookies = {}
        self.parseCookies()

        # outgoing side
        self.headers = {} # outgoing headers
        self.cookies = [] # outgoing cookies

        self.gotLength(length)

        # An unqueued request writes straight to the channel's transport; a
        # queued one writes into an in-memory StringTransport instead.
        if not queued:
            self.transport = self.channel.transport
        else:
            self.transport = StringTransport()
[...rest of Request is unchanged ...]
class _ParseHTTPHeaders(BaseHTTPServer.BaseHTTPRequestHandler):
    """Parse a request line and header block with the stdlib HTTP parser.

    Bit of thieving going on ;) -- the class inherits from
    C{BaseHTTPRequestHandler} purely to reuse C{parse_request} together with
    the class attributes that method relies on. The inherited constructor is
    never run, so no socket, client address or server object is needed.

    After construction:
      - C{valid} is the return value of C{parse_request()};
      - C{error_code} / C{error_message} hold the arguments of the last
        C{send_error} call made during parsing, or C{None} if there was none;
      - C{parse_request} is expected to have populated C{command}, C{path},
        C{request_version}, C{headers} and C{close_connection}, which is what
        C{HTTPChannel} reads from this object.
    """

    protocol_version = protocol_version

    def __init__(self, raw_requestline, rfile):
        """
        @param raw_requestline: the request line, newline-terminated.
        @param rfile: a file-like object positioned at the first header line.
        """
        # Deliberately does NOT call the base-class constructor, which would
        # try to service a live connection.
        self.raw_requestline = raw_requestline
        self.rfile = rfile
        self.error_code = None
        self.error_message = None
        self.valid = self.parse_request()

    def send_error(self, code, message=None):
        """Record the error instead of writing a response."""
        self.error_code = code
        self.error_message = message
class HTTPChannel(basic.LineReceiver):
    """A receiver for HTTP requests.

    Lines are collected until the blank line that ends the headers; the
    request line and header block are then parsed by C{_ParseHTTPHeaders} and
    a request object is created through C{requestFactory}. Any request body
    is read in raw mode and fed to the request via C{handleContentChunk}.

    C{self.requests} is the queue of outstanding requests, each stored as a
    C{(request, close_connection)} pair.
    """

    __content = None # Why is this here?

    # set in instances or subclasses
    requestFactory = Request

    def __init__(self):
        # the request queue
        self.requests = []
        self._reset()
        # set once the connection is being dropped; further input is ignored
        self.closed = 0

    def _reset(self):
        """Forget all per-request parsing state."""
        self.length = 0
        self._raw_requestline = None
        self._header = StringIO()
        # 1: waiting for the request line; 2: same, but the one tolerated
        # empty line has already been skipped; 0: reading headers.
        self._first_line = 1
        self._parsed_header = None

    def dataReceived(self, data):
        # if this connection is not persistent, drop any data which
        # the client (illegally) sent after the last request.
        if self.closed:
            return
        basic.LineReceiver.dataReceived(self, data)

    def lineReceived(self, line):
        """Consume the request line or one header line."""
        if self.closed:
            # lines already buffered when the connection was dropped
            return

        if self._first_line:
            # IE sends an extraneous empty line (\r\n) after a POST request;
            # eat up such a line, but only ONCE
            if not line and self._first_line == 1:
                self._first_line = 2
                return
            self._raw_requestline = line + "\n"
            self._first_line = 0
            return

        self._header.write(line)
        self._header.write("\n")
        if line:
            # still inside the header block
            return

        # end of the headers, so process them
        header = self._header
        header.seek(0)
        p = _ParseHTTPHeaders(self._raw_requestline, header)
        if not p.valid:
            self.send_error(p.request_version, p.error_code, p.error_message)
            return
        self._parsed_header = p

        # A request without Content-Length simply has no body. A value that
        # is present but unusable is rejected.
        rawLength = p.headers.get("content-length")
        if rawLength is None:
            contentLength = 0
        else:
            try:
                contentLength = int(rawLength)
            except ValueError:
                contentLength = -1
            if contentLength < 0:
                self.send_error(p.request_version, LENGTH_REQUIRED)
                return
        self.length = contentLength

        # create a new Request object
        request = self.requestFactory(self, len(self.requests), self.length,
                                      p.headers)
        self.requests.append((request, p.close_connection))

        if self.length == 0:
            self.allContentReceived()
        else:
            # the payload can be any sort of data, so switch over
            # to raw mode. Future data events go to rawDataReceived
            self.setRawMode()

    def send_error(self, version, code, message = None):
        """Write a bare status line for C{code} and drop the connection.

        @param version: protocol version string to put on the status line.
        @param code: numeric status code.
        @param message: reason phrase; looked up in C{RESPONSES} when omitted.
        """
        if message is None:
            message = RESPONSES[code]
        self.transport.write("%s %s %s\r\n\r\n" % (version, code, message))
        self.transport.loseConnection()
        # nothing the client sends from here on should be parsed
        self.closed = 1

    def allContentReceived(self):
        """Hand the completed request (the newest in the queue) to its handler."""
        p = self._parsed_header
        # reset ALL state variables, so we don't interfere with next request
        self._reset()
        req = self.requests[-1][0]
        req.requestReceived(p.command, p.path, p.request_version)

    def rawDataReceived(self, data):
        """Feed body data to the newest request until C{length} is used up."""
        if len(data) < self.length:
            self.requests[-1][0].handleContentChunk(data)
            self.length = self.length - len(data)
        else:
            self.requests[-1][0].handleContentChunk(data[:self.length])
            # whatever follows the body belongs to the next request
            extraneous = data[self.length:]
            self.allContentReceived()
            self.setLineMode(extraneous)

    def requestDone(self, request):
        """Called by first request in queue when it is done."""
        if request is not self.requests[0][0]:
            raise TypeError
        close_connection = self.requests[0][1]
        del self.requests[0]
        if close_connection:
            self.transport.loseConnection()
            self.closed = 1
        else:
            # notify next request it can start writing
            if self.requests:
                self.requests[0][0].noLongerQueued()

    def connectionLost(self, reason):
        """Tell every outstanding request that the connection is gone."""
        for request, close_connection in self.requests:
            request.connectionLost(reason)
class HTTPFactory(protocol.ServerFactory):
    """Factory for HTTP server.

    Besides building C{HTTPChannel} protocols it owns the access log: either
    a file opened from C{logPath} or, when no path was given,
    C{twisted.python.log.logfile}.
    """

    protocol = HTTPChannel
    logPath = None

    def __init__(self, logPath=None):
        """
        @param logPath: path of the access log, or C{None} to log to
            C{log.logfile}. A given path is made absolute immediately.
        """
        if logPath is not None:
            logPath = os.path.abspath(logPath)
        self.logPath = logPath

    def startFactory(self):
        """Start the shared log timestamp and open the log file."""
        _logDateTimeStart()
        if self.logPath:
            self.logFile = self._openLogFile(self.logPath)
        else:
            self.logFile = log.logfile

    def stopFactory(self):
        """Close a log file this factory opened and release the timestamp."""
        if hasattr(self, "logFile"):
            # Never close the shared log.logfile; only a file this factory
            # opened itself. Identity is what matters here.
            if self.logFile is not log.logfile:
                self.logFile.close()
            del self.logFile
        _logDateTimeStop()

    def _openLogFile(self, path):
        """Override in subclasses, e.g. to use twisted.python.logfile."""
        f = open(path, "a")
        # seek(offset, whence): offset 0 relative to the end of the file
        f.seek(0, 2)
        return f

    def log(self, request):
        """Log a request's result to the logfile, by default in combined log format."""
        line = '%s - - %s "%s" %d %s "%s" "%s"\n' % (
            request.getClientIP(),
            # request.getUser() or "-", # the remote user is almost never important
            _logDateTime,
            repr(request),
            request.code,
            request.sentLength or "-",
            request.getHeader("referer") or "-",
            request.getHeader("user-agent") or "-")
        self.logFile.write(line)
--Apple-Mail-6--1071383028--