| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
7037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144 |
- #
- # Copyright 2009 Facebook
- #
- # Licensed under the Apache License, Version 2.0 (the "License"); you may
- # not use this file except in compliance with the License. You may obtain
- # a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- # License for the specific language governing permissions and limitations
- # under the License.
- """HTTP utility code shared by clients and servers.
- This module also defines the `HTTPServerRequest` class which is exposed
- via `tornado.web.RequestHandler.request`.
- """
- import calendar
- import collections
- import copy
- import datetime
- import email.utils
- from http.client import responses
- import http.cookies
- import re
- from ssl import SSLError
- import time
- import unicodedata
- from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
- from tornado.escape import native_str, parse_qs_bytes, utf8
- from tornado.log import gen_log
- from tornado.util import ObjectDict, unicode_type
- # responses is unused in this file, but we re-export it to other files.
- # Reference it so pyflakes doesn't complain.
- responses
- import typing
- from typing import (
- Tuple,
- Iterable,
- List,
- Mapping,
- Iterator,
- Dict,
- Union,
- Optional,
- Awaitable,
- Generator,
- AnyStr,
- )
- if typing.TYPE_CHECKING:
- from typing import Deque # noqa: F401
- from asyncio import Future # noqa: F401
- import unittest # noqa: F401
- # RFC 7230 section 3.5: a recipient MAY recognize a single LF as a line
- # terminator and ignore any preceding CR.
- _CRLF_RE = re.compile(r"\r?\n")
- class _NormalizedHeaderCache(dict):
- """Dynamic cached mapping of header names to Http-Header-Case.
- Implemented as a dict subclass so that cache hits are as fast as a
- normal dict lookup, without the overhead of a python function
- call.
- >>> normalized_headers = _NormalizedHeaderCache(10)
- >>> normalized_headers["coNtent-TYPE"]
- 'Content-Type'
- """
- def __init__(self, size: int) -> None:
- super(_NormalizedHeaderCache, self).__init__()
- self.size = size
- self.queue = collections.deque() # type: Deque[str]
- def __missing__(self, key: str) -> str:
- normalized = "-".join([w.capitalize() for w in key.split("-")])
- self[key] = normalized
- self.queue.append(key)
- if len(self.queue) > self.size:
- # Limit the size of the cache. LRU would be better, but this
- # simpler approach should be fine. In Python 2.7+ we could
- # use OrderedDict (or in 3.2+, @functools.lru_cache).
- old_key = self.queue.popleft()
- del self[old_key]
- return normalized
- _normalized_headers = _NormalizedHeaderCache(1000)
class HTTPHeaders(collections.abc.MutableMapping):
    """A dictionary that maintains ``Http-Header-Case`` for all keys.

    Supports multiple values per key via a pair of new methods,
    `add()` and `get_list()`.  The regular dictionary interface
    returns a single value per key, with multiple values joined by a
    comma.

    >>> h = HTTPHeaders({"content-type": "text/html"})
    >>> list(h.keys())
    ['Content-Type']
    >>> h["Content-Type"]
    'text/html'

    >>> h.add("Set-Cookie", "A=B")
    >>> h.add("Set-Cookie", "C=D")
    >>> h["set-cookie"]
    'A=B,C=D'
    >>> h.get_list("set-cookie")
    ['A=B', 'C=D']

    >>> for (k,v) in sorted(h.get_all()):
    ...    print('%s: %s' % (k,v))
    ...
    Content-Type: text/html
    Set-Cookie: A=B
    Set-Cookie: C=D
    """

    @typing.overload
    def __init__(self, __arg: Mapping[str, List[str]]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, __arg: Mapping[str, str]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, *args: Tuple[str, str]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, **kwargs: str) -> None:
        pass

    def __init__(self, *args: typing.Any, **kwargs: str) -> None:  # noqa: F811
        # Normalized name -> comma-joined value (what __getitem__ returns).
        self._dict = {}  # type: typing.Dict[str, str]
        # Normalized name -> every individual value, in insertion order.
        self._as_list = {}  # type: typing.Dict[str, typing.List[str]]
        # Name touched by the most recent add(); continuation lines handled
        # by parse_line() are appended to this header.
        self._last_key = None  # type: Optional[str]
        if len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], HTTPHeaders):
            # Copy constructor
            for k, v in args[0].get_all():
                self.add(k, v)
        else:
            # Dict-style initialization
            self.update(*args, **kwargs)

    # new public methods

    def add(self, name: str, value: str) -> None:
        """Adds a new value for the given key."""
        norm_name = _normalized_headers[name]
        self._last_key = norm_name
        if norm_name in self:
            self._dict[norm_name] = (
                native_str(self[norm_name]) + "," + native_str(value)
            )
            self._as_list[norm_name].append(value)
        else:
            self[norm_name] = value

    def get_list(self, name: str) -> List[str]:
        """Returns all values for the given header as a list."""
        norm_name = _normalized_headers[name]
        return self._as_list.get(norm_name, [])

    def get_all(self) -> Iterable[Tuple[str, str]]:
        """Returns an iterable of all (name, value) pairs.

        If a header has multiple values, multiple pairs will be
        returned with the same name.
        """
        for name, values in self._as_list.items():
            for value in values:
                yield (name, value)

    def parse_line(self, line: str) -> None:
        """Updates the dictionary with a single header line.

        A line starting with whitespace is treated as the continuation of
        the previously added header.  An empty line is ignored.

        Raises `HTTPInputError` if a continuation line arrives before any
        header, or if a non-continuation line contains no colon.

        >>> h = HTTPHeaders()
        >>> h.parse_line("Content-Type: text/html")
        >>> h.get('content-type')
        'text/html'
        """
        if not line:
            # An empty string carries no header; without this guard the
            # ``line[0]`` access below would raise IndexError.
            return
        if line[0].isspace():
            # continuation of a multi-line header
            if self._last_key is None:
                raise HTTPInputError("first header line cannot start with whitespace")
            new_part = " " + line.lstrip()
            self._as_list[self._last_key][-1] += new_part
            self._dict[self._last_key] += new_part
        else:
            name, sep, value = line.partition(":")
            if not sep:
                raise HTTPInputError("no colon in header line")
            self.add(name, value.strip())

    @classmethod
    def parse(cls, headers: str) -> "HTTPHeaders":
        """Returns a dictionary from HTTP header text.

        >>> h = HTTPHeaders.parse("Content-Type: text/html\\r\\nContent-Length: 42\\r\\n")
        >>> sorted(h.items())
        [('Content-Length', '42'), ('Content-Type', 'text/html')]

        .. versionchanged:: 5.1

           Raises `HTTPInputError` on malformed headers instead of a
           mix of `KeyError`, and `ValueError`.

        """
        h = cls()
        for line in _CRLF_RE.split(headers):
            if line:
                h.parse_line(line)
        return h

    # MutableMapping abstract method implementations.

    def __setitem__(self, name: str, value: str) -> None:
        norm_name = _normalized_headers[name]
        self._dict[norm_name] = value
        self._as_list[norm_name] = [value]

    def __getitem__(self, name: str) -> str:
        return self._dict[_normalized_headers[name]]

    def __delitem__(self, name: str) -> None:
        norm_name = _normalized_headers[name]
        del self._dict[norm_name]
        del self._as_list[norm_name]

    def __len__(self) -> int:
        return len(self._dict)

    def __iter__(self) -> Iterator[typing.Any]:
        return iter(self._dict)

    def copy(self) -> "HTTPHeaders":
        # defined in dict but not in MutableMapping.
        return HTTPHeaders(self)

    # Use our overridden copy method for the copy.copy module.
    # This makes shallow copies one level deeper, but preserves
    # the appearance that HTTPHeaders is a single container.
    __copy__ = copy

    def __str__(self) -> str:
        # One "Name: value" line per individual value, in stored order.
        return "".join(
            ["%s: %s\n" % (name, value) for name, value in self.get_all()]
        )

    __unicode__ = __str__
class HTTPServerRequest:
    """A single HTTP request.

    All attributes are type `str` unless otherwise noted.

    .. attribute:: method

       HTTP request method, e.g. "GET" or "POST"

    .. attribute:: uri

       The requested uri.

    .. attribute:: path

       The path portion of `uri`

    .. attribute:: query

       The query portion of `uri`

    .. attribute:: version

       HTTP version specified in request, e.g. "HTTP/1.1"

    .. attribute:: headers

       `.HTTPHeaders` dictionary-like object for request headers.  Acts like
       a case-insensitive dictionary with additional methods for repeated
       headers.

    .. attribute:: body

       Request body, if present, as a byte string.

    .. attribute:: remote_ip

       Client's IP address as a string.  If ``HTTPServer.xheaders`` is set,
       will pass along the real IP address provided by a load balancer
       in the ``X-Real-Ip`` or ``X-Forwarded-For`` header.

       .. versionchanged:: 3.1
          The list format of ``X-Forwarded-For`` is now supported.

    .. attribute:: protocol

       The protocol used, either "http" or "https".  If ``HTTPServer.xheaders``
       is set, will pass along the protocol used by a load balancer if
       reported via an ``X-Scheme`` header.

    .. attribute:: host

       The requested hostname, usually taken from the ``Host`` header.

    .. attribute:: arguments

       GET/POST arguments are available in the arguments property, which
       maps arguments names to lists of values (to support multiple values
       for individual names). Names are of type `str`, while arguments
       are byte strings.  Note that this is different from
       `.RequestHandler.get_argument`, which returns argument values as
       unicode strings.

    .. attribute:: query_arguments

       Same format as ``arguments``, but contains only arguments extracted
       from the query string.

       .. versionadded:: 3.2

    .. attribute:: body_arguments

       Same format as ``arguments``, but contains only arguments extracted
       from the request body.

       .. versionadded:: 3.2

    .. attribute:: files

       File uploads are available in the files property, which maps file
       names to lists of `.HTTPFile`.

    .. attribute:: connection

       An HTTP request is attached to a single HTTP connection, which can
       be accessed through the "connection" attribute. Since connections
       are typically kept open in HTTP/1.1, multiple requests can be handled
       sequentially on a single connection.

    .. versionchanged:: 4.0
       Moved from ``tornado.httpserver.HTTPRequest``.
    """

    # Class-level defaults; both are overwritten in __init__ when a uri
    # is supplied.
    path = None  # type: str
    query = None  # type: str

    # HACK: Used for stream_request_body
    _body_future = None  # type: Future[None]

    def __init__(
        self,
        method: str = None,
        uri: str = None,
        version: str = "HTTP/1.0",
        headers: HTTPHeaders = None,
        body: bytes = None,
        host: str = None,
        files: Dict[str, List["HTTPFile"]] = None,
        connection: "HTTPConnection" = None,
        start_line: "RequestStartLine" = None,
        server_connection: object = None,
    ) -> None:
        # A start line, when given, takes precedence over the individual
        # method/uri/version arguments.
        if start_line is not None:
            method, uri, version = start_line
        self.method = method
        self.uri = uri
        self.version = version
        self.headers = headers or HTTPHeaders()
        self.body = body or b""

        # Remote IP and protocol are read from the connection's context
        # object when the connection has one.
        context = getattr(connection, "context", None)
        self.remote_ip = getattr(context, "remote_ip", None)
        self.protocol = getattr(context, "protocol", "http")

        # Explicit argument first, then the Host header, then loopback.
        self.host = host or self.headers.get("Host") or "127.0.0.1"
        self.host_name = split_host_and_port(self.host.lower())[0]
        self.files = files or {}
        self.connection = connection
        self.server_connection = server_connection
        self._start_time = time.time()
        self._finish_time = None

        if uri is not None:
            self.path, _, self.query = uri.partition("?")
        self.arguments = parse_qs_bytes(self.query, keep_blank_values=True)
        # Deep copy so later changes to ``arguments`` do not affect it.
        self.query_arguments = copy.deepcopy(self.arguments)
        self.body_arguments = {}  # type: Dict[str, List[bytes]]

    @property
    def cookies(self) -> Dict[str, http.cookies.Morsel]:
        """A dictionary of ``http.cookies.Morsel`` objects."""
        if hasattr(self, "_cookies"):
            return self._cookies

        # First access: parse the Cookie header once and keep the result.
        self._cookies = http.cookies.SimpleCookie()
        if "Cookie" not in self.headers:
            return self._cookies
        try:
            parsed = parse_cookie(self.headers["Cookie"])
        except Exception:
            # Unparseable header: expose an empty cookie jar.
            return self._cookies
        for name, value in parsed.items():
            try:
                self._cookies[name] = value
            except Exception:
                # SimpleCookie imposes some restrictions on keys;
                # parse_cookie does not.  Discard any cookies
                # with disallowed keys.
                pass
        return self._cookies

    def full_url(self) -> str:
        """Reconstructs the full URL for this request."""
        return "".join([self.protocol, "://", self.host, self.uri])

    def request_time(self) -> float:
        """Returns the amount of time it took for this request to execute."""
        # While ``_finish_time`` is unset, measure up to the current time.
        end = time.time() if self._finish_time is None else self._finish_time
        return end - self._start_time

    def get_ssl_certificate(
        self, binary_form: bool = False
    ) -> Union[None, Dict, bytes]:
        """Returns the client's SSL certificate, if any.

        To use client certificates, the HTTPServer's
        `ssl.SSLContext.verify_mode` field must be set, e.g.::

            ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
            ssl_ctx.load_cert_chain("foo.crt", "foo.key")
            ssl_ctx.load_verify_locations("cacerts.pem")
            ssl_ctx.verify_mode = ssl.CERT_REQUIRED
            server = HTTPServer(app, ssl_options=ssl_ctx)

        By default, the return value is a dictionary (or None, if no
        client certificate is present).  If ``binary_form`` is true, a
        DER-encoded form of the certificate is returned instead.  See
        SSLSocket.getpeercert() in the standard library for more
        details.
        http://docs.python.org/library/ssl.html#sslsocket-objects
        """
        if self.connection is None:
            return None
        try:
            # TODO: add a method to HTTPConnection for this so it can work with HTTP/2
            return self.connection.stream.socket.getpeercert(  # type: ignore
                binary_form=binary_form
            )
        except SSLError:
            return None

    def _parse_body(self) -> None:
        """Parse the body into ``body_arguments``/``files`` and merge the
        body arguments into ``arguments``."""
        parse_body_arguments(
            self.headers.get("Content-Type", ""),
            self.body,
            self.body_arguments,
            self.files,
            self.headers,
        )

        for name, values in self.body_arguments.items():
            self.arguments.setdefault(name, []).extend(values)

    def __repr__(self) -> str:
        shown = ("protocol", "host", "method", "uri", "version", "remote_ip")
        rendered = ", ".join("%s=%r" % (name, getattr(self, name)) for name in shown)
        return "%s(%s)" % (self.__class__.__name__, rendered)
class HTTPInputError(Exception):
    """Raised when HTTP data received from a remote source (a request or
    a response) is malformed.

    .. versionadded:: 4.0
    """
class HTTPOutputError(Exception):
    """Raised for errors in HTTP output.

    .. versionadded:: 4.0
    """
class HTTPServerConnectionDelegate:
    """Interface to implement in order to handle requests from `.HTTPServer`.

    .. versionadded:: 4.0
    """

    def start_request(
        self, server_conn: object, request_conn: "HTTPConnection"
    ) -> "HTTPMessageDelegate":
        """Called by the server when a new request has started.

        :arg server_conn: is an opaque object representing the long-lived
            (e.g. tcp-level) connection.
        :arg request_conn: is a `.HTTPConnection` object for a single
            request/response exchange.

        This method should return a `.HTTPMessageDelegate`.
        """
        raise NotImplementedError

    def on_close(self, server_conn: object) -> None:
        """Called when a connection has been closed.

        :arg server_conn: is a server connection that has previously been
            passed to ``start_request``.
        """
        return None
class HTTPMessageDelegate:
    """Interface to implement in order to handle an HTTP request or response.

    Every method here is a no-op by default.

    .. versionadded:: 4.0
    """

    # TODO: genericize this class to avoid exposing the Union.
    def headers_received(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
    ) -> Optional[Awaitable[None]]:
        """Called when the HTTP headers have been received and parsed.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`
            depending on whether this is a client or server message.
        :arg headers: a `.HTTPHeaders` instance.

        Some `.HTTPConnection` methods can only be called during
        ``headers_received``.

        May return a `.Future`; if it does the body will not be read
        until it is done.
        """
        return None

    def data_received(self, chunk: bytes) -> Optional[Awaitable[None]]:
        """Called when a chunk of data has been received.

        May return a `.Future` for flow control.
        """
        return None

    def finish(self) -> None:
        """Called after the last chunk of data has been received."""
        return None

    def on_connection_close(self) -> None:
        """Called if the connection is closed without finishing the request.

        If ``headers_received`` is called, either ``finish`` or
        ``on_connection_close`` will be called, but not both.
        """
        return None
class HTTPConnection:
    """Interface that applications use to write their responses.

    Every method here raises `NotImplementedError`.

    .. versionadded:: 4.0
    """

    def write_headers(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
        chunk: bytes = None,
    ) -> "Future[None]":
        """Write an HTTP header block.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`.
        :arg headers: a `.HTTPHeaders` instance.
        :arg chunk: the first (optional) chunk of data.  This is an optimization
            so that small responses can be written in the same call as their
            headers.

        The ``version`` field of ``start_line`` is ignored.

        Returns a future for flow control.

        .. versionchanged:: 6.0

           The ``callback`` argument was removed.
        """
        raise NotImplementedError

    def write(self, chunk: bytes) -> "Future[None]":
        """Writes a chunk of body data.

        Returns a future for flow control.

        .. versionchanged:: 6.0

           The ``callback`` argument was removed.
        """
        raise NotImplementedError

    def finish(self) -> None:
        """Indicates that the last body data has been written."""
        raise NotImplementedError
def url_concat(
    url: str,
    args: Union[
        None, Dict[str, str], List[Tuple[str, str]], Tuple[Tuple[str, str], ...]
    ],
) -> str:
    """Append ``args`` to the query string of ``url``.

    Works whether or not ``url`` already has query parameters; existing
    parameters (including blank ones) are kept and the new ones are added
    after them.  When ``args`` is ``None`` the url is returned unchanged.

    ``args`` may be either a dictionary or a list (or tuple) of key-value
    pairs (the latter allows for multiple values with the same key).
    Any other type raises `TypeError`.

    >>> url_concat("http://example.com/foo", dict(c="d"))
    'http://example.com/foo?c=d'
    >>> url_concat("http://example.com/foo?a=b", dict(c="d"))
    'http://example.com/foo?a=b&c=d'
    >>> url_concat("http://example.com/foo?a=b", [("c", "d"), ("c", "d2")])
    'http://example.com/foo?a=b&c=d&c=d2'
    """
    if args is None:
        return url
    parsed_url = urlparse(url)

    # Reduce both accepted shapes to an iterable of (key, value) pairs.
    if isinstance(args, dict):
        extra_pairs = args.items()
    elif isinstance(args, (list, tuple)):
        extra_pairs = args
    else:
        raise TypeError(
            "'args' parameter should be dict, list or tuple. Not {0}".format(
                type(args)
            )
        )

    query_pairs = parse_qsl(parsed_url.query, keep_blank_values=True)
    query_pairs.extend(extra_pairs)

    # Only the query component (index 4) is replaced.
    return urlunparse(
        parsed_url[:4] + (urlencode(query_pairs),) + parsed_url[5:]
    )
class HTTPFile(ObjectDict):
    """A file uploaded via a form.

    For backwards compatibility, its instance attributes are also
    accessible as dictionary keys.

    * ``filename``
    * ``body``
    * ``content_type``
    """
- def _parse_request_range(
- range_header: str,
- ) -> Optional[Tuple[Optional[int], Optional[int]]]:
- """Parses a Range header.
- Returns either ``None`` or tuple ``(start, end)``.
- Note that while the HTTP headers use inclusive byte positions,
- this method returns indexes suitable for use in slices.
- >>> start, end = _parse_request_range("bytes=1-2")
- >>> start, end
- (1, 3)
- >>> [0, 1, 2, 3, 4][start:end]
- [1, 2]
- >>> _parse_request_range("bytes=6-")
- (6, None)
- >>> _parse_request_range("bytes=-6")
- (-6, None)
- >>> _parse_request_range("bytes=-0")
- (None, 0)
- >>> _parse_request_range("bytes=")
- (None, None)
- >>> _parse_request_range("foo=42")
- >>> _parse_request_range("bytes=1-2,6-10")
- Note: only supports one range (ex, ``bytes=1-2,6-10`` is not allowed).
- See [0] for the details of the range header.
- [0]: http://greenbytes.de/tech/webdav/draft-ietf-httpbis-p5-range-latest.html#byte.ranges
- """
- unit, _, value = range_header.partition("=")
- unit, value = unit.strip(), value.strip()
- if unit != "bytes":
- return None
- start_b, _, end_b = value.partition("-")
- try:
- start = _int_or_none(start_b)
- end = _int_or_none(end_b)
- except ValueError:
- return None
- if end is not None:
- if start is None:
- if end != 0:
- start = -end
- end = None
- else:
- end += 1
- return (start, end)
- def _get_content_range(start: Optional[int], end: Optional[int], total: int) -> str:
- """Returns a suitable Content-Range header:
- >>> print(_get_content_range(None, 1, 4))
- bytes 0-0/4
- >>> print(_get_content_range(1, 3, 4))
- bytes 1-2/4
- >>> print(_get_content_range(None, None, 4))
- bytes 0-3/4
- """
- start = start or 0
- end = (end or total) - 1
- return "bytes %s-%s/%s" % (start, end, total)
- def _int_or_none(val: str) -> Optional[int]:
- val = val.strip()
- if val == "":
- return None
- return int(val)
def parse_body_arguments(
    content_type: str,
    body: bytes,
    arguments: Dict[str, List[bytes]],
    files: Dict[str, List[HTTPFile]],
    headers: HTTPHeaders = None,
) -> None:
    """Parses a form request body.

    Supports ``application/x-www-form-urlencoded`` and
    ``multipart/form-data``.  The ``content_type`` parameter should be
    a string and ``body`` should be a byte string.  The ``arguments``
    and ``files`` parameters are dictionaries that will be updated
    with the parsed contents.

    Bodies of any other content type are ignored.  For the two supported
    types, a ``Content-Encoding`` header in ``headers`` causes the body to
    be skipped with a warning, and parse failures are logged as warnings
    rather than raised.
    """
    is_urlencoded = content_type.startswith("application/x-www-form-urlencoded")
    is_multipart = not is_urlencoded and content_type.startswith(
        "multipart/form-data"
    )
    if not (is_urlencoded or is_multipart):
        return

    # Encoded bodies are not decoded here for either form type.
    if headers and "Content-Encoding" in headers:
        gen_log.warning(
            "Unsupported Content-Encoding: %s", headers["Content-Encoding"]
        )
        return

    if is_urlencoded:
        try:
            parsed = parse_qs_bytes(native_str(body), keep_blank_values=True)
        except Exception as e:
            gen_log.warning("Invalid x-www-form-urlencoded body: %s", e)
            parsed = {}
        for name, values in parsed.items():
            if values:
                arguments.setdefault(name, []).extend(values)
        return

    # multipart/form-data: the boundary is a parameter of the content type.
    try:
        for field in content_type.split(";"):
            k, _, v = field.strip().partition("=")
            if k == "boundary" and v:
                parse_multipart_form_data(utf8(v), body, arguments, files)
                break
        else:
            raise ValueError("multipart boundary not found")
    except Exception as e:
        gen_log.warning("Invalid multipart/form-data: %s", e)
def parse_multipart_form_data(
    boundary: bytes,
    data: bytes,
    arguments: Dict[str, List[bytes]],
    files: Dict[str, List[HTTPFile]],
) -> None:
    """Parses a ``multipart/form-data`` body.

    The ``boundary`` and ``data`` parameters are both byte strings.
    The dictionaries given in the arguments and files parameters
    will be updated with the contents of the body.

    Parts that cannot be used (no header block, wrong disposition, no
    trailing CRLF, no name) are skipped with a warning.

    .. versionchanged:: 5.1

       Now recognizes non-ASCII filenames in RFC 2231/5987
       (``filename*=``) format.
    """
    # The standard allows for the boundary to be quoted in the header,
    # although it's rare (it happens at least for google app engine
    # xmpp).  I think we're also supposed to handle backslash-escapes
    # here but I'll save that until we see a client that uses them
    # in the wild.
    if boundary.startswith(b'"') and boundary.endswith(b'"'):
        boundary = boundary[1:-1]
    delimiter = b"--" + boundary

    # Everything from the last closing delimiter onwards is ignored.
    closing_index = data.rfind(delimiter + b"--")
    if closing_index == -1:
        gen_log.warning("Invalid multipart/form-data: no final boundary")
        return

    for part in data[:closing_index].split(delimiter + b"\r\n"):
        if not part:
            continue
        header_end = part.find(b"\r\n\r\n")
        if header_end == -1:
            gen_log.warning("multipart/form-data missing headers")
            continue
        headers = HTTPHeaders.parse(part[:header_end].decode("utf-8"))
        disposition, disp_params = _parse_header(
            headers.get("Content-Disposition", "")
        )
        if disposition != "form-data" or not part.endswith(b"\r\n"):
            gen_log.warning("Invalid multipart/form-data")
            continue
        # Skip the 4-byte blank line after the headers and drop the
        # 2-byte CRLF that ends the part.
        value = part[header_end + 4 : -2]
        name = disp_params.get("name")
        if not name:
            gen_log.warning("multipart/form-data value missing name")
            continue
        filename = disp_params.get("filename")
        if filename:
            ctype = headers.get("Content-Type", "application/unknown")
            files.setdefault(name, []).append(
                HTTPFile(filename=filename, body=value, content_type=ctype)
            )
        else:
            arguments.setdefault(name, []).append(value)
def format_timestamp(
    ts: Union[int, float, tuple, time.struct_time, datetime.datetime]
) -> str:
    """Formats a timestamp in the format used by HTTP.

    The argument may be a numeric timestamp as returned by `time.time`,
    a time tuple as returned by `time.gmtime`, or a `datetime.datetime`
    object.  Any other type raises `TypeError`.

    >>> format_timestamp(1359312200)
    'Sun, 27 Jan 2013 18:43:20 GMT'
    """
    if isinstance(ts, datetime.datetime):
        # Reduce the datetime to a UTC time tuple, then share the tuple path.
        ts = ts.utctimetuple()
    if isinstance(ts, (tuple, time.struct_time)):
        seconds = calendar.timegm(ts)
    elif isinstance(ts, (int, float)):
        seconds = ts
    else:
        raise TypeError("unknown timestamp type: %r" % ts)
    return email.utils.formatdate(seconds, usegmt=True)
RequestStartLine = collections.namedtuple(
    "RequestStartLine", ["method", "path", "version"]
)


def parse_request_start_line(line: str) -> RequestStartLine:
    """Returns a (method, path, version) tuple for an HTTP 1.x request line.

    The response is a `collections.namedtuple`.

    Raises `HTTPInputError` if the line does not consist of exactly three
    space-separated fields, or if the version field is not exactly
    ``HTTP/1.<digit>``.

    >>> parse_request_start_line("GET /foo HTTP/1.1")
    RequestStartLine(method='GET', path='/foo', version='HTTP/1.1')
    """
    try:
        method, path, version = line.split(" ")
    except ValueError:
        # https://tools.ietf.org/html/rfc7230#section-3.1.1
        # invalid request-line SHOULD respond with a 400 (Bad Request)
        raise HTTPInputError("Malformed HTTP request line")
    # fullmatch, not match with "$": "$" also matches just before a trailing
    # newline, which would let a version such as "HTTP/1.1\n" through.
    if not re.fullmatch(r"HTTP/1\.[0-9]", version):
        raise HTTPInputError(
            "Malformed HTTP version in HTTP Request-Line: %r" % version
        )
    return RequestStartLine(method, path, version)
ResponseStartLine = collections.namedtuple(
    "ResponseStartLine", ["version", "code", "reason"]
)


def parse_response_start_line(line: str) -> ResponseStartLine:
    """Returns a (version, code, reason) tuple for an HTTP 1.x response line.

    The response is a `collections.namedtuple`; ``code`` is converted to
    an `int`.

    Raises `HTTPInputError` if the line does not start with
    ``HTTP/1.<digit> <digits> ``.

    >>> parse_response_start_line("HTTP/1.1 200 OK")
    ResponseStartLine(version='HTTP/1.1', code=200, reason='OK')
    """
    line = native_str(line)
    # The dot is escaped so that only a literal "HTTP/1." prefix is accepted;
    # an unescaped "." would also match e.g. "HTTP/1x1".
    match = re.match(r"(HTTP/1\.[0-9]) ([0-9]+) ([^\r]*)", line)
    if not match:
        raise HTTPInputError("Error parsing response start line")
    return ResponseStartLine(match.group(1), int(match.group(2)), match.group(3))
- # _parseparam and _parse_header are copied and modified from python2.7's cgi.py
- # The original 2.7 version of this code did not correctly support some
- # combinations of semicolons and double quotes.
- # It has also been modified to support valueless parameters as seen in
- # websocket extension negotiations, and to support non-ascii values in
- # RFC 2231/5987 format.
- def _parseparam(s: str) -> Generator[str, None, None]:
- while s[:1] == ";":
- s = s[1:]
- end = s.find(";")
- while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
- end = s.find(";", end + 1)
- if end < 0:
- end = len(s)
- f = s[:end]
- yield f.strip()
- s = s[end:]
def _parse_header(line: str) -> Tuple[str, Dict[str, str]]:
    r"""Parse a Content-type like header.

    Return the main content-type and a dictionary of options.

    >>> d = "form-data; foo=\"b\\\\a\\\"r\"; file*=utf-8''T%C3%A4st"
    >>> ct, d = _parse_header(d)
    >>> ct
    'form-data'
    >>> d['file'] == r'T\u00e4st'.encode('ascii').decode('unicode_escape')
    True
    >>> d['foo']
    'b\\a"r'
    """
    fields = _parseparam(";" + line)
    key = next(fields)

    # decode_params treats first argument special, but we already stripped key
    raw_params = [("Dummy", "value")]
    for field in fields:
        eq = field.find("=")
        # NOTE(review): fields without "=" are skipped here, although the
        # comment above _parseparam mentions support for valueless
        # parameters -- confirm which behavior is intended.
        if eq < 0:
            continue
        raw_params.append(
            (field[:eq].strip().lower(), native_str(field[eq + 1 :].strip()))
        )

    decoded_params = email.utils.decode_params(raw_params)
    decoded_params.pop(0)  # get rid of the dummy again

    pdict = {}
    for name, decoded_value in decoded_params:
        value = email.utils.collapse_rfc2231_value(decoded_value)
        # Drop one pair of surrounding double quotes, if still present.
        if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
            value = value[1:-1]
        pdict[name] = value
    return key, pdict
- def _encode_header(key: str, pdict: Dict[str, str]) -> str:
- """Inverse of _parse_header.
- >>> _encode_header('permessage-deflate',
- ... {'client_max_window_bits': 15, 'client_no_context_takeover': None})
- 'permessage-deflate; client_max_window_bits=15; client_no_context_takeover'
- """
- if not pdict:
- return key
- out = [key]
- # Sort the parameters just to make it easy to test.
- for k, v in sorted(pdict.items()):
- if v is None:
- out.append(k)
- else:
- # TODO: quote if necessary.
- out.append("%s=%s" % (k, v))
- return "; ".join(out)
def encode_username_password(
    username: Union[str, bytes], password: Union[str, bytes]
) -> bytes:
    """Encodes a username/password pair in the format used by HTTP auth.

    The return value is a byte string in the form ``username:password``.
    Unicode arguments are NFC-normalized before being encoded with
    ``utf8``; other arguments are passed to ``utf8`` unchanged.

    .. versionadded:: 5.1
    """

    def _nfc(value: Union[str, bytes]) -> Union[str, bytes]:
        # Only text is normalized; anything else is returned as given.
        if isinstance(value, unicode_type):
            return unicodedata.normalize("NFC", value)
        return value

    return utf8(_nfc(username)) + b":" + utf8(_nfc(password))
def doctests():
    # type: () -> unittest.TestSuite
    """Return a test suite built from the doctests in this module."""
    from doctest import DocTestSuite

    return DocTestSuite()
def split_host_and_port(netloc: str) -> Tuple[str, Optional[int]]:
    """Returns ``(host, port)`` tuple from ``netloc``.

    Returned ``port`` will be ``None`` if not present, in which case
    ``host`` is ``netloc`` unchanged.

    .. versionadded:: 4.1
    """
    found = re.match(r"^(.+):(\d+)$", netloc)
    if found is None:
        return (netloc, None)
    host, port_text = found.groups()
    return (host, int(port_text))
def qs_to_qsl(qs: Dict[str, List[AnyStr]]) -> Iterable[Tuple[str, AnyStr]]:
    """Generator converting a result of ``parse_qs`` back to name-value pairs.

    Each name is repeated once per value in its list.

    .. versionadded:: 5.0
    """
    for name, values in qs.items():
        yield from ((name, value) for value in values)
- _OctalPatt = re.compile(r"\\[0-3][0-7][0-7]")
- _QuotePatt = re.compile(r"[\\].")
- _nulljoin = "".join
- def _unquote_cookie(s: str) -> str:
- """Handle double quotes and escaping in cookie values.
- This method is copied verbatim from the Python 3.5 standard
- library (http.cookies._unquote) so we don't have to depend on
- non-public interfaces.
- """
- # If there aren't any doublequotes,
- # then there can't be any special characters. See RFC 2109.
- if s is None or len(s) < 2:
- return s
- if s[0] != '"' or s[-1] != '"':
- return s
- # We have to assume that we must decode this string.
- # Down to work.
- # Remove the "s
- s = s[1:-1]
- # Check for special sequences. Examples:
- # \012 --> \n
- # \" --> "
- #
- i = 0
- n = len(s)
- res = []
- while 0 <= i < n:
- o_match = _OctalPatt.search(s, i)
- q_match = _QuotePatt.search(s, i)
- if not o_match and not q_match: # Neither matched
- res.append(s[i:])
- break
- # else:
- j = k = -1
- if o_match:
- j = o_match.start(0)
- if q_match:
- k = q_match.start(0)
- if q_match and (not o_match or k < j): # QuotePatt matched
- res.append(s[i:k])
- res.append(s[k + 1])
- i = k + 2
- else: # OctalPatt matched
- res.append(s[i:j])
- res.append(chr(int(s[j + 1 : j + 4], 8)))
- i = j + 4
- return _nulljoin(res)
def parse_cookie(cookie: str) -> Dict[str, str]:
    """Parse a ``Cookie`` HTTP header into a dict of name/value pairs.

    This function attempts to mimic browser cookie parsing behavior;
    it specifically does not follow any of the cookie-related RFCs
    (because browsers don't either).

    The algorithm used is identical to that used by Django version 1.9.10.

    .. versionadded:: 4.4.2
    """
    cookiedict = {}
    for chunk in cookie.split(";"):
        key, sep, val = chunk.partition("=")
        if not sep:
            # Assume an empty name per
            # https://bugzilla.mozilla.org/show_bug.cgi?id=169091
            key, val = "", chunk
        key, val = key.strip(), val.strip()
        # Chunks that are empty after stripping (e.g. ";;") are dropped.
        if key or val:
            # unquote using Python's algorithm.
            cookiedict[key] = _unquote_cookie(val)
    return cookiedict
|