# httputil.py
  1. #
  2. # Copyright 2009 Facebook
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License"); you may
  5. # not use this file except in compliance with the License. You may obtain
  6. # a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  12. # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  13. # License for the specific language governing permissions and limitations
  14. # under the License.
  15. """HTTP utility code shared by clients and servers.
  16. This module also defines the `HTTPServerRequest` class which is exposed
  17. via `tornado.web.RequestHandler.request`.
  18. """
  19. import calendar
  20. import collections
  21. import copy
  22. import datetime
  23. import email.utils
  24. from http.client import responses
  25. import http.cookies
  26. import re
  27. from ssl import SSLError
  28. import time
  29. import unicodedata
  30. from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
  31. from tornado.escape import native_str, parse_qs_bytes, utf8
  32. from tornado.log import gen_log
  33. from tornado.util import ObjectDict, unicode_type
  34. # responses is unused in this file, but we re-export it to other files.
  35. # Reference it so pyflakes doesn't complain.
  36. responses
  37. import typing
  38. from typing import (
  39. Tuple,
  40. Iterable,
  41. List,
  42. Mapping,
  43. Iterator,
  44. Dict,
  45. Union,
  46. Optional,
  47. Awaitable,
  48. Generator,
  49. AnyStr,
  50. )
  51. if typing.TYPE_CHECKING:
  52. from typing import Deque # noqa: F401
  53. from asyncio import Future # noqa: F401
  54. import unittest # noqa: F401
  55. # RFC 7230 section 3.5: a recipient MAY recognize a single LF as a line
  56. # terminator and ignore any preceding CR.
  57. _CRLF_RE = re.compile(r"\r?\n")
  58. class _NormalizedHeaderCache(dict):
  59. """Dynamic cached mapping of header names to Http-Header-Case.
  60. Implemented as a dict subclass so that cache hits are as fast as a
  61. normal dict lookup, without the overhead of a python function
  62. call.
  63. >>> normalized_headers = _NormalizedHeaderCache(10)
  64. >>> normalized_headers["coNtent-TYPE"]
  65. 'Content-Type'
  66. """
  67. def __init__(self, size: int) -> None:
  68. super(_NormalizedHeaderCache, self).__init__()
  69. self.size = size
  70. self.queue = collections.deque() # type: Deque[str]
  71. def __missing__(self, key: str) -> str:
  72. normalized = "-".join([w.capitalize() for w in key.split("-")])
  73. self[key] = normalized
  74. self.queue.append(key)
  75. if len(self.queue) > self.size:
  76. # Limit the size of the cache. LRU would be better, but this
  77. # simpler approach should be fine. In Python 2.7+ we could
  78. # use OrderedDict (or in 3.2+, @functools.lru_cache).
  79. old_key = self.queue.popleft()
  80. del self[old_key]
  81. return normalized
  82. _normalized_headers = _NormalizedHeaderCache(1000)
class HTTPHeaders(collections.abc.MutableMapping):
    """A dictionary that maintains ``Http-Header-Case`` for all keys.

    Supports multiple values per key via a pair of new methods,
    `add()` and `get_list()`.  The regular dictionary interface
    returns a single value per key, with multiple values joined by a
    comma.

    >>> h = HTTPHeaders({"content-type": "text/html"})
    >>> list(h.keys())
    ['Content-Type']
    >>> h["Content-Type"]
    'text/html'

    >>> h.add("Set-Cookie", "A=B")
    >>> h.add("Set-Cookie", "C=D")
    >>> h["set-cookie"]
    'A=B,C=D'
    >>> h.get_list("set-cookie")
    ['A=B', 'C=D']

    >>> for (k,v) in sorted(h.get_all()):
    ...    print('%s: %s' % (k,v))
    ...
    Content-Type: text/html
    Set-Cookie: A=B
    Set-Cookie: C=D
    """

    @typing.overload
    def __init__(self, __arg: Mapping[str, List[str]]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, __arg: Mapping[str, str]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, *args: Tuple[str, str]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, **kwargs: str) -> None:
        pass

    def __init__(self, *args: typing.Any, **kwargs: str) -> None:  # noqa: F811
        # _dict maps normalized name -> comma-joined value string;
        # _as_list maps normalized name -> list of individual values.
        # The two are kept in sync by add()/__setitem__/__delitem__.
        self._dict = {}  # type: typing.Dict[str, str]
        self._as_list = {}  # type: typing.Dict[str, typing.List[str]]
        # Most recently added (normalized) header name; needed to attach
        # obs-fold continuation lines in parse_line().
        self._last_key = None  # type: Optional[str]
        if len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], HTTPHeaders):
            # Copy constructor: re-add pair by pair so repeated headers
            # are preserved.
            for k, v in args[0].get_all():
                self.add(k, v)
        else:
            # Dict-style initialization
            self.update(*args, **kwargs)

    # new public methods

    def add(self, name: str, value: str) -> None:
        """Adds a new value for the given key."""
        norm_name = _normalized_headers[name]
        self._last_key = norm_name
        if norm_name in self:
            # Existing header: extend both the comma-joined string form
            # and the per-value list.
            self._dict[norm_name] = (
                native_str(self[norm_name]) + "," + native_str(value)
            )
            self._as_list[norm_name].append(value)
        else:
            self[norm_name] = value

    def get_list(self, name: str) -> List[str]:
        """Returns all values for the given header as a list."""
        norm_name = _normalized_headers[name]
        return self._as_list.get(norm_name, [])

    def get_all(self) -> Iterable[Tuple[str, str]]:
        """Returns an iterable of all (name, value) pairs.

        If a header has multiple values, multiple pairs will be
        returned with the same name.
        """
        for name, values in self._as_list.items():
            for value in values:
                yield (name, value)

    def parse_line(self, line: str) -> None:
        """Updates the dictionary with a single header line.

        >>> h = HTTPHeaders()
        >>> h.parse_line("Content-Type: text/html")
        >>> h.get('content-type')
        'text/html'

        :raises HTTPInputError: if the line starts with whitespace but no
            header has been seen yet, or if it contains no colon.
        """
        if line[0].isspace():
            # continuation of a multi-line header (obsolete line folding)
            if self._last_key is None:
                raise HTTPInputError("first header line cannot start with whitespace")
            new_part = " " + line.lstrip()
            self._as_list[self._last_key][-1] += new_part
            self._dict[self._last_key] += new_part
        else:
            try:
                name, value = line.split(":", 1)
            except ValueError:
                raise HTTPInputError("no colon in header line")
            self.add(name, value.strip())

    @classmethod
    def parse(cls, headers: str) -> "HTTPHeaders":
        """Returns a dictionary from HTTP header text.

        >>> h = HTTPHeaders.parse("Content-Type: text/html\\r\\nContent-Length: 42\\r\\n")
        >>> sorted(h.items())
        [('Content-Length', '42'), ('Content-Type', 'text/html')]

        .. versionchanged:: 5.1

           Raises `HTTPInputError` on malformed headers instead of a
           mix of `KeyError`, and `ValueError`.
        """
        h = cls()
        for line in _CRLF_RE.split(headers):
            if line:
                h.parse_line(line)
        return h

    # MutableMapping abstract method implementations.

    def __setitem__(self, name: str, value: str) -> None:
        # Replaces any existing values for this header.
        norm_name = _normalized_headers[name]
        self._dict[norm_name] = value
        self._as_list[norm_name] = [value]

    def __getitem__(self, name: str) -> str:
        return self._dict[_normalized_headers[name]]

    def __delitem__(self, name: str) -> None:
        norm_name = _normalized_headers[name]
        del self._dict[norm_name]
        del self._as_list[norm_name]

    def __len__(self) -> int:
        return len(self._dict)

    def __iter__(self) -> Iterator[typing.Any]:
        return iter(self._dict)

    def copy(self) -> "HTTPHeaders":
        # defined in dict but not in MutableMapping.
        return HTTPHeaders(self)

    # Use our overridden copy method for the copy.copy module.
    # This makes shallow copies one level deeper, but preserves
    # the appearance that HTTPHeaders is a single container.
    __copy__ = copy

    def __str__(self) -> str:
        lines = []
        for name, value in self.get_all():
            lines.append("%s: %s\n" % (name, value))
        return "".join(lines)

    __unicode__ = __str__
class HTTPServerRequest(object):
    """A single HTTP request.

    All attributes are type `str` unless otherwise noted.

    .. attribute:: method

       HTTP request method, e.g. "GET" or "POST"

    .. attribute:: uri

       The requested uri.

    .. attribute:: path

       The path portion of `uri`

    .. attribute:: query

       The query portion of `uri`

    .. attribute:: version

       HTTP version specified in request, e.g. "HTTP/1.1"

    .. attribute:: headers

       `.HTTPHeaders` dictionary-like object for request headers.  Acts like
       a case-insensitive dictionary with additional methods for repeated
       headers.

    .. attribute:: body

       Request body, if present, as a byte string.

    .. attribute:: remote_ip

       Client's IP address as a string.  If ``HTTPServer.xheaders`` is set,
       will pass along the real IP address provided by a load balancer
       in the ``X-Real-Ip`` or ``X-Forwarded-For`` header.

    .. versionchanged:: 3.1
       The list format of ``X-Forwarded-For`` is now supported.

    .. attribute:: protocol

       The protocol used, either "http" or "https".  If ``HTTPServer.xheaders``
       is set, will pass along the protocol used by a load balancer if
       reported via an ``X-Scheme`` header.

    .. attribute:: host

       The requested hostname, usually taken from the ``Host`` header.

    .. attribute:: arguments

       GET/POST arguments are available in the arguments property, which
       maps arguments names to lists of values (to support multiple values
       for individual names). Names are of type `str`, while arguments
       are byte strings.  Note that this is different from
       `.RequestHandler.get_argument`, which returns argument values as
       unicode strings.

    .. attribute:: query_arguments

       Same format as ``arguments``, but contains only arguments extracted
       from the query string.

       .. versionadded:: 3.2

    .. attribute:: body_arguments

       Same format as ``arguments``, but contains only arguments extracted
       from the request body.

       .. versionadded:: 3.2

    .. attribute:: files

       File uploads are available in the files property, which maps file
       names to lists of `.HTTPFile`.

    .. attribute:: connection

       An HTTP request is attached to a single HTTP connection, which can
       be accessed through the "connection" attribute. Since connections
       are typically kept open in HTTP/1.1, multiple requests can be handled
       sequentially on a single connection.

    .. versionchanged:: 4.0
       Moved from ``tornado.httpserver.HTTPRequest``.
    """

    path = None  # type: str
    query = None  # type: str

    # HACK: Used for stream_request_body
    _body_future = None  # type: Future[None]

    def __init__(
        self,
        method: str = None,
        uri: str = None,
        version: str = "HTTP/1.0",
        headers: HTTPHeaders = None,
        body: bytes = None,
        host: str = None,
        files: Dict[str, List["HTTPFile"]] = None,
        connection: "HTTPConnection" = None,
        start_line: "RequestStartLine" = None,
        server_connection: object = None,
    ) -> None:
        if start_line is not None:
            # A parsed start line takes precedence over the individual
            # method/uri/version arguments.
            method, uri, version = start_line
        self.method = method
        self.uri = uri
        self.version = version
        self.headers = headers or HTTPHeaders()
        self.body = body or b""

        # set remote IP and protocol; the connection's context (if any)
        # carries what the transport layer knows about the peer.
        context = getattr(connection, "context", None)
        self.remote_ip = getattr(context, "remote_ip", None)
        self.protocol = getattr(context, "protocol", "http")

        self.host = host or self.headers.get("Host") or "127.0.0.1"
        self.host_name = split_host_and_port(self.host.lower())[0]
        self.files = files or {}
        self.connection = connection
        self.server_connection = server_connection
        self._start_time = time.time()
        # Set by the server when the request finishes; None while active.
        self._finish_time = None

        if uri is not None:
            self.path, sep, self.query = uri.partition("?")
        self.arguments = parse_qs_bytes(self.query, keep_blank_values=True)
        # Deep-copied so later body-argument merging in _parse_body()
        # does not leak into query_arguments.
        self.query_arguments = copy.deepcopy(self.arguments)
        self.body_arguments = {}  # type: Dict[str, List[bytes]]

    @property
    def cookies(self) -> Dict[str, http.cookies.Morsel]:
        """A dictionary of ``http.cookies.Morsel`` objects."""
        if not hasattr(self, "_cookies"):
            # Parse lazily on first access and cache on the instance.
            self._cookies = http.cookies.SimpleCookie()
            if "Cookie" in self.headers:
                try:
                    parsed = parse_cookie(self.headers["Cookie"])
                except Exception:
                    # Best effort: an unparseable header yields no cookies.
                    pass
                else:
                    for k, v in parsed.items():
                        try:
                            self._cookies[k] = v
                        except Exception:
                            # SimpleCookie imposes some restrictions on keys;
                            # parse_cookie does not. Discard any cookies
                            # with disallowed keys.
                            pass
        return self._cookies

    def full_url(self) -> str:
        """Reconstructs the full URL for this request."""
        return self.protocol + "://" + self.host + self.uri

    def request_time(self) -> float:
        """Returns the amount of time it took for this request to execute."""
        if self._finish_time is None:
            # Still in progress: report elapsed time so far.
            return time.time() - self._start_time
        else:
            return self._finish_time - self._start_time

    def get_ssl_certificate(
        self, binary_form: bool = False
    ) -> Union[None, Dict, bytes]:
        """Returns the client's SSL certificate, if any.

        To use client certificates, the HTTPServer's
        `ssl.SSLContext.verify_mode` field must be set, e.g.::

            ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
            ssl_ctx.load_cert_chain("foo.crt", "foo.key")
            ssl_ctx.load_verify_locations("cacerts.pem")
            ssl_ctx.verify_mode = ssl.CERT_REQUIRED
            server = HTTPServer(app, ssl_options=ssl_ctx)

        By default, the return value is a dictionary (or None, if no
        client certificate is present).  If ``binary_form`` is true, a
        DER-encoded form of the certificate is returned instead.  See
        SSLSocket.getpeercert() in the standard library for more
        details.
        http://docs.python.org/library/ssl.html#sslsocket-objects
        """
        try:
            if self.connection is None:
                return None
            # TODO: add a method to HTTPConnection for this so it can work with HTTP/2
            return self.connection.stream.socket.getpeercert(  # type: ignore
                binary_form=binary_form
            )
        except SSLError:
            return None

    def _parse_body(self) -> None:
        # Parse the request body into body_arguments/files, then fold
        # the body arguments into the combined ``arguments`` dict.
        parse_body_arguments(
            self.headers.get("Content-Type", ""),
            self.body,
            self.body_arguments,
            self.files,
            self.headers,
        )

        for k, v in self.body_arguments.items():
            self.arguments.setdefault(k, []).extend(v)

    def __repr__(self) -> str:
        attrs = ("protocol", "host", "method", "uri", "version", "remote_ip")
        args = ", ".join(["%s=%r" % (n, getattr(self, n)) for n in attrs])
        return "%s(%s)" % (self.__class__.__name__, args)
  384. class HTTPInputError(Exception):
  385. """Exception class for malformed HTTP requests or responses
  386. from remote sources.
  387. .. versionadded:: 4.0
  388. """
  389. pass
  390. class HTTPOutputError(Exception):
  391. """Exception class for errors in HTTP output.
  392. .. versionadded:: 4.0
  393. """
  394. pass
class HTTPServerConnectionDelegate(object):
    """Implement this interface to handle requests from `.HTTPServer`.

    .. versionadded:: 4.0
    """

    def start_request(
        self, server_conn: object, request_conn: "HTTPConnection"
    ) -> "HTTPMessageDelegate":
        """This method is called by the server when a new request has started.

        :arg server_conn: is an opaque object representing the long-lived
            (e.g. tcp-level) connection.
        :arg request_conn: is a `.HTTPConnection` object for a single
            request/response exchange.

        This method should return a `.HTTPMessageDelegate`.
        """
        raise NotImplementedError()

    def on_close(self, server_conn: object) -> None:
        """This method is called when a connection has been closed.

        :arg server_conn: is a server connection that has previously been
            passed to ``start_request``.
        """
        # Default implementation is a no-op; subclasses may override.
        pass
class HTTPMessageDelegate(object):
    """Implement this interface to handle an HTTP request or response.

    .. versionadded:: 4.0
    """

    # TODO: genericize this class to avoid exposing the Union.
    def headers_received(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
    ) -> Optional[Awaitable[None]]:
        """Called when the HTTP headers have been received and parsed.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`
            depending on whether this is a client or server message.
        :arg headers: a `.HTTPHeaders` instance.

        Some `.HTTPConnection` methods can only be called during
        ``headers_received``.

        May return a `.Future`; if it does the body will not be read
        until it is done.
        """
        pass

    def data_received(self, chunk: bytes) -> Optional[Awaitable[None]]:
        """Called when a chunk of data has been received.

        May return a `.Future` for flow control.
        """
        pass

    def finish(self) -> None:
        """Called after the last chunk of data has been received."""
        pass

    def on_connection_close(self) -> None:
        """Called if the connection is closed without finishing the request.

        If ``headers_received`` is called, either ``finish`` or
        ``on_connection_close`` will be called, but not both.
        """
        pass
class HTTPConnection(object):
    """Applications use this interface to write their responses.

    .. versionadded:: 4.0
    """

    def write_headers(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
        chunk: bytes = None,
    ) -> "Future[None]":
        """Write an HTTP header block.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`.
        :arg headers: a `.HTTPHeaders` instance.
        :arg chunk: the first (optional) chunk of data.  This is an optimization
            so that small responses can be written in the same call as their
            headers.

        The ``version`` field of ``start_line`` is ignored.

        Returns a future for flow control.

        .. versionchanged:: 6.0
           The ``callback`` argument was removed.
        """
        raise NotImplementedError()

    def write(self, chunk: bytes) -> "Future[None]":
        """Writes a chunk of body data.

        Returns a future for flow control.

        .. versionchanged:: 6.0
           The ``callback`` argument was removed.
        """
        raise NotImplementedError()

    def finish(self) -> None:
        """Indicates that the last body data has been written.
        """
        raise NotImplementedError()
  483. def url_concat(
  484. url: str,
  485. args: Union[
  486. None, Dict[str, str], List[Tuple[str, str]], Tuple[Tuple[str, str], ...]
  487. ],
  488. ) -> str:
  489. """Concatenate url and arguments regardless of whether
  490. url has existing query parameters.
  491. ``args`` may be either a dictionary or a list of key-value pairs
  492. (the latter allows for multiple values with the same key.
  493. >>> url_concat("http://example.com/foo", dict(c="d"))
  494. 'http://example.com/foo?c=d'
  495. >>> url_concat("http://example.com/foo?a=b", dict(c="d"))
  496. 'http://example.com/foo?a=b&c=d'
  497. >>> url_concat("http://example.com/foo?a=b", [("c", "d"), ("c", "d2")])
  498. 'http://example.com/foo?a=b&c=d&c=d2'
  499. """
  500. if args is None:
  501. return url
  502. parsed_url = urlparse(url)
  503. if isinstance(args, dict):
  504. parsed_query = parse_qsl(parsed_url.query, keep_blank_values=True)
  505. parsed_query.extend(args.items())
  506. elif isinstance(args, list) or isinstance(args, tuple):
  507. parsed_query = parse_qsl(parsed_url.query, keep_blank_values=True)
  508. parsed_query.extend(args)
  509. else:
  510. err = "'args' parameter should be dict, list or tuple. Not {0}".format(
  511. type(args)
  512. )
  513. raise TypeError(err)
  514. final_query = urlencode(parsed_query)
  515. url = urlunparse(
  516. (
  517. parsed_url[0],
  518. parsed_url[1],
  519. parsed_url[2],
  520. parsed_url[3],
  521. final_query,
  522. parsed_url[5],
  523. )
  524. )
  525. return url
class HTTPFile(ObjectDict):
    """Represents a file uploaded via a form.

    For backwards compatibility, its instance attributes are also
    accessible as dictionary keys.

    * ``filename``
    * ``body``
    * ``content_type``
    """

    pass
  535. def _parse_request_range(
  536. range_header: str,
  537. ) -> Optional[Tuple[Optional[int], Optional[int]]]:
  538. """Parses a Range header.
  539. Returns either ``None`` or tuple ``(start, end)``.
  540. Note that while the HTTP headers use inclusive byte positions,
  541. this method returns indexes suitable for use in slices.
  542. >>> start, end = _parse_request_range("bytes=1-2")
  543. >>> start, end
  544. (1, 3)
  545. >>> [0, 1, 2, 3, 4][start:end]
  546. [1, 2]
  547. >>> _parse_request_range("bytes=6-")
  548. (6, None)
  549. >>> _parse_request_range("bytes=-6")
  550. (-6, None)
  551. >>> _parse_request_range("bytes=-0")
  552. (None, 0)
  553. >>> _parse_request_range("bytes=")
  554. (None, None)
  555. >>> _parse_request_range("foo=42")
  556. >>> _parse_request_range("bytes=1-2,6-10")
  557. Note: only supports one range (ex, ``bytes=1-2,6-10`` is not allowed).
  558. See [0] for the details of the range header.
  559. [0]: http://greenbytes.de/tech/webdav/draft-ietf-httpbis-p5-range-latest.html#byte.ranges
  560. """
  561. unit, _, value = range_header.partition("=")
  562. unit, value = unit.strip(), value.strip()
  563. if unit != "bytes":
  564. return None
  565. start_b, _, end_b = value.partition("-")
  566. try:
  567. start = _int_or_none(start_b)
  568. end = _int_or_none(end_b)
  569. except ValueError:
  570. return None
  571. if end is not None:
  572. if start is None:
  573. if end != 0:
  574. start = -end
  575. end = None
  576. else:
  577. end += 1
  578. return (start, end)
  579. def _get_content_range(start: Optional[int], end: Optional[int], total: int) -> str:
  580. """Returns a suitable Content-Range header:
  581. >>> print(_get_content_range(None, 1, 4))
  582. bytes 0-0/4
  583. >>> print(_get_content_range(1, 3, 4))
  584. bytes 1-2/4
  585. >>> print(_get_content_range(None, None, 4))
  586. bytes 0-3/4
  587. """
  588. start = start or 0
  589. end = (end or total) - 1
  590. return "bytes %s-%s/%s" % (start, end, total)
  591. def _int_or_none(val: str) -> Optional[int]:
  592. val = val.strip()
  593. if val == "":
  594. return None
  595. return int(val)
  596. def parse_body_arguments(
  597. content_type: str,
  598. body: bytes,
  599. arguments: Dict[str, List[bytes]],
  600. files: Dict[str, List[HTTPFile]],
  601. headers: HTTPHeaders = None,
  602. ) -> None:
  603. """Parses a form request body.
  604. Supports ``application/x-www-form-urlencoded`` and
  605. ``multipart/form-data``. The ``content_type`` parameter should be
  606. a string and ``body`` should be a byte string. The ``arguments``
  607. and ``files`` parameters are dictionaries that will be updated
  608. with the parsed contents.
  609. """
  610. if content_type.startswith("application/x-www-form-urlencoded"):
  611. if headers and "Content-Encoding" in headers:
  612. gen_log.warning(
  613. "Unsupported Content-Encoding: %s", headers["Content-Encoding"]
  614. )
  615. return
  616. try:
  617. uri_arguments = parse_qs_bytes(native_str(body), keep_blank_values=True)
  618. except Exception as e:
  619. gen_log.warning("Invalid x-www-form-urlencoded body: %s", e)
  620. uri_arguments = {}
  621. for name, values in uri_arguments.items():
  622. if values:
  623. arguments.setdefault(name, []).extend(values)
  624. elif content_type.startswith("multipart/form-data"):
  625. if headers and "Content-Encoding" in headers:
  626. gen_log.warning(
  627. "Unsupported Content-Encoding: %s", headers["Content-Encoding"]
  628. )
  629. return
  630. try:
  631. fields = content_type.split(";")
  632. for field in fields:
  633. k, sep, v = field.strip().partition("=")
  634. if k == "boundary" and v:
  635. parse_multipart_form_data(utf8(v), body, arguments, files)
  636. break
  637. else:
  638. raise ValueError("multipart boundary not found")
  639. except Exception as e:
  640. gen_log.warning("Invalid multipart/form-data: %s", e)
def parse_multipart_form_data(
    boundary: bytes,
    data: bytes,
    arguments: Dict[str, List[bytes]],
    files: Dict[str, List[HTTPFile]],
) -> None:
    """Parses a ``multipart/form-data`` body.

    The ``boundary`` and ``data`` parameters are both byte strings.
    The dictionaries given in the arguments and files parameters
    will be updated with the contents of the body.

    .. versionchanged:: 5.1

       Now recognizes non-ASCII filenames in RFC 2231/5987
       (``filename*=``) format.
    """
    # The standard allows for the boundary to be quoted in the header,
    # although it's rare (it happens at least for google app engine
    # xmpp).  I think we're also supposed to handle backslash-escapes
    # here but I'll save that until we see a client that uses them
    # in the wild.
    if boundary.startswith(b'"') and boundary.endswith(b'"'):
        boundary = boundary[1:-1]
    final_boundary_index = data.rfind(b"--" + boundary + b"--")
    if final_boundary_index == -1:
        gen_log.warning("Invalid multipart/form-data: no final boundary")
        return
    # Everything before the final boundary, split on each part delimiter.
    parts = data[:final_boundary_index].split(b"--" + boundary + b"\r\n")
    for part in parts:
        if not part:
            continue
        # Each part is "headers\r\n\r\nbody\r\n"; locate the blank line
        # separating headers from the body.
        eoh = part.find(b"\r\n\r\n")
        if eoh == -1:
            gen_log.warning("multipart/form-data missing headers")
            continue
        headers = HTTPHeaders.parse(part[:eoh].decode("utf-8"))
        disp_header = headers.get("Content-Disposition", "")
        disposition, disp_params = _parse_header(disp_header)
        if disposition != "form-data" or not part.endswith(b"\r\n"):
            gen_log.warning("Invalid multipart/form-data")
            continue
        # Skip the "\r\n\r\n" separator (4 bytes) and drop the trailing
        # "\r\n" that precedes the next boundary.
        value = part[eoh + 4 : -2]
        if not disp_params.get("name"):
            gen_log.warning("multipart/form-data value missing name")
            continue
        name = disp_params["name"]
        if disp_params.get("filename"):
            # A filename parameter marks this part as a file upload.
            ctype = headers.get("Content-Type", "application/unknown")
            files.setdefault(name, []).append(
                HTTPFile(
                    filename=disp_params["filename"], body=value, content_type=ctype
                )
            )
        else:
            arguments.setdefault(name, []).append(value)
  694. def format_timestamp(
  695. ts: Union[int, float, tuple, time.struct_time, datetime.datetime]
  696. ) -> str:
  697. """Formats a timestamp in the format used by HTTP.
  698. The argument may be a numeric timestamp as returned by `time.time`,
  699. a time tuple as returned by `time.gmtime`, or a `datetime.datetime`
  700. object.
  701. >>> format_timestamp(1359312200)
  702. 'Sun, 27 Jan 2013 18:43:20 GMT'
  703. """
  704. if isinstance(ts, (int, float)):
  705. time_num = ts
  706. elif isinstance(ts, (tuple, time.struct_time)):
  707. time_num = calendar.timegm(ts)
  708. elif isinstance(ts, datetime.datetime):
  709. time_num = calendar.timegm(ts.utctimetuple())
  710. else:
  711. raise TypeError("unknown timestamp type: %r" % ts)
  712. return email.utils.formatdate(time_num, usegmt=True)
  713. RequestStartLine = collections.namedtuple(
  714. "RequestStartLine", ["method", "path", "version"]
  715. )
  716. def parse_request_start_line(line: str) -> RequestStartLine:
  717. """Returns a (method, path, version) tuple for an HTTP 1.x request line.
  718. The response is a `collections.namedtuple`.
  719. >>> parse_request_start_line("GET /foo HTTP/1.1")
  720. RequestStartLine(method='GET', path='/foo', version='HTTP/1.1')
  721. """
  722. try:
  723. method, path, version = line.split(" ")
  724. except ValueError:
  725. # https://tools.ietf.org/html/rfc7230#section-3.1.1
  726. # invalid request-line SHOULD respond with a 400 (Bad Request)
  727. raise HTTPInputError("Malformed HTTP request line")
  728. if not re.match(r"^HTTP/1\.[0-9]$", version):
  729. raise HTTPInputError(
  730. "Malformed HTTP version in HTTP Request-Line: %r" % version
  731. )
  732. return RequestStartLine(method, path, version)
# Parsed HTTP/1.x response status line, e.g. ("HTTP/1.1", 200, "OK").
# Produced by parse_response_start_line below; ``code`` is an int.
ResponseStartLine = collections.namedtuple(
    "ResponseStartLine", ["version", "code", "reason"]
)
  736. def parse_response_start_line(line: str) -> ResponseStartLine:
  737. """Returns a (version, code, reason) tuple for an HTTP 1.x response line.
  738. The response is a `collections.namedtuple`.
  739. >>> parse_response_start_line("HTTP/1.1 200 OK")
  740. ResponseStartLine(version='HTTP/1.1', code=200, reason='OK')
  741. """
  742. line = native_str(line)
  743. match = re.match("(HTTP/1.[0-9]) ([0-9]+) ([^\r]*)", line)
  744. if not match:
  745. raise HTTPInputError("Error parsing response start line")
  746. return ResponseStartLine(match.group(1), int(match.group(2)), match.group(3))
  747. # _parseparam and _parse_header are copied and modified from python2.7's cgi.py
  748. # The original 2.7 version of this code did not correctly support some
  749. # combinations of semicolons and double quotes.
  750. # It has also been modified to support valueless parameters as seen in
  751. # websocket extension negotiations, and to support non-ascii values in
  752. # RFC 2231/5987 format.
  753. def _parseparam(s: str) -> Generator[str, None, None]:
  754. while s[:1] == ";":
  755. s = s[1:]
  756. end = s.find(";")
  757. while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
  758. end = s.find(";", end + 1)
  759. if end < 0:
  760. end = len(s)
  761. f = s[:end]
  762. yield f.strip()
  763. s = s[end:]
  764. def _parse_header(line: str) -> Tuple[str, Dict[str, str]]:
  765. r"""Parse a Content-type like header.
  766. Return the main content-type and a dictionary of options.
  767. >>> d = "form-data; foo=\"b\\\\a\\\"r\"; file*=utf-8''T%C3%A4st"
  768. >>> ct, d = _parse_header(d)
  769. >>> ct
  770. 'form-data'
  771. >>> d['file'] == r'T\u00e4st'.encode('ascii').decode('unicode_escape')
  772. True
  773. >>> d['foo']
  774. 'b\\a"r'
  775. """
  776. parts = _parseparam(";" + line)
  777. key = next(parts)
  778. # decode_params treats first argument special, but we already stripped key
  779. params = [("Dummy", "value")]
  780. for p in parts:
  781. i = p.find("=")
  782. if i >= 0:
  783. name = p[:i].strip().lower()
  784. value = p[i + 1 :].strip()
  785. params.append((name, native_str(value)))
  786. decoded_params = email.utils.decode_params(params)
  787. decoded_params.pop(0) # get rid of the dummy again
  788. pdict = {}
  789. for name, decoded_value in decoded_params:
  790. value = email.utils.collapse_rfc2231_value(decoded_value)
  791. if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
  792. value = value[1:-1]
  793. pdict[name] = value
  794. return key, pdict
  795. def _encode_header(key: str, pdict: Dict[str, str]) -> str:
  796. """Inverse of _parse_header.
  797. >>> _encode_header('permessage-deflate',
  798. ... {'client_max_window_bits': 15, 'client_no_context_takeover': None})
  799. 'permessage-deflate; client_max_window_bits=15; client_no_context_takeover'
  800. """
  801. if not pdict:
  802. return key
  803. out = [key]
  804. # Sort the parameters just to make it easy to test.
  805. for k, v in sorted(pdict.items()):
  806. if v is None:
  807. out.append(k)
  808. else:
  809. # TODO: quote if necessary.
  810. out.append("%s=%s" % (k, v))
  811. return "; ".join(out)
  812. def encode_username_password(
  813. username: Union[str, bytes], password: Union[str, bytes]
  814. ) -> bytes:
  815. """Encodes a username/password pair in the format used by HTTP auth.
  816. The return value is a byte string in the form ``username:password``.
  817. .. versionadded:: 5.1
  818. """
  819. if isinstance(username, unicode_type):
  820. username = unicodedata.normalize("NFC", username)
  821. if isinstance(password, unicode_type):
  822. password = unicodedata.normalize("NFC", password)
  823. return utf8(username) + b":" + utf8(password)
def doctests():
    # type: () -> unittest.TestSuite
    """Returns a `unittest.TestSuite` running this module's doctests."""
    import doctest

    return doctest.DocTestSuite()
  828. def split_host_and_port(netloc: str) -> Tuple[str, Optional[int]]:
  829. """Returns ``(host, port)`` tuple from ``netloc``.
  830. Returned ``port`` will be ``None`` if not present.
  831. .. versionadded:: 4.1
  832. """
  833. match = re.match(r"^(.+):(\d+)$", netloc)
  834. if match:
  835. host = match.group(1)
  836. port = int(match.group(2)) # type: Optional[int]
  837. else:
  838. host = netloc
  839. port = None
  840. return (host, port)
  841. def qs_to_qsl(qs: Dict[str, List[AnyStr]]) -> Iterable[Tuple[str, AnyStr]]:
  842. """Generator converting a result of ``parse_qs`` back to name-value pairs.
  843. .. versionadded:: 5.0
  844. """
  845. for k, vs in qs.items():
  846. for v in vs:
  847. yield (k, v)
# Helpers for _unquote_cookie (mirroring http.cookies):
# a three-digit octal escape such as \012
_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]")
# any backslash-escaped character, e.g. \" or \\
_QuotePatt = re.compile(r"[\\].")
# cached joiner used to assemble the unquoted result
_nulljoin = "".join
def _unquote_cookie(s: str) -> str:
    """Handle double quotes and escaping in cookie values.

    This method is copied verbatim from the Python 3.5 standard
    library (http.cookies._unquote) so we don't have to depend on
    non-public interfaces.  Keep it byte-identical to the stdlib copy.
    """
    # If there aren't any doublequotes,
    # then there can't be any special characters. See RFC 2109.
    if s is None or len(s) < 2:
        return s
    if s[0] != '"' or s[-1] != '"':
        return s
    # We have to assume that we must decode this string.
    # Down to work.
    # Remove the "s
    s = s[1:-1]
    # Check for special sequences. Examples:
    # \012 --> \n
    # \" --> "
    #
    i = 0
    n = len(s)
    res = []
    # Scan left to right, always applying whichever escape (octal or
    # backslash-char) occurs first, and copying literal text between
    # escapes unchanged.
    while 0 <= i < n:
        o_match = _OctalPatt.search(s, i)
        q_match = _QuotePatt.search(s, i)
        if not o_match and not q_match:  # Neither matched
            res.append(s[i:])
            break
        # else:
        j = k = -1
        if o_match:
            j = o_match.start(0)
        if q_match:
            k = q_match.start(0)
        if q_match and (not o_match or k < j):  # QuotePatt matched
            res.append(s[i:k])
            res.append(s[k + 1])  # keep the escaped character itself
            i = k + 2
        else:  # OctalPatt matched
            res.append(s[i:j])
            res.append(chr(int(s[j + 1 : j + 4], 8)))  # decode \NNN octal
            i = j + 4
    return _nulljoin(res)
  895. def parse_cookie(cookie: str) -> Dict[str, str]:
  896. """Parse a ``Cookie`` HTTP header into a dict of name/value pairs.
  897. This function attempts to mimic browser cookie parsing behavior;
  898. it specifically does not follow any of the cookie-related RFCs
  899. (because browsers don't either).
  900. The algorithm used is identical to that used by Django version 1.9.10.
  901. .. versionadded:: 4.4.2
  902. """
  903. cookiedict = {}
  904. for chunk in cookie.split(str(";")):
  905. if str("=") in chunk:
  906. key, val = chunk.split(str("="), 1)
  907. else:
  908. # Assume an empty name per
  909. # https://bugzilla.mozilla.org/show_bug.cgi?id=169091
  910. key, val = str(""), chunk
  911. key, val = key.strip(), val.strip()
  912. if key or val:
  913. # unquote using Python's algorithm.
  914. cookiedict[key] = _unquote_cookie(val)
  915. return cookiedict