| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522 |
- # -*- coding: utf-8 -*-
- from tornado.httputil import (
- url_concat,
- parse_multipart_form_data,
- HTTPHeaders,
- format_timestamp,
- HTTPServerRequest,
- parse_request_start_line,
- parse_cookie,
- qs_to_qsl,
- HTTPInputError,
- HTTPFile,
- )
- from tornado.escape import utf8, native_str
- from tornado.log import gen_log
- from tornado.testing import ExpectLog
- import copy
- import datetime
- import logging
- import pickle
- import time
- import urllib.parse
- import unittest
- from typing import Tuple, Dict, List
def form_data_args() -> Tuple[Dict[str, List[bytes]], Dict[str, List[HTTPFile]]]:
    """Build a fresh pair of empty dicts for parse_multipart_form_data.

    The first dict is annotated for argument values and the second for
    uploaded files. mypy wants explicit types on empty dict literals, so
    spelling them out once here keeps the tests themselves uncluttered.
    """
    arguments = {}  # type: Dict[str, List[bytes]]
    files = {}  # type: Dict[str, List[HTTPFile]]
    return arguments, files
class TestUrlConcat(unittest.TestCase):
    """Checks how url_concat merges extra arguments into a URL."""

    def test_url_concat_no_query_params(self):
        # A URL without a query string gains one.
        extra = [("y", "y"), ("z", "z")]
        combined = url_concat("https://localhost/path", extra)
        self.assertEqual(combined, "https://localhost/path?y=y&z=z")

    def test_url_concat_encode_args(self):
        # The "/" in the value is expected to be percent-encoded.
        extra = [("y", "/y"), ("z", "z")]
        combined = url_concat("https://localhost/path", extra)
        self.assertEqual(combined, "https://localhost/path?y=%2Fy&z=z")

    def test_url_concat_trailing_q(self):
        # A dangling "?" must not produce a second one.
        extra = [("y", "y"), ("z", "z")]
        combined = url_concat("https://localhost/path?", extra)
        self.assertEqual(combined, "https://localhost/path?y=y&z=z")

    def test_url_concat_q_with_no_trailing_amp(self):
        # The valueless "x" is expected to come back as "x=".
        extra = [("y", "y"), ("z", "z")]
        combined = url_concat("https://localhost/path?x", extra)
        self.assertEqual(combined, "https://localhost/path?x=&y=y&z=z")

    def test_url_concat_trailing_amp(self):
        # A trailing "&" must not produce an empty argument.
        extra = [("y", "y"), ("z", "z")]
        combined = url_concat("https://localhost/path?x&", extra)
        self.assertEqual(combined, "https://localhost/path?x=&y=y&z=z")

    def test_url_concat_mult_params(self):
        # New arguments are appended after the existing ones.
        extra = [("y", "y"), ("z", "z")]
        combined = url_concat("https://localhost/path?a=1&b=2", extra)
        self.assertEqual(combined, "https://localhost/path?a=1&b=2&y=y&z=z")

    def test_url_concat_no_params(self):
        # An empty argument list leaves the URL as it was.
        combined = url_concat("https://localhost/path?r=1&t=2", [])
        self.assertEqual(combined, "https://localhost/path?r=1&t=2")

    def test_url_concat_none_params(self):
        # None is accepted and also leaves the URL as it was.
        combined = url_concat("https://localhost/path?r=1&t=2", None)
        self.assertEqual(combined, "https://localhost/path?r=1&t=2")

    def test_url_concat_with_frag(self):
        # The query string is inserted before the fragment.
        combined = url_concat("https://localhost/path#tab", [("y", "y")])
        self.assertEqual(combined, "https://localhost/path?y=y#tab")

    def test_url_concat_multi_same_params(self):
        # Repeated keys in the new arguments are all kept, in order.
        extra = [("y", "y1"), ("y", "y2")]
        combined = url_concat("https://localhost/path", extra)
        self.assertEqual(combined, "https://localhost/path?y=y1&y=y2")

    def test_url_concat_multi_same_query_params(self):
        # Repeated keys already present in the URL are all kept.
        combined = url_concat("https://localhost/path?r=1&r=2", [("y", "y")])
        self.assertEqual(combined, "https://localhost/path?r=1&r=2&y=y")

    def test_url_concat_dict_params(self):
        # A mapping is accepted in place of a list of pairs.
        combined = url_concat("https://localhost/path", {"y": "y"})
        self.assertEqual(combined, "https://localhost/path?y=y")
class QsParseTest(unittest.TestCase):
    """Checks that qs_to_qsl flattens a parse_qs result into pairs."""

    def test_parsing(self):
        parsed = urllib.parse.parse_qs("a=1&b=2&a=3")
        pairs = list(qs_to_qsl(parsed))
        # Every original (key, value) pair must be present, including both
        # values of the repeated key "a".
        for expected_pair in (("a", "1"), ("a", "3"), ("b", "2")):
            self.assertIn(expected_pair, pairs)
class MultipartFormDataTest(unittest.TestCase):
    """Tests for parse_multipart_form_data using small hand-written payloads.

    Each payload is written with LF line ends and converted to CRLF before
    parsing. Multipart syntax separates a part's headers from its body with
    an empty line, so every payload that is expected to reach header parsing
    contains that empty line.
    """

    def test_file_upload(self):
        data = b"""\
--1234
Content-Disposition: form-data; name="files"; filename="ab.txt"

Foo
--1234--""".replace(
            b"\n", b"\r\n"
        )
        args, files = form_data_args()
        parse_multipart_form_data(b"1234", data, args, files)
        upload = files["files"][0]
        self.assertEqual(upload["filename"], "ab.txt")
        self.assertEqual(upload["body"], b"Foo")

    def test_unquoted_names(self):
        # quotes are optional unless special characters are present
        data = b"""\
--1234
Content-Disposition: form-data; name=files; filename=ab.txt

Foo
--1234--""".replace(
            b"\n", b"\r\n"
        )
        args, files = form_data_args()
        parse_multipart_form_data(b"1234", data, args, files)
        upload = files["files"][0]
        self.assertEqual(upload["filename"], "ab.txt")
        self.assertEqual(upload["body"], b"Foo")

    def test_special_filenames(self):
        filenames = [
            "a;b.txt",
            'a"b.txt',
            'a";b.txt',
            'a;"b.txt',
            'a";";.txt',
            'a\\"b.txt',
            "a\\b.txt",
        ]
        for filename in filenames:
            logging.debug("trying filename %r", filename)
            # Backslashes are escaped first so that the backslashes added
            # while escaping the quotes are not doubled again.
            escaped = filename.replace("\\", "\\\\").replace('"', '\\"')
            str_data = (
                """\
--1234
Content-Disposition: form-data; name="files"; filename="%s"

Foo
--1234--"""
                % escaped
            )
            data = utf8(str_data.replace("\n", "\r\n"))
            args, files = form_data_args()
            parse_multipart_form_data(b"1234", data, args, files)
            upload = files["files"][0]
            self.assertEqual(upload["filename"], filename)
            self.assertEqual(upload["body"], b"Foo")

    def test_non_ascii_filename(self):
        # The extended filename* parameter is expected to win over the
        # plain ASCII filename parameter.
        data = b"""\
--1234
Content-Disposition: form-data; name="files"; filename="ab.txt"; filename*=UTF-8''%C3%A1b.txt

Foo
--1234--""".replace(
            b"\n", b"\r\n"
        )
        args, files = form_data_args()
        parse_multipart_form_data(b"1234", data, args, files)
        upload = files["files"][0]
        self.assertEqual(upload["filename"], "áb.txt")
        self.assertEqual(upload["body"], b"Foo")

    def test_boundary_starts_and_ends_with_quotes(self):
        data = b"""\
--1234
Content-Disposition: form-data; name="files"; filename="ab.txt"

Foo
--1234--""".replace(
            b"\n", b"\r\n"
        )
        args, files = form_data_args()
        # The boundary argument is passed with its surrounding quotes.
        parse_multipart_form_data(b'"1234"', data, args, files)
        upload = files["files"][0]
        self.assertEqual(upload["filename"], "ab.txt")
        self.assertEqual(upload["body"], b"Foo")

    def test_missing_headers(self):
        # This part has no header block at all.
        data = b"""\
--1234
Foo
--1234--""".replace(
            b"\n", b"\r\n"
        )
        args, files = form_data_args()
        with ExpectLog(gen_log, "multipart/form-data missing headers"):
            parse_multipart_form_data(b"1234", data, args, files)
        self.assertEqual(files, {})

    def test_invalid_content_disposition(self):
        # The disposition type is "invalid" rather than "form-data".
        data = b"""\
--1234
Content-Disposition: invalid; name="files"; filename="ab.txt"

Foo
--1234--""".replace(
            b"\n", b"\r\n"
        )
        args, files = form_data_args()
        with ExpectLog(gen_log, "Invalid multipart/form-data"):
            parse_multipart_form_data(b"1234", data, args, files)
        self.assertEqual(files, {})

    def test_line_does_not_end_with_correct_line_break(self):
        # The body runs straight into the closing boundary with no line
        # break in between.
        data = b"""\
--1234
Content-Disposition: form-data; name="files"; filename="ab.txt"

Foo--1234--""".replace(
            b"\n", b"\r\n"
        )
        args, files = form_data_args()
        with ExpectLog(gen_log, "Invalid multipart/form-data"):
            parse_multipart_form_data(b"1234", data, args, files)
        self.assertEqual(files, {})

    def test_content_disposition_header_without_name_parameter(self):
        data = b"""\
--1234
Content-Disposition: form-data; filename="ab.txt"

Foo
--1234--""".replace(
            b"\n", b"\r\n"
        )
        args, files = form_data_args()
        with ExpectLog(gen_log, "multipart/form-data value missing name"):
            parse_multipart_form_data(b"1234", data, args, files)
        self.assertEqual(files, {})

    def test_data_after_final_boundary(self):
        # The spec requires that data after the final boundary be ignored.
        # http://www.w3.org/Protocols/rfc1341/7_2_Multipart.html
        # In practice, some libraries include an extra CRLF after the boundary.
        data = b"""\
--1234
Content-Disposition: form-data; name="files"; filename="ab.txt"

Foo
--1234--
""".replace(
            b"\n", b"\r\n"
        )
        args, files = form_data_args()
        parse_multipart_form_data(b"1234", data, args, files)
        upload = files["files"][0]
        self.assertEqual(upload["filename"], "ab.txt")
        self.assertEqual(upload["body"], b"Foo")
class HTTPHeadersTest(unittest.TestCase):
    """Tests for parsing, copying, pickling and printing HTTPHeaders."""

    def test_multi_line(self):
        # Lines beginning with whitespace are appended to the previous line
        # with any leading whitespace replaced by a single space.
        # Note that while multi-line headers are a part of the HTTP spec,
        # their use is strongly discouraged.
        data = """\
Foo: bar
 baz
Asdf: qwer
\tzxcv
Foo: even
     more
     lines
""".replace(
            "\n", "\r\n"
        )
        headers = HTTPHeaders.parse(data)
        self.assertEqual(headers["asdf"], "qwer zxcv")
        self.assertEqual(headers.get_list("asdf"), ["qwer zxcv"])
        self.assertEqual(headers["Foo"], "bar baz,even more lines")
        self.assertEqual(headers.get_list("foo"), ["bar baz", "even more lines"])
        self.assertEqual(
            sorted(headers.get_all()),
            [("Asdf", "qwer zxcv"), ("Foo", "bar baz"), ("Foo", "even more lines")],
        )

    def test_malformed_continuation(self):
        # If the first line starts with whitespace, it's a
        # continuation line with nothing to continue, so reject it
        # (with a proper error).
        data = " Foo: bar"
        self.assertRaises(HTTPInputError, HTTPHeaders.parse, data)

    def test_unicode_newlines(self):
        # Ensure that only \r\n is recognized as a header separator, and not
        # the other newline-like unicode characters.
        # Characters that are likely to be problematic can be found in
        # http://unicode.org/standard/reports/tr13/tr13-5.html
        # and cpython's unicodeobject.c (which defines the implementation
        # of unicode_type.splitlines(), and uses a different list than TR13).
        newlines = [
            "\u000b",  # VERTICAL TAB
            "\u001b",  # ESCAPE (was mislabelled as VERTICAL TAB; kept for coverage)
            "\u001c",  # FILE SEPARATOR
            "\u001d",  # GROUP SEPARATOR
            "\u001e",  # RECORD SEPARATOR
            "\u0085",  # NEXT LINE
            "\u2028",  # LINE SEPARATOR
            "\u2029",  # PARAGRAPH SEPARATOR
        ]
        for newline in newlines:
            # Try the utf8 and latin1 representations of each newline
            for encoding in ["utf8", "latin1"]:
                try:
                    try:
                        encoded = newline.encode(encoding)
                    except UnicodeEncodeError:
                        # Some chars cannot be represented in latin1
                        continue
                    data = b"Cookie: foo=" + encoded + b"bar"
                    # parse() wants a native_str, so decode through latin1
                    # in the same way the real parser does.
                    headers = HTTPHeaders.parse(native_str(data.decode("latin1")))
                    expected = [
                        (
                            "Cookie",
                            "foo=" + native_str(encoded.decode("latin1")) + "bar",
                        )
                    ]
                    self.assertEqual(expected, list(headers.get_all()))
                except Exception:
                    # Record which combination failed before re-raising.
                    gen_log.warning("failed while trying %r in %s", newline, encoding)
                    raise

    def test_optional_cr(self):
        # Both CRLF and LF should be accepted as separators. CR should not be
        # part of the data when followed by LF, but it is a normal char
        # otherwise (or should bare CR be an error?)
        headers = HTTPHeaders.parse("CRLF: crlf\r\nLF: lf\nCR: cr\rMore: more\r\n")
        self.assertEqual(
            sorted(headers.get_all()),
            [("Cr", "cr\rMore: more"), ("Crlf", "crlf"), ("Lf", "lf")],
        )

    def test_copy(self):
        all_pairs = [("A", "1"), ("A", "2"), ("B", "c")]
        h1 = HTTPHeaders()
        for k, v in all_pairs:
            h1.add(k, v)
        h2 = h1.copy()
        h3 = copy.copy(h1)
        h4 = copy.deepcopy(h1)
        for headers in [h1, h2, h3, h4]:
            # All the copies are identical, no matter how they were
            # constructed.
            self.assertEqual(sorted(headers.get_all()), all_pairs)
        for headers in [h2, h3, h4]:
            # Neither the dict or its member lists are reused.
            self.assertIsNot(headers, h1)
            self.assertIsNot(headers.get_list("A"), h1.get_list("A"))

    def test_pickle_roundtrip(self):
        headers = HTTPHeaders()
        headers.add("Set-Cookie", "a=b")
        headers.add("Set-Cookie", "c=d")
        headers.add("Content-Type", "text/html")
        # The pickled bytes are produced by this test itself, so loading
        # them back is safe.
        pickled = pickle.dumps(headers)
        unpickled = pickle.loads(pickled)
        self.assertEqual(sorted(headers.get_all()), sorted(unpickled.get_all()))
        self.assertEqual(sorted(headers.items()), sorted(unpickled.items()))

    def test_setdefault(self):
        headers = HTTPHeaders()
        headers["foo"] = "bar"
        # If a value is present, setdefault returns it without changes.
        self.assertEqual(headers.setdefault("foo", "baz"), "bar")
        self.assertEqual(headers["foo"], "bar")
        # If a value is not present, setdefault sets it for future use.
        self.assertEqual(headers.setdefault("quux", "xyzzy"), "xyzzy")
        self.assertEqual(headers["quux"], "xyzzy")
        self.assertEqual(sorted(headers.get_all()), [("Foo", "bar"), ("Quux", "xyzzy")])

    def test_string(self):
        # Converting to str and parsing the result must give equal headers.
        headers = HTTPHeaders()
        headers.add("Foo", "1")
        headers.add("Foo", "2")
        headers.add("Foo", "3")
        headers2 = HTTPHeaders.parse(str(headers))
        # assertEquals is a deprecated alias that Python 3.12 removed.
        self.assertEqual(headers, headers2)
class FormatTimestampTest(unittest.TestCase):
    """Checks that format_timestamp accepts every supported input type.

    Each test passes the same instant in a different representation and
    expects the same formatted date string.
    """

    # Make sure that all the input types are supported.
    TIMESTAMP = 1359312200.503611
    EXPECTED = "Sun, 27 Jan 2013 18:43:20 GMT"

    def check(self, value):
        """Assert that ``value`` formats to ``EXPECTED``."""
        self.assertEqual(format_timestamp(value), self.EXPECTED)

    def test_unix_time_float(self):
        self.check(self.TIMESTAMP)

    def test_unix_time_int(self):
        self.check(int(self.TIMESTAMP))

    def test_struct_time(self):
        self.check(time.gmtime(self.TIMESTAMP))

    def test_time_tuple(self):
        tup = tuple(time.gmtime(self.TIMESTAMP))
        self.assertEqual(9, len(tup))
        self.check(tup)

    def test_datetime(self):
        # datetime.utcfromtimestamp() is deprecated since Python 3.12.
        # Build the aware UTC value and drop tzinfo to get the same naive
        # UTC datetime the deprecated call returned.
        aware = datetime.datetime.fromtimestamp(self.TIMESTAMP, datetime.timezone.utc)
        self.check(aware.replace(tzinfo=None))
- # HTTPServerRequest is mainly tested incidentally to the server itself,
- # but this tests the parts of the class that can be tested in isolation.
class HTTPServerRequestTest(unittest.TestCase):
    """Tests HTTPServerRequest behavior that needs no running server."""

    def test_default_constructor(self):
        # All parameters are formally optional, but uri is required
        # (and has been for some time). This test ensures that no
        # more required parameters slip in.
        HTTPServerRequest(uri="/")

    def test_body_is_a_byte_string(self):
        request = HTTPServerRequest(uri="/")
        self.assertIsInstance(request.body, bytes)

    def test_repr_does_not_contain_headers(self):
        request = HTTPServerRequest(
            uri="/", headers=HTTPHeaders({"Canary": ["Coal Mine"]})
        )
        # assertNotIn shows the offending repr when the check fails.
        self.assertNotIn("Canary", repr(request))
class ParseRequestStartLineTest(unittest.TestCase):
    """Checks that parse_request_start_line splits out its three fields."""

    METHOD = "GET"
    PATH = "/foo"
    VERSION = "HTTP/1.1"

    def test_parse_request_start_line(self):
        # Build the start line from the class constants, separated by
        # single spaces.
        line = " ".join((self.METHOD, self.PATH, self.VERSION))
        result = parse_request_start_line(line)
        self.assertEqual(result.method, self.METHOD)
        self.assertEqual(result.path, self.PATH)
        self.assertEqual(result.version, self.VERSION)
class ParseCookieTest(unittest.TestCase):
    """Table-driven checks of parse_cookie against known header strings."""

    # These tests copied from Django:
    # https://github.com/django/django/pull/6277/commits/da810901ada1cae9fc1f018f879f11a7fb467b28

    def test_python_cookies(self):
        """
        Cases taken from Python's Lib/test/test_http_cookies.py
        """
        cases = [
            (
                "chips=ahoy; vienna=finger",
                {"chips": "ahoy", "vienna": "finger"},
            ),
            # Here parse_cookie() differs from Python's cookie parsing in that
            # it treats all semicolons as delimiters, even within quotes.
            (
                'keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"',
                {"keebler": '"E=mc2', "L": '\\"Loves\\"', "fudge": "\\012", "": '"'},
            ),
            # Illegal cookies that have an '=' char in an unquoted value.
            ("keebler=E=mc2", {"keebler": "E=mc2"}),
            # Cookies with ':' character in their name.
            ("key:term=value:term", {"key:term": "value:term"}),
            # Cookies with '[' and ']'.
            (
                "a=b; c=[; d=r; f=h",
                {"a": "b", "c": "[", "d": "r", "f": "h"},
            ),
        ]
        for header, expected in cases:
            self.assertEqual(parse_cookie(header), expected)

    def test_cookie_edgecases(self):
        cases = [
            # Cookies that RFC6265 allows.
            (
                "a=b; Domain=example.com",
                {"a": "b", "Domain": "example.com"},
            ),
            # parse_cookie() has historically kept only the last cookie with
            # the same name.
            ("a=b; h=i; a=c", {"a": "c", "h": "i"}),
        ]
        for header, expected in cases:
            self.assertEqual(parse_cookie(header), expected)

    def test_invalid_cookies(self):
        """
        Headers that violate RFC6265 but that browsers still send when the
        cookie was set through document.cookie.
        """
        # Chunks without an equals sign appear as unnamed values per
        # https://bugzilla.mozilla.org/show_bug.cgi?id=169091
        parsed_names = parse_cookie("abc=def; unnamed; django_language=en").keys()
        self.assertIn("django_language", parsed_names)
        cases = [
            # Even a double quote may be an unnamed value.
            ('a=b; "; c=d', {"a": "b", "": '"', "c": "d"}),
            # Spaces in names and values, and an equals sign in values.
            ("a b c=d e = f; gh=i", {"a b c": "d e = f", "gh": "i"}),
            # More characters the spec forbids.
            (
                'a b,c<>@:/[]?{}=d " =e,f g',
                {"a b,c<>@:/[]?{}": 'd " =e,f g'},
            ),
            # Unicode characters. The spec only allows ASCII.
            (
                "saint=André Bessette",
                {"saint": native_str("André Bessette")},
            ),
            # Browsers don't send extra whitespace or semicolons in Cookie
            # headers, but parse_cookie() should parse whitespace the same way
            # document.cookie parses whitespace.
            (" = b ; ; = ; c = ; ", {"": "b", "c": ""}),
        ]
        for header, expected in cases:
            self.assertEqual(parse_cookie(header), expected)
|