upload: Attempt to guess the character set of text/* uploads which are served inline.

Currently, the only such content-type is `text/plain`.  When the
client-provided content-type does not itself specify a `charset`, we
use `chardet` to make a guess, and store that guess so that it can be
provided later when serving the content.  The detection is done in a
streaming fashion, and thus should not require re-downloading the
full content.
This commit is contained in:
Alex Vandiver
2025-07-18 05:55:07 +00:00
committed by Tim Abbott
parent ae001dfa96
commit 25fbb05fea
10 changed files with 76 additions and 20 deletions

24
uv.lock generated
View File

@@ -538,6 +538,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/7c/fc/6a8cb64e5f0324877d503c854da15d76c1e50eb722e320b15345c4d0c6de/cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a", size = 182009, upload-time = "2024-09-04T20:44:45.309Z" },
]
[[package]]
name = "chardet"
version = "5.2.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f3/0d/f7b6ab21ec75897ed80c17d79b15951a719226b9fababf1e40ea74d69079/chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7", size = 2069618, upload-time = "2023-08-01T19:23:02.662Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/38/6f/f5fbc992a329ee4e0f288c1fe0e2ad9485ed064cac731ed2fe47dcc38cbf/chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970", size = 199385, upload-time = "2023-08-01T19:23:00.661Z" },
]
[[package]]
name = "charset-normalizer"
version = "3.4.2"
@@ -4700,15 +4709,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/f1/86/e26e6ae4dfcbf6031b8422c22cf3a9eb2b6d127770406e7645b6248d8091/types_cffi-1.17.0.20250523-py3-none-any.whl", hash = "sha256:e98c549d8e191f6220e440f9f14315d6775a21a0e588c32c20476be885b2fad9", size = 20010, upload-time = "2025-05-23T03:05:39.136Z" },
]
[[package]]
name = "types-chardet"
version = "5.0.4.6"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/dd/47/932d35ac07203e936e69102dc9570e83606d386bacb60696f0c403224e86/types-chardet-5.0.4.6.tar.gz", hash = "sha256:caf4c74cd13ccfd8b3313c314aba943b159de562a2573ed03137402b2bb37818", size = 4592, upload-time = "2023-05-10T15:22:21.325Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/10/35/2a06c5c892eb1a0a4f4f74a6aff1ade05da82444af0190cf731761f2c46c/types_chardet-5.0.4.6-py3-none-any.whl", hash = "sha256:ea832d87e798abf1e4dfc73767807c2b7fee35d0003ae90348aea4ae00fb004d", size = 5853, upload-time = "2023-05-10T15:22:19.797Z" },
]
[[package]]
name = "types-decorator"
version = "5.2.0.20250324"
@@ -5359,6 +5359,7 @@ dev = [
{ name = "black" },
{ name = "boto3" },
{ name = "boto3-stubs", extra = ["s3", "ses", "sns", "sqs"] },
{ name = "chardet" },
{ name = "circuitbreaker" },
{ name = "codespell" },
{ name = "coverage" },
@@ -5450,7 +5451,6 @@ dev = [
{ name = "tornado" },
{ name = "types-beautifulsoup4" },
{ name = "types-boto" },
{ name = "types-chardet" },
{ name = "types-decorator" },
{ name = "types-defusedxml" },
{ name = "types-jsonschema" },
@@ -5494,6 +5494,7 @@ prod = [
{ name = "backoff" },
{ name = "beautifulsoup4" },
{ name = "boto3" },
{ name = "chardet" },
{ name = "circuitbreaker" },
{ name = "cryptography" },
{ name = "css-inline" },
@@ -5582,6 +5583,7 @@ dev = [
{ name = "black" },
{ name = "boto3" },
{ name = "boto3-stubs", extras = ["s3", "ses", "sns", "sqs"] },
{ name = "chardet", specifier = ">=5.1.0" },
{ name = "circuitbreaker" },
{ name = "codespell" },
{ name = "coverage" },
@@ -5674,7 +5676,6 @@ dev = [
{ name = "tornado" },
{ name = "types-beautifulsoup4" },
{ name = "types-boto" },
{ name = "types-chardet" },
{ name = "types-decorator" },
{ name = "types-defusedxml" },
{ name = "types-jsonschema" },
@@ -5718,6 +5719,7 @@ prod = [
{ name = "backoff" },
{ name = "beautifulsoup4" },
{ name = "boto3" },
{ name = "chardet", specifier = ">=5.1.0" },
{ name = "circuitbreaker" },
{ name = "cryptography" },
{ name = "css-inline" },