remove unused sync request functions

This commit is contained in:
ducklet 2023-02-04 18:17:13 +01:00
parent b408fee1bc
commit 9fb24741a1

View file

@ -4,7 +4,7 @@ import logging
import os
import tempfile
from collections import deque
from contextlib import asynccontextmanager, contextmanager
from contextlib import asynccontextmanager
from dataclasses import dataclass, field
from functools import wraps
from hashlib import md5
@ -25,44 +25,14 @@ if config.debug and config.cachedir:
# Process-wide shared sessions; populated lazily by asession() / session()
# and reset to None when the outermost context manager exits.
_shared_asession = None
_shared_session = None
# Aliases for the underlying HTTP client/response types (httpx), so the
# rest of the module does not reference httpx names directly.
_ASession_T = httpx.AsyncClient
_Session_T = httpx.Client
_Response_T = httpx.Response
# Generic type variables for the decorator helpers below.
_T = TypeVar("_T")
_P = ParamSpec("_P")
@contextmanager
def session():
    """Yield the process-wide shared request session.

    The session is shared by all sync request functions and provides cookie
    persistence and connection pooling.  Entering this context manager before
    making requests lets callers set headers or tweak retry behavior.
    """
    global _shared_session
    # Re-entrant use: hand out the already-open session untouched and let the
    # outermost context manager handle teardown.
    if _shared_session:
        yield _shared_session
        return
    _shared_session = _Session()
    try:
        yield _shared_session
    finally:
        # Teardown only happens here, in the outermost frame.
        _shared_session = None
def _Session() -> _Session_T:
    """Build a fresh sync HTTP client with the project's user-agent header."""
    client = _Session_T()
    client.headers["user-agent"] = "Mozilla/5.0 Gecko/20100101 unwind/20230203"
    return client
@asynccontextmanager
async def asession():
"""Return the shared request session.
@ -158,50 +128,6 @@ def cache_path(req) -> Path | None:
return config.cachedir / md5(sig.encode()).hexdigest()
@_throttle(1, 1, random)
def _http_get(s: _Session_T, url: str, *args, **kwds) -> _Response_T:
    """Issue a throttled GET request via *s*.

    In debug mode with a cache dir configured (see `cache_path`), responses
    are replayed from / recorded to JSON cache files keyed by the request
    signature.  Raises `_RedirectError` on any redirect response so callers
    notice moved resources instead of silently following them.
    """
    req = s.build_request(method="GET", url=url, *args, **kwds)
    cachefile = cache_path(req) if config.debug else None
    if cachefile:
        if cachefile.exists():
            log.debug(
                f"💾 loading {req.url} ({req.headers!a}) from cache {cachefile} ..."
            )
            with cachefile.open() as fp:
                resp = _CachedResponse(**json.load(fp))
            if 300 <= resp.status_code <= 399:
                raise _RedirectError(
                    from_url=resp.url, to_url=resp.headers["location"], is_cached=True
                )
            return cast(_Response_T, resp)
    log.debug(f"⚡️ loading {req.url} ({req.headers!a}) ...")
    resp = s.send(req, follow_redirects=False, stream=True)
    resp.raise_for_status()
    resp.read()  # Download the response stream to allow `resp.text` access.
    if cachefile:
        with cachefile.open("w") as fp:
            json.dump(
                {
                    "status_code": resp.status_code,
                    "text": resp.text,
                    # httpx.URL is not JSON-serializable; store the string
                    # form (this is also what the redirect path below and
                    # _CachedResponse expect).
                    "url": str(resp.url),
                    "headers": dict(resp.headers),
                },
                fp,
            )
    if resp.is_redirect:
        # Redirects could mean trouble, we need to stay on top of that!
        raise _RedirectError(from_url=str(resp.url), to_url=resp.headers["location"])
    return resp
@_throttle(1, 1, random)
async def _ahttp_get(s: _ASession_T, url: str, *args, **kwds) -> _Response_T:
req = s.build_request(method="GET", url=url, *args, **kwds)
@ -252,15 +178,6 @@ async def _ahttp_get(s: _ASession_T, url: str, *args, **kwds) -> _Response_T:
return resp
def soup_from_url(url):
    """Return a BeautifulSoup instance from the contents for the given URL."""
    with session() as sess:
        response = _http_get(sess, url)
        parsed = bs4.BeautifulSoup(response.text, "html5lib")
    return parsed
async def asoup_from_url(url):
"""Return a BeautifulSoup instance from the contents for the given URL."""
async with asession() as s:
@ -282,114 +199,6 @@ def _last_modified_from_file(path: Path) -> float:
return path.stat().st_mtime
def download(
    url: str,
    file_path: Path | str | None = None,
    *,
    replace_existing: bool | None = None,
    only_if_newer: bool = False,
    timeout: float | None = None,
    chunk_callback=None,
    response_callback=None,
) -> bytes | None:
    """Download a file.

    If `file_path` is `None` return the remote content, otherwise write the
    content to the given file path and return `None`.

    Existing files will not be overwritten unless `replace_existing` is set.
    Setting `only_if_newer` will check if the remote file is newer than the
    local file, otherwise the download will be aborted.

    Raises `FileExistsError` when the target exists and may not be replaced.
    """
    if replace_existing is None:
        # `only_if_newer` implies the caller accepts replacing a stale file.
        replace_existing = only_if_newer
    file_exists = None
    file_lastmod = None
    if file_path is not None:
        file_path = Path(file_path)
        # A zero-byte leftover from an aborted download counts as "missing".
        file_exists = file_path.exists() and file_path.stat().st_size
        if file_exists and not replace_existing:
            raise FileExistsError(23, "Would replace existing file", str(file_path))
    with session() as s:
        headers = {}
        if file_exists and only_if_newer:
            assert file_path
            file_lastmod = _last_modified_from_file(file_path)
            headers["if-modified-since"] = email.utils.formatdate(
                file_lastmod, usegmt=True
            )
        req = s.build_request(method="GET", url=url, headers=headers, timeout=timeout)
        log.debug("⚡️ loading %s (%s) ...", req.url, req.headers)
        resp = s.send(req, follow_redirects=True, stream=True)
        if response_callback is not None:
            try:
                response_callback(resp)
            except Exception:
                # Callbacks are best-effort; never let them abort the
                # download.  (Was a bare `except:`, which also swallowed
                # KeyboardInterrupt/SystemExit.)
                log.exception("🐛 Error in response callback.")
        log.debug("☕️ Response status: %s; headers: %s", resp.status_code, resp.headers)
        if resp.status_code == httpx.codes.NOT_MODIFIED:
            log.debug("✋ Remote file has not changed, skipping download.")
            return
        resp.raise_for_status()
        if file_path is None:
            resp.read()  # Download the response stream to allow `resp.content` access.
            return resp.content
        assert replace_existing is True
        resp_lastmod = _last_modified_from_response(resp)
        # Check Last-Modified in case the server ignored If-Modified-Since.
        # XXX also check Content-Length?
        if file_exists and only_if_newer and resp_lastmod is not None:
            assert file_lastmod
            if resp_lastmod <= file_lastmod:
                log.debug("✋ Local file is newer, skipping download.")
                resp.close()
                return
        # Create intermediate directories if necessary.
        download_dir = file_path.parent
        download_dir.mkdir(parents=True, exist_ok=True)
        # Write content to a temp file in the target directory so the final
        # rename stays on the same filesystem (atomic replace).
        tempfd, tempfile_path = tempfile.mkstemp(
            dir=download_dir, prefix=f".download-{file_path.name}."
        )
        one_mb = 2**20
        chunk_size = 8 * one_mb
        try:
            log.debug("💾 Writing to temp file %s ...", tempfile_path)
            for chunk in resp.iter_bytes(chunk_size):
                os.write(tempfd, chunk)
                if chunk_callback:
                    try:
                        chunk_callback(chunk)
                    except Exception:
                        log.exception("🐛 Error in chunk callback.")
        except BaseException:
            # Don't leave half-written temp files behind on failure.
            os.close(tempfd)
            Path(tempfile_path).unlink(missing_ok=True)
            raise
        else:
            os.close(tempfd)
        # Move downloaded file to destination (always — the log message is the
        # only thing conditional on an existing file).
        if file_exists:
            log.debug("💾 Replacing existing file: %s", file_path)
        Path(tempfile_path).replace(file_path)
        # Fix file attributes.
        if resp_lastmod is not None:
            os.utime(file_path, (resp_lastmod, resp_lastmod))
async def adownload(
url: str,
*,