From 9fb24741a1c2851551af5361202d7780a7b9c33e Mon Sep 17 00:00:00 2001 From: ducklet Date: Sat, 4 Feb 2023 18:17:13 +0100 Subject: [PATCH] remove unused sync request functions --- unwind/request.py | 193 +--------------------------------------------- 1 file changed, 1 insertion(+), 192 deletions(-) diff --git a/unwind/request.py b/unwind/request.py index 4579313..4e57564 100644 --- a/unwind/request.py +++ b/unwind/request.py @@ -4,7 +4,7 @@ import logging import os import tempfile from collections import deque -from contextlib import asynccontextmanager, contextmanager +from contextlib import asynccontextmanager from dataclasses import dataclass, field from functools import wraps from hashlib import md5 @@ -25,44 +25,14 @@ if config.debug and config.cachedir: _shared_asession = None -_shared_session = None _ASession_T = httpx.AsyncClient -_Session_T = httpx.Client _Response_T = httpx.Response _T = TypeVar("_T") _P = ParamSpec("_P") -@contextmanager -def session(): - """Return the shared request session. - - The session is shared by all request functions and provides cookie - persistence and connection pooling. - Opening the session before making a request allows you to set headers - or change the retry behavior. - """ - global _shared_session - - if _shared_session: - yield _shared_session - return - - _shared_session = _Session() - try: - yield _shared_session - finally: - _shared_session = None - - -def _Session() -> _Session_T: - s = _Session_T() - s.headers["user-agent"] = "Mozilla/5.0 Gecko/20100101 unwind/20230203" - return s - - @asynccontextmanager async def asession(): """Return the shared request session. @@ -158,50 +128,6 @@ def cache_path(req) -> Path | None: return config.cachedir / md5(sig.encode()).hexdigest() -@_throttle(1, 1, random) -def _http_get(s: _Session_T, url: str, *args, **kwds) -> _Response_T: - req = s.build_request(method="GET", url=url, *args, **kwds) - - cachefile = cache_path(req) if config.debug else None - - if cachefile: - if cachefile.exists(): - log.debug( - f"💾 loading {req.url} ({req.headers!a}) from cache {cachefile} ..." - ) - with cachefile.open() as fp: - resp = _CachedResponse(**json.load(fp)) - if 300 <= resp.status_code <= 399: - raise _RedirectError( - from_url=resp.url, to_url=resp.headers["location"], is_cached=True - ) - return cast(_Response_T, resp) - - log.debug(f"⚡️ loading {req.url} ({req.headers!a}) ...") - resp = s.send(req, follow_redirects=False, stream=True) - resp.raise_for_status() - - resp.read() # Download the response stream to allow `resp.text` access. - - if cachefile: - with cachefile.open("w") as fp: - json.dump( - { - "status_code": resp.status_code, - "text": resp.text, - "url": resp.url, - "headers": dict(resp.headers), - }, - fp, - ) - - if resp.is_redirect: - # Redirects could mean trouble, we need to stay on top of that! - raise _RedirectError(from_url=str(resp.url), to_url=resp.headers["location"]) - - return resp - - @_throttle(1, 1, random) async def _ahttp_get(s: _ASession_T, url: str, *args, **kwds) -> _Response_T: req = s.build_request(method="GET", url=url, *args, **kwds) @@ -252,15 +178,6 @@ async def _ahttp_get(s: _ASession_T, url: str, *args, **kwds) -> _Response_T: return resp -def soup_from_url(url): - """Return a BeautifulSoup instance from the contents for the given URL.""" - with session() as s: - r = _http_get(s, url) - - soup = bs4.BeautifulSoup(r.text, "html5lib") - return soup - - async def asoup_from_url(url): """Return a BeautifulSoup instance from the contents for the given URL.""" async with asession() as s: @@ -282,114 +199,6 @@ def _last_modified_from_file(path: Path) -> float: return path.stat().st_mtime -def download( - url: str, - file_path: Path | str | None = None, - *, - replace_existing: bool | None = None, - only_if_newer: bool = False, - timeout: float | None = None, - chunk_callback=None, - response_callback=None, -) -> bytes | None: - """Download a file. - - If `file_path` is `None` return the remote content, otherwise write the - content to the given file path. - Existing files will not be overwritten unless `replace_existing` is set. - Setting `only_if_newer` will check if the remote file is newer than the - local file, otherwise the download will be aborted. - """ - if replace_existing is None: - replace_existing = only_if_newer - - file_exists = None - if file_path is not None: - file_path = Path(file_path) - - file_exists = file_path.exists() and file_path.stat().st_size - if file_exists and not replace_existing: - raise FileExistsError(23, "Would replace existing file", str(file_path)) - - with session() as s: - headers = {} - if file_exists and only_if_newer: - assert file_path - file_lastmod = _last_modified_from_file(file_path) - headers["if-modified-since"] = email.utils.formatdate( - file_lastmod, usegmt=True - ) - - req = s.build_request(method="GET", url=url, headers=headers, timeout=timeout) - - log.debug("⚡️ loading %s (%s) ...", req.url, req.headers) - resp = s.send(req, follow_redirects=True, stream=True) - - if response_callback is not None: - try: - response_callback(resp) - except: - log.exception("🐛 Error in response callback.") - - log.debug("☕️ Response status: %s; headers: %s", resp.status_code, resp.headers) - - if resp.status_code == httpx.codes.NOT_MODIFIED: - log.debug("✋ Remote file has not changed, skipping download.") - return - - resp.raise_for_status() - - if file_path is None: - resp.read() # Download the response stream to allow `resp.content` access. - return resp.content - - assert replace_existing is True - - resp_lastmod = _last_modified_from_response(resp) - - # Check Last-Modified in case the server ignored If-Modified-Since. - # XXX also check Content-Length? - if file_exists and only_if_newer and resp_lastmod is not None: - assert file_lastmod - - if resp_lastmod <= file_lastmod: - log.debug("✋ Local file is newer, skipping download.") - resp.close() - return - - # Create intermediate directories if necessary. - download_dir = file_path.parent - download_dir.mkdir(parents=True, exist_ok=True) - - # Write content to temp file. - tempdir = download_dir - tempfd, tempfile_path = tempfile.mkstemp( - dir=tempdir, prefix=f".download-{file_path.name}." - ) - one_mb = 2**20 - chunk_size = 8 * one_mb - try: - log.debug("💾 Writing to temp file %s ...", tempfile_path) - for chunk in resp.iter_bytes(chunk_size): - os.write(tempfd, chunk) - if chunk_callback: - try: - chunk_callback(chunk) - except: - log.exception("🐛 Error in chunk callback.") - finally: - os.close(tempfd) - - # Move downloaded file to destination. - if file_exists: - log.debug("💾 Replacing existing file: %s", file_path) - Path(tempfile_path).replace(file_path) - - # Fix file attributes. - if resp_lastmod is not None: - os.utime(file_path, (resp_lastmod, resp_lastmod)) - - async def adownload( url: str, *,