Source code for range_streams.codecs.conda.stream

from __future__ import annotations

from ranges import Range

from ..zip import ZipStream

__all__ = ["CondaStream"]


class CondaStream(ZipStream):
    """
    A :class:`~range_streams.codecs.zip.ZipStream` for a conda (ZIP) archive
    in the ``.conda`` file format.

    When the contents are scanned on initialisation, the three archive
    members are validated and stored on the ``info_tzst``, ``meta_json`` and
    ``pkg_tzst`` attributes (see :meth:`validate_files`).
    """

    def __init__(
        self,
        url: str,
        client=None,
        byte_range: Range | tuple[int, int] = Range("[0, 0)"),
        pruning_level: int = 0,
        single_request: bool = False,
        force_async: bool = False,
        chunk_size: int | None = None,
        raise_response: bool = True,
        scan_contents: bool = True,
    ):
        """
        Set up a stream for the conda (ZIP) archive at ``url``, with either an
        initial range to be requested (HTTP partial content request), or if left
        as the empty range (default: ``Range(0,0)``) a HEAD request will be sent
        instead, so as to set the total size of the target file on the
        :attr:`~range_streams.stream.RangeStream.total_bytes` property.

        By default (if ``client`` is left as ``None``) a fresh
        :class:`httpx.Client` will be created for each stream.

        The ``byte_range`` can be specified as either a :class:`~ranges.Range`
        object, or 2-tuple of integers (``(start, end)``), interpreted either
        way as a half-closed interval ``[start, end)``, as given by Python's
        built-in :class:`range`.

        The ``pruning_level`` controls the policy for overlap handling (``0``
        will resize overlapped ranges, ``1`` will delete overlapped ranges, and
        ``2`` will raise an error when a new range is added which overlaps a
        pre-existing range).

        - See docs for the
          :meth:`~range_streams.stream.RangeStream.handle_overlap`
          method for further details.

        Args:
          url            : (:class:`str`) The URL of the file to be streamed
          client         : (:class:`httpx.Client` | ``None``) The HTTPX client
                           to use for HTTP requests
          byte_range     : (:class:`~ranges.Range` | ``tuple[int,int]``) The
                           range of positions on the file to be requested
          pruning_level  : (:class:`int`) Either ``0`` ('replant'), ``1``
                           ('burn'), or ``2`` ('strict')
          single_request : (:class:`bool`) Whether to use a single GET request
                           and just add 'windows' onto this rather than create
                           multiple partial content requests.
          force_async    : (:class:`bool`) Whether to require the client to be
                           ``httpx.AsyncClient``, and if no client is given, to
                           create one on initialisation. (Experimental/WIP)
          chunk_size     : (:class:`int` | ``None``) The chunk size used for the
                           ``httpx.Response.iter_raw`` response byte iterators
          raise_response : (:class:`bool`) Whether to raise HTTP status code
                           exceptions
          scan_contents  : (:class:`bool`) Whether to scan the archive contents
                           upon initialisation and add the archive's file ranges
        """
        super().__init__(
            url=url,
            client=client,
            byte_range=byte_range,
            pruning_level=pruning_level,
            single_request=single_request,
            force_async=force_async,
            chunk_size=chunk_size,
            raise_response=raise_response,
            scan_contents=scan_contents,
        )
        # Validation reads ``zipped_files``, so it only runs when the archive
        # contents were requested to be scanned.
        if scan_contents:
            self.validate_files()

    def validate_files(self) -> None:
        """
        After :attr:`zipped_files` is set (as a list of
        :class:`~range_streams.codecs.zip.ZippedFileInfo`), validate that they
        meet the specification of the ``.conda`` file format. This means:
        1 ``info-...tar.zst``, 1 ``pkg-...tar.zst``, and 1 ``metadata.json``.

        The simplest way to uniquely identify them is to sort alphabetically by
        filename and check file prefixes/suffixes.

        On success the three entries are stored on ``self.info_tzst``,
        ``self.meta_json`` and ``self.pkg_tzst``.

        Raises:
          ValueError : If the archive does not contain exactly three files, or
                       if any of them does not have the expected filename.
        """
        members = sorted(self.zipped_files, key=lambda f: f.filename)
        # Check the count explicitly so the error names the archive format
        # rather than surfacing as a generic tuple-unpacking failure.
        if len(members) != 3:
            raise ValueError(
                f"Invalid .conda archive: expected 3 files, found {len(members)}"
            )
        # In a well-formed archive the alphabetical order is:
        # ``info-*`` < ``metadata.json`` < ``pkg-*``.
        info_tzst, meta_json, pkg_tzst = members
        info_tzst_fn = info_tzst.filename
        meta_json_fn = meta_json.filename
        pkg_tzst_fn = pkg_tzst.filename
        names_ok = (
            info_tzst_fn.startswith("info-")
            and info_tzst_fn.endswith(".tar.zst")
            and meta_json_fn == "metadata.json"
            and pkg_tzst_fn.startswith("pkg-")
            and pkg_tzst_fn.endswith(".tar.zst")
        )
        if not names_ok:
            raise ValueError(
                "Invalid .conda archive: unexpected file names "
                f"{[info_tzst_fn, meta_json_fn, pkg_tzst_fn]!r}"
            )
        self.info_tzst = info_tzst
        self.meta_json = meta_json
        self.pkg_tzst = pkg_tzst