# Source code for range_streams.codecs.conda.stream

from __future__ import annotations

from ranges import Range

from ..zip import ZipStream

__all__ = ["CondaStream"]


class CondaStream(ZipStream):
    """
    A :class:`ZipStream` specialised for ``.conda`` package archives.

    Initialisation is delegated entirely to :class:`ZipStream`; afterwards,
    if ``scan_contents`` is true, :meth:`validate_files` is called to check
    the archive members against the ``.conda`` layout and to store them on
    the ``info_tzst``, ``meta_json`` and ``pkg_tzst`` attributes.
    """

    def __init__(
        self,
        url: str,
        client=None,
        byte_range: Range | tuple[int, int] = Range("[0, 0)"),
        pruning_level: int = 0,
        single_request: bool = False,
        force_async: bool = False,
        chunk_size: int | None = None,
        raise_response: bool = True,
        scan_contents: bool = True,
    ):
        """
        Set up a stream for the conda (ZIP) archive at ``url``, with either an initial
        range to be requested (HTTP partial content request), or if left
        as the empty range (default: ``Range("[0, 0)")``) a HEAD request will
        be sent instead, so as to set the total size of the target
        file on the :attr:`~range_streams.stream.RangeStream.total_bytes`
        property.

        By default (if ``client`` is left as ``None``) a fresh
        :class:`httpx.Client` will be created for each stream.

        The ``byte_range`` can be specified as either a :class:`~ranges.Range`
        object, or 2-tuple of integers (``(start, end)``), interpreted
        either way as a half-closed interval ``[start, end)``, as given by
        Python's built-in :class:`range`.

        The ``pruning_level`` controls the policy for overlap handling
        (``0`` will resize overlapped ranges, ``1`` will delete overlapped
        ranges, and ``2`` will raise an error when a new range is added
        which overlaps a pre-existing range).

        - See docs for the
          :meth:`~range_streams.stream.RangeStream.handle_overlap`
          method for further details.

        Args:
          url            : (:class:`str`) The URL of the file to be streamed
          client         : (:class:`httpx.Client` | ``None``) The HTTPX client
                           to use for HTTP requests
          byte_range     : (:class:`~ranges.Range` | ``tuple[int,int]``) The range
                           of positions on the file to be requested
          pruning_level  : (:class:`int`) Either ``0`` ('replant'), ``1`` ('burn'),
                           or ``2`` ('strict')
          single_request : (:class:`bool`) Whether to use a single GET request and
                           just add 'windows' onto this rather than create multiple
                           partial content requests.
          force_async    : (:class:`bool`) Whether to require the client
                           to be ``httpx.AsyncClient``, and if no client is given,
                           to create one on initialisation. (Experimental/WIP)
          chunk_size     : (:class:`int` | ``None``) The chunk size used for the
                           ``httpx.Response.iter_raw`` response byte iterators
          raise_response : (:class:`bool`) Whether to raise HTTP status code exceptions
          scan_contents  : (:class:`bool`) Whether to scan the archive contents
                           upon initialisation and add the archive's file ranges.
                           When true, the scanned files are also validated
                           against the ``.conda`` layout.

        Raises:
          ValueError: If ``scan_contents`` is true and the archive contents
                      fail :meth:`validate_files`.
        """
        super().__init__(
            url=url,
            client=client,
            byte_range=byte_range,
            pruning_level=pruning_level,
            single_request=single_request,
            force_async=force_async,
            chunk_size=chunk_size,
            raise_response=raise_response,
            scan_contents=scan_contents,
        )
        # Validation reads ``zipped_files``, so it is only attempted when a
        # contents scan was requested.
        if scan_contents:
            self.validate_files()

    def validate_files(self) -> None:
        """
        After :attr:`zipped_files` is set (as a list of
        :class:`~range_streams.codecs.zip.ZippedFileInfo`), validate
        that they meet the specification of the ``.conda`` file format.
        This means: 1 ``info-...tar.zst``, 1 ``pkg-...tar.zst``, and 1
        ``metadata.json``. The simplest way to uniquely identify them is to sort
        alphabetically by filename and check file prefixes/suffixes.

        On success the three entries are stored on ``self.info_tzst``,
        ``self.meta_json`` and ``self.pkg_tzst``; on failure none of these
        attributes are assigned.

        Raises:
          ValueError: If the archive does not contain exactly three files, or
                      if their names do not match the expected pattern.
        """
        archive_files = sorted(self.zipped_files, key=lambda f: f.filename)
        # Check the count explicitly so the failure is reported as an invalid
        # archive rather than as an opaque tuple-unpacking error.
        if len(archive_files) != 3:
            raise ValueError(
                "Invalid .conda archive: expected 3 files, "
                f"found {len(archive_files)}"
            )
        # For a valid archive, alphabetical order is "info-" < "metadata.json" < "pkg-".
        info_tzst, meta_json, pkg_tzst = archive_files
        info_tzst_fn = info_tzst.filename
        pkg_tzst_fn = pkg_tzst.filename
        names_ok = (
            info_tzst_fn.startswith("info-")
            and info_tzst_fn.endswith(".tar.zst")
            and meta_json.filename == "metadata.json"
            and pkg_tzst_fn.startswith("pkg-")
            and pkg_tzst_fn.endswith(".tar.zst")
        )
        if not names_ok:
            raise ValueError("Invalid .conda archive")
        self.info_tzst = info_tzst
        self.meta_json = meta_json
        self.pkg_tzst = pkg_tzst