AlaK4X
Linux lhjmq-records 5.15.0-118-generic #128-Ubuntu SMP Fri Jul 5 09:28:59 UTC 2024 x86_64



Your IP : 52.15.35.129


Current Path : /lib/python3/dist-packages/hyperlink/
Upload File :
Current File : //lib/python3/dist-packages/hyperlink/hypothesis.py

# -*- coding: utf-8 -*-
"""
Hypothesis strategies.
"""
from __future__ import absolute_import

try:
    import hypothesis

    del hypothesis
except ImportError:
    from typing import Tuple

    __all__ = ()  # type: Tuple[str, ...]
else:
    from csv import reader as csv_reader
    from os.path import dirname, join
    from string import ascii_letters, digits
    from sys import maxunicode
    from typing import (
        Callable,
        Iterable,
        List,
        Optional,
        Sequence,
        Text,
        TypeVar,
        cast,
    )
    from gzip import open as open_gzip

    from . import DecodedURL, EncodedURL

    from hypothesis import assume
    from hypothesis.strategies import (
        composite,
        integers,
        lists,
        sampled_from,
        text,
    )

    from idna import IDNAError, check_label, encode as idna_encode

    # Public strategies exported by this module.  This branch only runs when
    # the ``hypothesis`` import above succeeded; otherwise ``__all__`` is
    # empty.
    __all__ = (
        "decoded_urls",
        "encoded_urls",
        "hostname_labels",
        "hostnames",
        "idna_text",
        "paths",
        "port_numbers",
    )

    # ``DrawCallable`` is the type used in the type comments below for the
    # ``draw`` argument that composite strategies receive.
    T = TypeVar("T")
    DrawCallable = Callable[[Callable[..., T]], T]

    # Python 2 provides ``unichr``; where that name is missing (Python 3),
    # alias it to ``chr`` so the code below can use one spelling.
    try:
        unichr
    except NameError:  # Py3
        unichr = chr  # type: Callable[[int], Text]

    def idna_characters():
        # type: () -> Text
        """
        Returns a string containing IDNA characters.

        The characters are read from the gzipped CSV table
        C{idna-tables-properties.csv.gz} located next to this module; only
        code points whose property is C{PVALID} are included.  The computed
        string is cached in a module-level variable and reused by later
        calls.
        """
        global _idnaCharacters

        # Fast path: the table has already been loaded.
        if _idnaCharacters:
            return _idnaCharacters

        # Data source "IDNA Derived Properties":
        # https://www.iana.org/assignments/idna-tables-6.3.0/
        #   idna-tables-6.3.0.xhtml#idna-tables-properties
        table_path = join(dirname(__file__), "idna-tables-properties.csv.gz")
        collected = []  # type: List[Text]

        with open_gzip(table_path) as raw_lines:
            rows = csv_reader(
                (raw.decode("utf-8") for raw in raw_lines),
                delimiter=",",
            )
            next(rows)  # Skip header row

            for row in rows:
                # Every row is expected to have exactly three columns.
                code_range, status, _description = row

                # CONTEXTO or CONTEXTJ are also allowed, but they come with
                # rules, so we're punting on those here.
                # See: https://tools.ietf.org/html/rfc5892
                if status != "PVALID":
                    continue

                # A row holds either a single code point ("0041") or an
                # inclusive range ("0041-005A"), both in hexadecimal.
                first_hex, dash, last_hex = code_range.partition("-")
                first = int(first_hex, 16)
                last = int(last_hex, 16) if dash else first

                # Clamp to maxunicode; exceeding it happens using Py2 on
                # Windows.
                collected.extend(
                    unichr(code_point)
                    for code_point in range(first, min(last, maxunicode) + 1)
                )

        _idnaCharacters = u"".join(collected)
        return _idnaCharacters

    # Cache for idna_characters(); empty until the table is first loaded.
    _idnaCharacters = ""  # type: Text

    @composite
    def idna_text(draw, min_size=1, max_size=None):
        # type: (DrawCallable, int, Optional[int]) -> Text
        """
        A strategy which generates IDNA-encodable text.

        @param min_size: The minimum number of characters in the text.
            Must be at least C{1}.

        @param max_size: The maximum number of characters in the text.
            Use C{None} for an unbounded size; otherwise must be at least
            C{1}.
        """
        allowed = idna_characters()

        assert min_size >= 1
        assert max_size is None or max_size >= 1

        candidate = cast(
            Text,
            draw(text(min_size=min_size, max_size=max_size, alphabet=allowed)),
        )

        # FIXME: There should be a more efficient way to ensure we produce
        # valid IDNA text.
        try:
            idna_encode(candidate)
        except IDNAError:
            # Discard examples that the IDNA codec refuses to encode.
            assume(False)

        return candidate

    @composite
    def port_numbers(draw, allow_zero=False):
        # type: (DrawCallable, bool) -> int
        """
        A strategy which generates port numbers.

        Generated values range from C{1} (or C{0}, see below) up to and
        including C{65535}.

        @param allow_zero: Whether to allow port C{0} as a possible value.
        """
        lowest = 0 if allow_zero else 1
        port_strategy = integers(min_value=lowest, max_value=65535)
        return cast(int, draw(port_strategy))

    @composite
    def hostname_labels(draw, allow_idn=True):
        # type: (DrawCallable, bool) -> Text
        """
        A strategy which generates host name labels.

        Labels are between 1 and 63 characters long.  Any label rejected by
        C{check_label} is discarded.

        @param allow_idn: Whether to allow non-ASCII characters as allowed by
            internationalized domain names (IDNs).
        """
        if not allow_idn:
            ascii_alphabet = Text(ascii_letters + digits + u"-")
            label = cast(
                Text,
                draw(text(min_size=1, max_size=63, alphabet=ascii_alphabet)),
            )
        else:
            label = cast(Text, draw(idna_text(min_size=1, max_size=63)))

            try:
                label.encode("ascii")
            except UnicodeEncodeError:
                # If the label doesn't encode to ASCII, then we need to check
                # the length of the label after encoding to punycode and
                # adding the xn-- prefix.
                punycode_budget = 63 - len("xn--")

                # Rather than bombing out, just trim from the end until it is
                # short enough, so hypothesis doesn't have to generate new
                # data.
                while len(label.encode("punycode")) > punycode_budget:
                    label = label[:-1]

        # Filter invalid labels.
        # It would be better to reliably avoid generation of bogus labels in
        # the first place, but it's hard...
        try:
            check_label(label)
        except UnicodeError:  # pragma: no cover (not always drawn)
            assume(False)

        return label

    @composite
    def hostnames(draw, allow_leading_digit=True, allow_idn=True):
        # type: (DrawCallable, bool, bool) -> Text
        """
        A strategy which generates host names.

        A host name consists of a first label followed by one to four
        further labels, joined with dots.

        @param allow_leading_digit: Whether to allow a leading digit in host
            names; they were not allowed prior to RFC 1123.

        @param allow_idn: Whether to allow non-ASCII characters as allowed by
            internationalized domain names (IDNs).
        """

        def first_label_ok(candidate):
            # type: (Text) -> bool
            # Only the first label is subject to the leading-digit rule.
            return allow_leading_digit or candidate[0] not in digits

        # Draw first label, filtering out labels with leading digits if needed
        first_label = cast(
            Text,
            draw(hostname_labels(allow_idn=allow_idn).filter(first_label_ok)),
        )

        # Draw remaining labels
        other_labels = cast(
            List[Text],
            draw(
                lists(
                    hostname_labels(allow_idn=allow_idn),
                    min_size=1,
                    max_size=4,
                )
            ),
        )

        labels = [first_label]
        labels.extend(other_labels)

        # Trim off labels until the total host name length fits in 252
        # characters.  This avoids having to filter the data.
        # NOTE(review): the length is measured on the unencoded labels;
        # confirm whether the IDNA-encoded length should be bounded instead.
        while len(u".".join(labels)) > 252:
            labels.pop()

        return u".".join(labels)

    def path_characters():
        # type: () -> str
        """
        Returns a string containing valid URL path characters.

        Every code point from C{0} through C{maxunicode} (inclusive) is
        considered, except the reserved characters C{#}, C{/} and C{?} and
        anything that cannot be encoded as UTF-8.  The computed string is
        cached in a module-level variable and reused by later calls.
        """
        global _path_characters

        if _path_characters is None:

            def chars():
                # type: () -> Iterable[Text]
                # maxunicode is itself the largest code point, so the range
                # must be inclusive to avoid dropping the last character.
                for i in range(maxunicode + 1):
                    c = unichr(i)

                    # Exclude reserved characters
                    if c in "#/?":
                        continue

                    # Exclude anything not UTF-8 compatible
                    try:
                        c.encode("utf-8")
                    except UnicodeEncodeError:
                        continue

                    yield c

            _path_characters = "".join(chars())

        return _path_characters

    # Cache for path_characters(); None until first computed.
    _path_characters = None  # type: Optional[str]

    @composite
    def paths(draw):
        # type: (DrawCallable) -> Sequence[Text]
        """
        A strategy which generates URL paths as lists of path segments.

        Each path holds at most ten segments, and each segment is non-empty
        text built from the characters returned by L{path_characters}.
        """
        segment_strategy = text(min_size=1, alphabet=path_characters())
        segments = draw(lists(segment_strategy, max_size=10))
        return cast(List[Text], segments)

    @composite
    def encoded_urls(draw):
        # type: (DrawCallable) -> EncodedURL
        """
        A strategy which generates L{EncodedURL}s.
        Call the L{EncodedURL.to_uri} method on each URL to get an HTTP
        protocol-friendly URI.

        The scheme is either C{http} or C{https}.  A drawn port of C{0}
        is translated into "no port" (C{None}).
        """
        # Draw order is port, host, path, scheme.
        drawn_port = cast(int, draw(port_numbers(allow_zero=True)))
        host = cast(Text, draw(hostnames()))
        segments = cast(Sequence[Text], draw(paths()))
        scheme = cast(Text, draw(sampled_from((u"http", u"https"))))

        # Port zero stands in for "port not specified".
        port = None if drawn_port == 0 else drawn_port  # type: Optional[int]

        return EncodedURL(
            scheme=scheme,
            host=host,
            port=port,
            path=segments,
        )

    @composite
    def decoded_urls(draw):
        # type: (DrawCallable) -> DecodedURL
        """
        A strategy which generates L{DecodedURL}s.

        Each value wraps a URL drawn from L{encoded_urls}.  Call the
        C{to_uri} method on each URL to get an HTTP protocol-friendly URI.
        """
        encoded = draw(encoded_urls())
        return DecodedURL(encoded)