Initial commit

2025-12-09 12:13:01 +01:00
commit 8e654ed209
13332 changed files with 2695056 additions and 0 deletions
--- a/venv/lib/python3.10/site-packages/rapidfuzz/distance/Postfix_py.py
+++ b/venv/lib/python3.10/site-packages/rapidfuzz/distance/Postfix_py.py
@@ -0,0 +1,182 @@
+# SPDX-License-Identifier: MIT
+# Copyright (C) 2022 Max Bachmann
+from __future__ import annotations
+
+from rapidfuzz._common_py import conv_sequences
+from rapidfuzz._utils import is_none, setupPandas
+
+
+def distance(
+    s1,
+    s2,
+    *,
+    processor=None,
+    score_cutoff=None,
+):
+    """
+    Calculates the postfix distance between two strings.
+
+    Parameters
+    ----------
+    s1 : Sequence[Hashable]
+        First string to compare.
+    s2 : Sequence[Hashable]
+        Second string to compare.
+    processor: callable, optional
+        Optional callable that is used to preprocess the strings before
+        comparing them. Default is None, which deactivates this behaviour.
+    score_cutoff : int or None, optional
+        Maximum distance between s1 and s2, that is
+        considered as a result. If the distance is bigger than score_cutoff,
+        score_cutoff + 1 is returned instead. Default is None, which deactivates
+        this behaviour.
+
+    Returns
+    -------
+    distance : int
+        distance between s1 and s2
+    """
+    if processor is not None:
+        s1 = processor(s1)
+        s2 = processor(s2)
+
+    s1, s2 = conv_sequences(s1, s2)
+    maximum = max(len(s1), len(s2))
+    sim = similarity(s1, s2)
+    dist = maximum - sim
+
+    return dist if (score_cutoff is None or dist <= score_cutoff) else score_cutoff + 1
+
+
+def similarity(
+    s1,
+    s2,
+    *,
+    processor=None,
+    score_cutoff=None,
+):
+    """
+    Calculates the postfix similarity between two strings.
+
+    This is calculated as ``len1 - distance``.
+
+    Parameters
+    ----------
+    s1 : Sequence[Hashable]
+        First string to compare.
+    s2 : Sequence[Hashable]
+        Second string to compare.
+    processor: callable, optional
+        Optional callable that is used to preprocess the strings before
+        comparing them. Default is None, which deactivates this behaviour.
+    score_cutoff : int, optional
+        Maximum distance between s1 and s2, that is
+        considered as a result. If the similarity is smaller than score_cutoff,
+        0 is returned instead. Default is None, which deactivates
+        this behaviour.
+
+    Returns
+    -------
+    distance : int
+        distance between s1 and s2
+    """
+    if processor is not None:
+        s1 = processor(s1)
+        s2 = processor(s2)
+
+    s1, s2 = conv_sequences(s1, s2)
+    sim = 0
+    for ch1, ch2 in zip(reversed(s1), reversed(s2)):
+        if ch1 != ch2:
+            break
+        sim += 1
+
+    return sim if (score_cutoff is None or sim >= score_cutoff) else 0
+
+
+def normalized_distance(
+    s1,
+    s2,
+    *,
+    processor=None,
+    score_cutoff=None,
+):
+    """
+    Calculates a normalized postfix similarity in the range [1, 0].
+
+    This is calculated as ``distance / (len1 + len2)``.
+
+    Parameters
+    ----------
+    s1 : Sequence[Hashable]
+        First string to compare.
+    s2 : Sequence[Hashable]
+        Second string to compare.
+    processor: callable, optional
+        Optional callable that is used to preprocess the strings before
+        comparing them. Default is None, which deactivates this behaviour.
+    score_cutoff : float, optional
+        Optional argument for a score threshold as a float between 0 and 1.0.
+        For norm_dist > score_cutoff 1.0 is returned instead. Default is 1.0,
+        which deactivates this behaviour.
+
+    Returns
+    -------
+    norm_dist : float
+        normalized distance between s1 and s2 as a float between 0 and 1.0
+    """
+    setupPandas()
+    if is_none(s1) or is_none(s2):
+        return 1.0
+
+    norm_sim = normalized_similarity(s1, s2, processor=processor)
+    norm_dist = 1.0 - norm_sim
+
+    return norm_dist if (score_cutoff is None or norm_dist <= score_cutoff) else 1.0
+
+
+def normalized_similarity(
+    s1,
+    s2,
+    *,
+    processor=None,
+    score_cutoff=None,
+):
+    """
+    Calculates a normalized postfix similarity in the range [0, 1].
+
+    This is calculated as ``1 - normalized_distance``
+
+    Parameters
+    ----------
+    s1 : Sequence[Hashable]
+        First string to compare.
+    s2 : Sequence[Hashable]
+        Second string to compare.
+    processor: callable, optional
+        Optional callable that is used to preprocess the strings before
+        comparing them. Default is None, which deactivates this behaviour.
+    score_cutoff : float, optional
+        Optional argument for a score threshold as a float between 0 and 1.0.
+        For norm_sim < score_cutoff 0 is returned instead. Default is 0,
+        which deactivates this behaviour.
+
+    Returns
+    -------
+    norm_sim : float
+        normalized similarity between s1 and s2 as a float between 0 and 1.0
+    """
+    setupPandas()
+    if is_none(s1) or is_none(s2):
+        return 0.0
+
+    if processor is not None:
+        s1 = processor(s1)
+        s2 = processor(s2)
+
+    s1, s2 = conv_sequences(s1, s2)
+    maximum = max(len(s1), len(s2))
+    sim = similarity(s1, s2)
+    norm_sim = sim / maximum if maximum else 1.0
+
+    return norm_sim if (score_cutoff is None or norm_sim >= score_cutoff) else 0.0