Use uv build backend

uv's build backend expects our module to be in src. See: https://docs.astral.sh/uv/concepts/build-backend/#modules
2025-07-05 22:11:38 +02:00 · 2025-07-03 14:33:57 +03:00
parent 753f3340a3
commit be550e21f1
10 changed files with 3 additions and 8 deletions
--- a/csv_metadata_quality/util.py
+++ b/csv_metadata_quality/util.py
@@ -1,65 +0,0 @@
-# SPDX-License-Identifier: GPL-3.0-only
-
-
-import json
-import os
-
-from ftfy.badness import is_bad
-
-
-def is_nfc(field):
-    """Utility function to check whether a string is using normalized Unicode.
-    Python's built-in unicodedata library has the is_normalized() function, but
-    it was only introduced in Python 3.8. By using a simple utility function we
-    are able to run on Python >= 3.6 again.
-
-    See: https://docs.python.org/3/library/unicodedata.html
-
-    Return boolean.
-    """
-
-    from unicodedata import normalize
-
-    return field == normalize("NFC", field)
-
-
-def is_mojibake(field):
-    """Determines whether a string contains mojibake.
-
-    We commonly deal with CSV files that were *encoded* in UTF-8, but decoded
-    as something else like CP-1252 (Windows Latin). This manifests in the form
-    of "mojibake", for example:
-
-        - CIAT PublicaÃ§ao
-        - CIAT PublicaciÃ³n
-
-    This uses the excellent "fixes text for you" (ftfy) library to determine
-    whether a string contains characters that have been encoded in one encoding
-    and decoded in another.
-
-    Inspired by this code snippet from Martijn Pieters on StackOverflow:
-    https://stackoverflow.com/questions/29071995/identify-garbage-unicode-string-using-python
-
-    Return boolean.
-    """
-    if not is_bad(field):
-        # Nothing weird, should be okay
-        return False
-    try:
-        field.encode("sloppy-windows-1252")
-    except UnicodeEncodeError:
-        # Not CP-1252 encodable, probably fine
-        return False
-    else:
-        # Encodable as CP-1252, Mojibake alert level high
-        return True
-
-
-def load_spdx_licenses():
-    """Returns a Python list of SPDX short license identifiers."""
-
-    with open(os.path.join(os.path.dirname(__file__), "data/licenses.json")) as f:
-        licenses = json.load(f)
-
-    # List comprehension to extract the license ID for each license
-    return [license["licenseId"] for license in licenses["licenses"]]