2024-11-28 16:48:20 +01:00
14 changed files with 172 additions and 191 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

-## [0.6.0] - 2022-09-02
+## Unreleased
 ### Changed
 - Perform fix for "unnecessary" Unicode characters after we try to fix encoding
 issues with ftfy
@ -13,7 +13,6 @@ issues with ftfy

 ### Updated
 - Python dependencies
- Metadata field exclude logic

 ### Added
 - Ability to drop invalid AGROVOC values with `-d` when checking AGROVOC values
@ -21,10 +20,6 @@ with `-a <field.name>`
 - Ability to add missing UN M.49 regions when both country and region columns
 are present. Enable with `-u` (unsafe fixes) for now.

-### Removed
- Support for reading Excel files (both `.xls` and `.xlsx`) as it was completely
-untested
-
 ## [0.5.0] - 2021-12-08
 ### Added
 - Ability to check for, and fix, "mojibake" characters using [ftfy](https://github.com/LuminosoInsight/python-ftfy)
--- a/README.md
+++ b/README.md
@ -8,7 +8,7 @@

 A simple, but opinionated metadata quality checker and fixer designed to work with CSVs in the DSpace ecosystem (though it could theoretically work on any CSV that uses Dublin Core fields as columns). The implementation is essentially a pipeline of checks and fixes that begins with splitting multi-value fields on the standard DSpace "||" separator, trimming leading/trailing whitespace, and then proceeding to more specialized cases like ISSNs, ISBNs, languages, unnecessary Unicode, AGROVOC terms, etc.

-Requires Python 3.8 or greater. CSV support comes from the [Pandas](https://pandas.pydata.org/) library.
+Requires Python 3.8 or greater. CSV and Excel support comes from the [Pandas](https://pandas.pydata.org/) library, though your mileage may vary with Excel because this is much less tested.

 If you use the DSpace CSV metadata quality checker please cite:

--- a/csv_metadata_quality/app.py
+++ b/csv_metadata_quality/app.py
@ -36,7 +36,7 @@ def parse_args(argv):
    parser.add_argument(
        "--input-file",
        "-i",
-        help="Path to input file. Must be a UTF-8 CSV.",
+        help="Path to input file. Can be UTF-8 CSV or Excel XLSX.",
        required=True,
        type=argparse.FileType("r", encoding="UTF-8"),
    )
@ -76,16 +76,16 @@ def run(argv):
    # Read all fields as strings so dates don't get converted from 1998 to 1998.0
    df = pd.read_csv(args.input_file, dtype=str)

+    for column in df.columns:
        # Check if the user requested to skip any fields
        if args.exclude_fields:
-        # Split the list of excluded fields on ',' into a list. Note that the
-        # user should be careful not to include spaces here.
-        exclude = args.exclude_fields.split(",")
-    else:
-        exclude = list()
-
-    for column in df.columns:
-        if column in exclude:
+            skip = False
+            # Split the list of excludes on ',' so we can test exact matches
+            # rather than fuzzy matches with regexes or "if word in string"
+            for exclude in args.exclude_fields.split(","):
+                if column == exclude and skip is False:
+                    skip = True
+            if skip:
                print(f"{Fore.YELLOW}Skipping {Fore.RESET}{column}")

                continue
@ -200,22 +200,20 @@ def run(argv):
    # should rename column in this for loop...
    for column in df_transposed.columns:
        # Check: citation DOI
-        check.citation_doi(df_transposed[column], exclude)
+        check.citation_doi(df_transposed[column])

        # Check: title in citation
-        check.title_in_citation(df_transposed[column], exclude)
+        check.title_in_citation(df_transposed[column])

        if args.unsafe_fixes:
            # Fix: countries match regions
-            df_transposed[column] = fix.countries_match_regions(
-                df_transposed[column], exclude
-            )
+            df_transposed[column] = fix.countries_match_regions(df_transposed[column])
        else:
            # Check: countries match regions
-            check.countries_match_regions(df_transposed[column], exclude)
+            check.countries_match_regions(df_transposed[column])

        if args.experimental_checks:
-            experimental.correct_language(df_transposed[column], exclude)
+            experimental.correct_language(df_transposed[column])

    # Transpose the DataFrame back before writing. This is probably wasteful to
    # do every time since we technically only need to do it if we've done the
--- a/csv_metadata_quality/check.py
+++ b/csv_metadata_quality/check.py
@ -391,20 +391,13 @@ def mojibake(field, field_name):
    return


-def citation_doi(row, exclude):
+def citation_doi(row):
    """Check for the scenario where an item has a DOI listed in its citation,
    but does not have a cg.identifier.doi field.

    Function prints a warning if the DOI field is missing, but there is a DOI
    in the citation.
    """
-    # Check if the user requested us to skip any DOI fields so we can
-    # just return before going any further.
-    for field in exclude:
-        match = re.match(r"^.*?doi.*$", field)
-        if match is not None:
-            return
-
    # Initialize some variables at global scope so that we can set them in the
    # loop scope below and still be able to access them afterwards.
    citation = ""
@ -422,10 +415,9 @@ def citation_doi(row, exclude):
        if match is not None:
            return

-        # Check if the current label is a citation field and make sure the user
-        # hasn't asked to skip it. If not, then set the citation.
+        # Get the name of the citation field
        match = re.match(r"^.*?[cC]itation.*$", label)
-        if match is not None and label not in exclude:
+        if match is not None:
            citation = row[label]

    if citation != "":
@ -441,7 +433,7 @@ def citation_doi(row, exclude):
    return


-def title_in_citation(row, exclude):
+def title_in_citation(row):
    """Check for the scenario where an item's title is missing from its cita-
    tion. This could mean that it is missing entirely, or perhaps just exists
    in a different format (whitespace, accents, etc).
@ -463,12 +455,12 @@ def title_in_citation(row, exclude):

        # Find the name of the title column
        match = re.match(r"^(dc|dcterms)\.title.*$", label)
-        if match is not None and label not in exclude:
+        if match is not None:
            title = row[label]

        # Find the name of the citation column
        match = re.match(r"^.*?[cC]itation.*$", label)
-        if match is not None and label not in exclude:
+        if match is not None:
            citation = row[label]

    if citation != "":
@ -478,7 +470,7 @@ def title_in_citation(row, exclude):
    return


-def countries_match_regions(row, exclude):
+def countries_match_regions(row):
    """Check for the scenario where an item has country coverage metadata, but
    does not have the corresponding region metadata. For example, an item that
    has country coverage "Kenya" should also have region "Eastern Africa" acc-
@ -522,12 +514,6 @@ def countries_match_regions(row, exclude):
        if match is not None:
            title_column_name = label

-    # Make sure the user has not asked to exclude any metadata fields. If so, we
-    # should return immediately.
-    column_names = [country_column_name, region_column_name, title_column_name]
-    if any(field in column_names for field in exclude):
-        return
-
    # Make sure we found the country and region columns
    if country_column_name != "" and region_column_name != "":
        # If we don't have any countries then we should return early before
--- a/csv_metadata_quality/experimental.py
+++ b/csv_metadata_quality/experimental.py
@ -8,7 +8,7 @@ from colorama import Fore
 from pycountry import languages


-def correct_language(row, exclude):
+def correct_language(row):
    """Analyze the text used in the title, abstract, and citation fields to pre-
    dict the language being used and compare it with the item's dc.language.iso
    field.
@ -39,8 +39,7 @@ def correct_language(row, exclude):

            language = row[label]

-        # Extract title if it is present (note that we don't allow excluding
-        # the title here because it complicates things).
+        # Extract title if it is present
        match = re.match(r"^.*?title.*$", label)
        if match is not None:
            title = row[label]
@ -49,12 +48,12 @@ def correct_language(row, exclude):

        # Extract abstract if it is present
        match = re.match(r"^.*?abstract.*$", label)
-        if match is not None and label not in exclude:
+        if match is not None:
            sample_strings.append(row[label])

        # Extract citation if it is present
        match = re.match(r"^.*?[cC]itation.*$", label)
-        if match is not None and label not in exclude:
+        if match is not None:
            sample_strings.append(row[label])

    # Make sure language is not blank and is valid ISO 639-1/639-3 before proceeding with language prediction
--- a/csv_metadata_quality/fix.py
+++ b/csv_metadata_quality/fix.py
@ -293,7 +293,7 @@ def mojibake(field, field_name):
        return field


-def countries_match_regions(row, exclude):
+def countries_match_regions(row):
    """Check for the scenario where an item has country coverage metadata, but
    does not have the corresponding region metadata. For example, an item that
    has country coverage "Kenya" should also have region "Eastern Africa" acc-
@ -337,12 +337,6 @@ def countries_match_regions(row, exclude):
        if match is not None:
            title_column_name = label

-    # Make sure the user has not asked to exclude any metadata fields. If so, we
-    # should return immediately.
-    column_names = [country_column_name, region_column_name, title_column_name]
-    if any(field in column_names for field in exclude):
-        return row
-
    # Make sure we found the country and region columns
    if country_column_name != "" and region_column_name != "":
        # If we don't have any countries then we should return early before
--- a/csv_metadata_quality/version.py
+++ b/csv_metadata_quality/version.py
@ -1,3 +1,3 @@
 # SPDX-License-Identifier: GPL-3.0-only

-VERSION = "0.6.0"
+VERSION = "0.6.0-dev"
--- a/poetry.lock
+++ b/poetry.lock
@ -74,6 +74,14 @@ category = "main"
 optional = false
 python-versions = "*"

+[[package]]
+name = "atomicwrites"
+version = "1.4.1"
+description = "Atomic file writes."
+category = "dev"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+
 [[package]]
 name = "attrs"
 version = "22.1.0"
@ -241,16 +249,16 @@ test = ["pytest (>=6)"]

 [[package]]
 name = "flake8"
-version = "5.0.4"
+version = "4.0.1"
 description = "the modular source code checker: pep8 pyflakes and co"
 category = "dev"
 optional = false
-python-versions = ">=3.6.1"
+python-versions = ">=3.6"

 [package.dependencies]
-mccabe = ">=0.7.0,<0.8.0"
-pycodestyle = ">=2.9.0,<2.10.0"
-pyflakes = ">=2.5.0,<2.6.0"
+mccabe = ">=0.6.0,<0.7.0"
+pycodestyle = ">=2.8.0,<2.9.0"
+pyflakes = ">=2.4.0,<2.5.0"

 [[package]]
 name = "ftfy"
@ -275,7 +283,7 @@ python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
 name = "greenlet"
 version = "1.1.3"
 description = "Lightweight in-process concurrent programming"
-category = "dev"
+category = "main"
 optional = false
 python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*"

@ -347,11 +355,11 @@ six = ">=1.6.1"

 [[package]]
 name = "mccabe"
-version = "0.7.0"
+version = "0.6.1"
 description = "McCabe checker, plugin for flake8"
 category = "dev"
 optional = false
-python-versions = ">=3.6"
+python-versions = "*"

 [[package]]
 name = "mypy-extensions"
@ -433,7 +441,7 @@ future = "*"

 [[package]]
 name = "pathspec"
-version = "0.10.1"
+version = "0.10.0"
 description = "Utility library for gitignore style pattern matching of file paths."
 category = "dev"
 optional = false
@ -481,11 +489,11 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"

 [[package]]
 name = "pycodestyle"
-version = "2.9.1"
+version = "2.8.0"
 description = "Python style guide checker"
 category = "dev"
 optional = false
-python-versions = ">=3.6"
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"

 [[package]]
 name = "pycountry"
@ -500,11 +508,11 @@ setuptools = "*"

 [[package]]
 name = "pyflakes"
-version = "2.5.0"
+version = "2.4.0"
 description = "passive checker of Python programs"
 category = "dev"
 optional = false
-python-versions = ">=3.6"
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"

 [[package]]
 name = "Pygments"
@ -530,23 +538,24 @@ diagrams = ["jinja2", "railroad-diagrams"]

 [[package]]
 name = "pytest"
-version = "7.1.3"
+version = "6.2.5"
 description = "pytest: simple powerful testing with Python"
 category = "dev"
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.6"

 [package.dependencies]
+atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""}
 attrs = ">=19.2.0"
 colorama = {version = "*", markers = "sys_platform == \"win32\""}
 iniconfig = "*"
 packaging = "*"
 pluggy = ">=0.12,<2.0"
 py = ">=1.8.2"
-tomli = ">=1.0.0"
+toml = "*"

 [package.extras]
-testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"]
+testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"]

 [[package]]
 name = "pytest-clarity"
@ -707,31 +716,30 @@ python-versions = "*"

 [[package]]
 name = "SQLAlchemy"
-version = "1.4.40"
+version = "1.4.22"
 description = "Database Abstraction Library"
-category = "dev"
+category = "main"
 optional = false
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"

 [package.dependencies]
-greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"}
+greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\""}

 [package.extras]
 aiomysql = ["aiomysql", "greenlet (!=0.4.17)"]
-aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing_extensions (!=3.10.0.1)"]
+aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)"]
 asyncio = ["greenlet (!=0.4.17)"]
-asyncmy = ["asyncmy (>=0.2.3,!=0.2.4)", "greenlet (!=0.4.17)"]
-mariadb_connector = ["mariadb (>=1.0.1,!=1.1.2)"]
+mariadb_connector = ["mariadb (>=1.0.1)"]
 mssql = ["pyodbc"]
 mssql_pymssql = ["pymssql"]
 mssql_pyodbc = ["pyodbc"]
-mypy = ["mypy (>=0.910)", "sqlalchemy2-stubs"]
+mypy = ["mypy (>=0.800)", "sqlalchemy2-stubs"]
 mysql = ["mysqlclient (>=1.4.0)", "mysqlclient (>=1.4.0,<2)"]
-mysql_connector = ["mysql-connector-python"]
+mysql_connector = ["mysqlconnector"]
 oracle = ["cx_oracle (>=7)", "cx_oracle (>=7,<8)"]
 postgresql = ["psycopg2 (>=2.7)"]
 postgresql_asyncpg = ["asyncpg", "greenlet (!=0.4.17)"]
-postgresql_pg8000 = ["pg8000 (>=1.16.6,!=1.29.0)"]
+postgresql_pg8000 = ["pg8000 (>=1.16.6)"]
 postgresql_psycopg2binary = ["psycopg2-binary"]
 postgresql_psycopg2cffi = ["psycopg2cffi"]
 pymysql = ["pymysql", "pymysql (<1)"]
@ -745,6 +753,14 @@ category = "dev"
 optional = false
 python-versions = "*"

+[[package]]
+name = "toml"
+version = "0.10.2"
+description = "Python Library for Tom's Obvious, Minimal Language"
+category = "dev"
+optional = false
+python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
+
 [[package]]
 name = "tomli"
 version = "2.0.1"
@ -795,21 +811,16 @@ python-versions = "*"

 [[package]]
 name = "xlrd"
-version = "2.0.1"
-description = "Library for developers to extract data from Microsoft Excel (tm) .xls spreadsheet files"
-category = "dev"
+version = "1.2.0"
+description = "Library for developers to extract data from Microsoft Excel (tm) spreadsheet files"
+category = "main"
 optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
-
-[package.extras]
-build = ["twine", "wheel"]
-docs = ["sphinx"]
-test = ["pytest", "pytest-cov"]
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"

 [metadata]
 lock-version = "1.1"
 python-versions = "^3.8"
-content-hash = "266a3911a7403fb9fe468626e179c919143d6efc26cab0adad69bd64c9e2a06f"
+content-hash = "71d3e50c7f44aa2e1d800e31b198cb8f614c4b0c31fdde1c20eae26eeaef646d"

 [metadata.files]
 agate = [
@ -832,6 +843,9 @@ appdirs = [
    {file = "appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128"},
    {file = "appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41"},
 ]
+atomicwrites = [
+    {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"},
+]
 attrs = [
    {file = "attrs-22.1.0-py2.py3-none-any.whl", hash = "sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c"},
    {file = "attrs-22.1.0.tar.gz", hash = "sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6"},
@ -909,8 +923,8 @@ exceptiongroup = [
    {file = "exceptiongroup-1.0.0rc9.tar.gz", hash = "sha256:9086a4a21ef9b31c72181c77c040a074ba0889ee56a7b289ff0afb0d97655f96"},
 ]
 flake8 = [
-    {file = "flake8-5.0.4-py2.py3-none-any.whl", hash = "sha256:7a1cf6b73744f5806ab95e526f6f0d8c01c66d7bbe349562d22dfca20610b248"},
-    {file = "flake8-5.0.4.tar.gz", hash = "sha256:6fbe320aad8d6b95cec8b8e47bc933004678dc63095be98528b7bdd2a9f510db"},
+    {file = "flake8-4.0.1-py2.py3-none-any.whl", hash = "sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d"},
+    {file = "flake8-4.0.1.tar.gz", hash = "sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d"},
 ]
 ftfy = [
    {file = "ftfy-6.1.1-py3-none-any.whl", hash = "sha256:0ffd33fce16b54cccaec78d6ec73d95ad370e5df5a25255c8966a6147bd667ca"},
@ -999,8 +1013,8 @@ leather = [
    {file = "leather-0.3.4.tar.gz", hash = "sha256:b43e21c8fa46b2679de8449f4d953c06418666dc058ce41055ee8a8d3bb40918"},
 ]
 mccabe = [
-    {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"},
-    {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"},
+    {file = "mccabe-0.6.1-py2.py3-none-any.whl", hash = "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42"},
+    {file = "mccabe-0.6.1.tar.gz", hash = "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"},
 ]
 mypy-extensions = [
    {file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"},
@ -1075,8 +1089,8 @@ parsedatetime = [
    {file = "parsedatetime-2.4.tar.gz", hash = "sha256:3d817c58fb9570d1eec1dd46fa9448cd644eeed4fb612684b02dfda3a79cb84b"},
 ]
 pathspec = [
-    {file = "pathspec-0.10.1-py3-none-any.whl", hash = "sha256:46846318467efc4556ccfd27816e004270a9eeeeb4d062ce5e6fc7a87c573f93"},
-    {file = "pathspec-0.10.1.tar.gz", hash = "sha256:7ace6161b621d31e7902eb6b5ae148d12cfd23f4a249b9ffb6b9fee12084323d"},
+    {file = "pathspec-0.10.0-py3-none-any.whl", hash = "sha256:aefa80ac32d5bf1f96139dca67cefb69a431beff4e6bf1168468f37d7ab87015"},
+    {file = "pathspec-0.10.0.tar.gz", hash = "sha256:01eecd304ba0e6eeed188ae5fa568e99ef10265af7fd9ab737d6412b4ee0ab85"},
 ]
 platformdirs = [
    {file = "platformdirs-2.5.2-py3-none-any.whl", hash = "sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788"},
@ -1095,15 +1109,15 @@ py = [
    {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
 ]
 pycodestyle = [
-    {file = "pycodestyle-2.9.1-py2.py3-none-any.whl", hash = "sha256:d1735fc58b418fd7c5f658d28d943854f8a849b01a5d0a1e6f3f3fdd0166804b"},
-    {file = "pycodestyle-2.9.1.tar.gz", hash = "sha256:2c9607871d58c76354b697b42f5d57e1ada7d261c261efac224b664affdc5785"},
+    {file = "pycodestyle-2.8.0-py2.py3-none-any.whl", hash = "sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20"},
+    {file = "pycodestyle-2.8.0.tar.gz", hash = "sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f"},
 ]
 pycountry = [
    {file = "pycountry-22.3.5.tar.gz", hash = "sha256:b2163a246c585894d808f18783e19137cb70a0c18fb36748dc01fc6f109c1646"},
 ]
 pyflakes = [
-    {file = "pyflakes-2.5.0-py2.py3-none-any.whl", hash = "sha256:4579f67d887f804e67edb544428f264b7b24f435b263c4614f384135cea553d2"},
-    {file = "pyflakes-2.5.0.tar.gz", hash = "sha256:491feb020dca48ccc562a8c0cbe8df07ee13078df59813b83959cbdada312ea3"},
+    {file = "pyflakes-2.4.0-py2.py3-none-any.whl", hash = "sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e"},
+    {file = "pyflakes-2.4.0.tar.gz", hash = "sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c"},
 ]
 Pygments = [
    {file = "Pygments-2.13.0-py3-none-any.whl", hash = "sha256:f643f331ab57ba3c9d89212ee4a2dabc6e94f117cf4eefde99a0574720d14c42"},
@ -1114,8 +1128,8 @@ pyparsing = [
    {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"},
 ]
 pytest = [
-    {file = "pytest-7.1.3-py3-none-any.whl", hash = "sha256:1377bda3466d70b55e3f5cecfa55bb7cfcf219c7964629b967c37cf0bda818b7"},
-    {file = "pytest-7.1.3.tar.gz", hash = "sha256:4f365fec2dff9c1162f834d9f18af1ba13062db0c708bf7b946f8a5c76180c39"},
+    {file = "pytest-6.2.5-py3-none-any.whl", hash = "sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134"},
+    {file = "pytest-6.2.5.tar.gz", hash = "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89"},
 ]
 pytest-clarity = [
    {file = "pytest-clarity-1.0.1.tar.gz", hash = "sha256:505fe345fad4fe11c6a4187fe683f2c7c52c077caa1e135f3e483fe112db7772"},
@ -1165,47 +1179,45 @@ spdx-license-list = [
    {file = "spdx_license_list-0.5.2.tar.gz", hash = "sha256:952996f72ab807972dc2278bb9b91e5294767211e51f09aad9c0e2ff5b82a31b"},
 ]
 SQLAlchemy = [
-    {file = "SQLAlchemy-1.4.40-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:b07fc38e6392a65935dc8b486229679142b2ea33c94059366b4d8b56f1e35a97"},
-    {file = "SQLAlchemy-1.4.40-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:fb4edb6c354eac0fcc07cb91797e142f702532dbb16c1d62839d6eec35f814cf"},
-    {file = "SQLAlchemy-1.4.40-cp27-cp27m-win32.whl", hash = "sha256:2026632051a93997cf8f6fda14360f99230be1725b7ab2ef15be205a4b8a5430"},
-    {file = "SQLAlchemy-1.4.40-cp27-cp27m-win_amd64.whl", hash = "sha256:f2aa85aebc0ef6b342d5d3542f969caa8c6a63c8d36cf5098769158a9fa2123c"},
-    {file = "SQLAlchemy-1.4.40-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a0b9e3d81f86ba04007f0349e373a5b8c81ec2047aadb8d669caf8c54a092461"},
-    {file = "SQLAlchemy-1.4.40-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:1ab08141d93de83559f6a7d9a962830f918623a885b3759ec2b9d1a531ff28fe"},
-    {file = "SQLAlchemy-1.4.40-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00dd998b43b282c71de46b061627b5edb9332510eb1edfc5017b9e4356ed44ea"},
-    {file = "SQLAlchemy-1.4.40-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:bb342c0e25cc8f78a0e7c692da3b984f072666b316fbbec2a0e371cb4dfef5f0"},
-    {file = "SQLAlchemy-1.4.40-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23b693876ac7963b6bc7b1a5f3a2642f38d2624af834faad5933913928089d1b"},
-    {file = "SQLAlchemy-1.4.40-cp310-cp310-win32.whl", hash = "sha256:2cf50611ef4221ad587fb7a1708e61ff72966f84330c6317642e08d6db4138fd"},
-    {file = "SQLAlchemy-1.4.40-cp310-cp310-win_amd64.whl", hash = "sha256:26ee4dbac5dd7abf18bf3cd8f04e51f72c339caf702f68172d308888cd26c6c9"},
-    {file = "SQLAlchemy-1.4.40-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:b41b87b929118838bafc4bb18cf3c5cd1b3be4b61cd9042e75174df79e8ac7a2"},
-    {file = "SQLAlchemy-1.4.40-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:885e11638946472b4a0a7db8e6df604b2cf64d23dc40eedc3806d869fcb18fae"},
-    {file = "SQLAlchemy-1.4.40-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b7ff0a8bf0aec1908b92b8dfa1246128bf4f94adbdd3da6730e9c542e112542d"},
-    {file = "SQLAlchemy-1.4.40-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cfa8ab4ba0c97ab6bcae1f0948497d14c11b6c6ecd1b32b8a79546a0823d8211"},
-    {file = "SQLAlchemy-1.4.40-cp36-cp36m-win32.whl", hash = "sha256:d259fa08e4b3ed952c01711268bcf6cd2442b0c54866d64aece122f83da77c6d"},
-    {file = "SQLAlchemy-1.4.40-cp36-cp36m-win_amd64.whl", hash = "sha256:c8d974c991eef0cd29418a5957ae544559dc326685a6f26b3a914c87759bf2f4"},
-    {file = "SQLAlchemy-1.4.40-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:28b1791a30d62fc104070965f1a2866699c45bbf5adc0be0cf5f22935edcac58"},
-    {file = "SQLAlchemy-1.4.40-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b7ccdca6cd167611f4a62a8c2c0c4285c2535640d77108f782ce3f3cccb70f3a"},
-    {file = "SQLAlchemy-1.4.40-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:69deec3a94de10062080d91e1ba69595efeafeafe68b996426dec9720031fb25"},
-    {file = "SQLAlchemy-1.4.40-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63ad778f4e80913fb171247e4fa82123d0068615ae1d51a9791fc4284cb81748"},
-    {file = "SQLAlchemy-1.4.40-cp37-cp37m-win32.whl", hash = "sha256:9ced2450c9fd016f9232d976661623e54c450679eeefc7aa48a3d29924a63189"},
-    {file = "SQLAlchemy-1.4.40-cp37-cp37m-win_amd64.whl", hash = "sha256:cdee4d475e35684d210dc6b430ff8ca2ed0636378ac19b457e2f6f350d1f5acc"},
-    {file = "SQLAlchemy-1.4.40-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:08b47c971327e733ffd6bae2d4f50a7b761793efe69d41067fcba86282819eea"},
-    {file = "SQLAlchemy-1.4.40-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cf03d37819dc17a388d313919daf32058d19ba1e592efdf14ce8cbd997e6023"},
-    {file = "SQLAlchemy-1.4.40-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a62c0ecbb9976550f26f7bf75569f425e661e7249349487f1483115e5fc893a6"},
-    {file = "SQLAlchemy-1.4.40-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ec440990ab00650d0c7ea2c75bc225087afdd7ddcb248e3d934def4dff62762"},
-    {file = "SQLAlchemy-1.4.40-cp38-cp38-win32.whl", hash = "sha256:2b64955850a14b9d481c17becf0d3f62fb1bb31ac2c45c2caf5ad06d9e811187"},
-    {file = "SQLAlchemy-1.4.40-cp38-cp38-win_amd64.whl", hash = "sha256:959bf4390766a8696aa01285016c766b4eb676f712878aac5fce956dd49695d9"},
-    {file = "SQLAlchemy-1.4.40-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:0992f3cc640ec0f88f721e426da884c34ff0a60eb73d3d64172e23dfadfc8a0b"},
-    {file = "SQLAlchemy-1.4.40-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fa9e0d7832b7511b3b3fd0e67fac85ff11fd752834c143ca2364c9b778c0485a"},
-    {file = "SQLAlchemy-1.4.40-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c9d0f1a9538cc5e75f2ea0cb6c3d70155a1b7f18092c052e0d84105622a41b63"},
-    {file = "SQLAlchemy-1.4.40-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c956a5d1adb49a35d78ef0fae26717afc48a36262359bb5b0cbd7a3a247c26f"},
-    {file = "SQLAlchemy-1.4.40-cp39-cp39-win32.whl", hash = "sha256:6b70d02bbe1adbbf715d2249cacf9ac17c6f8d22dfcb3f1a4fbc5bf64364da8a"},
-    {file = "SQLAlchemy-1.4.40-cp39-cp39-win_amd64.whl", hash = "sha256:bf073c619b5a7f7cd731507d0fdc7329bee14b247a63b0419929e4acd24afea8"},
-    {file = "SQLAlchemy-1.4.40.tar.gz", hash = "sha256:44a660506080cc975e1dfa5776fe5f6315ddc626a77b50bf0eee18b0389ea265"},
+    {file = "SQLAlchemy-1.4.22-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:488608953385d6c127d2dcbc4b11f8d7f2f30b89f6bd27c01b042253d985cc2f"},
+    {file = "SQLAlchemy-1.4.22-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:5d856cc50fd26fc8dd04892ed5a5a3d7eeb914fea2c2e484183e2d84c14926e0"},
+    {file = "SQLAlchemy-1.4.22-cp27-cp27m-win32.whl", hash = "sha256:a00d9c6d3a8afe1d1681cd8a5266d2f0ed684b0b44bada2ca82403b9e8b25d39"},
+    {file = "SQLAlchemy-1.4.22-cp27-cp27m-win_amd64.whl", hash = "sha256:5908ea6c652a050d768580d01219c98c071e71910ab8e7b42c02af4010608397"},
+    {file = "SQLAlchemy-1.4.22-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:b7fb937c720847879c7402fe300cfdb2aeff22349fa4ea3651bca4e2d6555939"},
+    {file = "SQLAlchemy-1.4.22-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:9bfe882d5a1bbde0245dca0bd48da0976bd6634cf2041d2fdf0417c5463e40e5"},
+    {file = "SQLAlchemy-1.4.22-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eedd76f135461cf237534a6dc0d1e0f6bb88a1dc193678fab48a11d223462da5"},
+    {file = "SQLAlchemy-1.4.22-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:6a16c7c4452293da5143afa3056680db2d187b380b3ef4d470d4e29885720de3"},
+    {file = "SQLAlchemy-1.4.22-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:44d23ea797a5e0be71bc5454b9ae99158ea0edc79e2393c6e9a2354de88329c0"},
+    {file = "SQLAlchemy-1.4.22-cp36-cp36m-win32.whl", hash = "sha256:a5e14cb0c0a4ac095395f24575a0e7ab5d1be27f5f9347f1762f21505e3ba9f1"},
+    {file = "SQLAlchemy-1.4.22-cp36-cp36m-win_amd64.whl", hash = "sha256:bc34a007e604091ca3a4a057525efc4cefd2b7fe970f44d20b9cfa109ab1bddb"},
+    {file = "SQLAlchemy-1.4.22-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:756f5d2f5b92d27450167247fb574b09c4cd192a3f8c2e493b3e518a204ee543"},
+    {file = "SQLAlchemy-1.4.22-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fcbb4b4756b250ed19adc5e28c005b8ed56fdb5c21efa24c6822c0575b4964d"},
+    {file = "SQLAlchemy-1.4.22-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:09dbb4bc01a734ccddbf188deb2a69aede4b3c153a72b6d5c6900be7fb2945b1"},
+    {file = "SQLAlchemy-1.4.22-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f028ef6a1d828bc754852a022b2160e036202ac8658a6c7d34875aafd14a9a15"},
+    {file = "SQLAlchemy-1.4.22-cp37-cp37m-win32.whl", hash = "sha256:68393d3fd31469845b6ba11f5b4209edbea0b58506be0e077aafbf9aa2e21e11"},
+    {file = "SQLAlchemy-1.4.22-cp37-cp37m-win_amd64.whl", hash = "sha256:891927a49b2363a4199763a9d436d97b0b42c65922a4ea09025600b81a00d17e"},
+    {file = "SQLAlchemy-1.4.22-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:fd2102a8f8a659522719ed73865dff3d3cc76eb0833039dc473e0ad3041d04be"},
+    {file = "SQLAlchemy-1.4.22-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4014978de28163cd8027434916a92d0f5bb1a3a38dff5e8bf8bff4d9372a9117"},
+    {file = "SQLAlchemy-1.4.22-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f814d80844969b0d22ea63663da4de5ca1c434cfbae226188901e5d368792c17"},
+    {file = "SQLAlchemy-1.4.22-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d09a760b0a045b4d799102ae7965b5491ccf102123f14b2a8cc6c01d1021a2d9"},
+    {file = "SQLAlchemy-1.4.22-cp38-cp38-win32.whl", hash = "sha256:26daa429f039e29b1e523bf763bfab17490556b974c77b5ca7acb545b9230e9a"},
+    {file = "SQLAlchemy-1.4.22-cp38-cp38-win_amd64.whl", hash = "sha256:12bac5fa1a6ea870bdccb96fe01610641dd44ebe001ed91ef7fcd980e9702db5"},
+    {file = "SQLAlchemy-1.4.22-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:39b5d36ab71f73c068cdcf70c38075511de73616e6c7fdd112d6268c2704d9f5"},
+    {file = "SQLAlchemy-1.4.22-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5102b9face693e8b2db3b2539c7e1a5d9a5b4dc0d79967670626ffd2f710d6e6"},
+    {file = "SQLAlchemy-1.4.22-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c9373ef67a127799027091fa53449125351a8c943ddaa97bec4e99271dbb21f4"},
+    {file = "SQLAlchemy-1.4.22-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36a089dc604032d41343d86290ce85d4e6886012eea73faa88001260abf5ff81"},
+    {file = "SQLAlchemy-1.4.22-cp39-cp39-win32.whl", hash = "sha256:b48148ceedfb55f764562e04c00539bb9ea72bf07820ca15a594a9a049ff6b0e"},
+    {file = "SQLAlchemy-1.4.22-cp39-cp39-win_amd64.whl", hash = "sha256:1fdae7d980a2fa617d119d0dc13ecb5c23cc63a8b04ffcb5298f2c59d86851e9"},
+    {file = "SQLAlchemy-1.4.22.tar.gz", hash = "sha256:ec1be26cdccd60d180359a527d5980d959a26269a2c7b1b327a1eea0cab37ed8"},
 ]
 text-unidecode = [
    {file = "text-unidecode-1.3.tar.gz", hash = "sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93"},
    {file = "text_unidecode-1.3-py2.py3-none-any.whl", hash = "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8"},
 ]
+toml = [
+    {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"},
+    {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"},
+]
 tomli = [
    {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
    {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
@ -1227,6 +1239,6 @@ wcwidth = [
    {file = "wcwidth-0.2.5.tar.gz", hash = "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83"},
 ]
 xlrd = [
-    {file = "xlrd-2.0.1-py2.py3-none-any.whl", hash = "sha256:6a33ee89877bd9abc1158129f6e94be74e2679636b8a205b43b85206c3f0bbdd"},
-    {file = "xlrd-2.0.1.tar.gz", hash = "sha256:f72f148f54442c6b056bf931dbc34f986fd0c3b0b6b5a58d013c9aef274d0c88"},
+    {file = "xlrd-1.2.0-py2.py3-none-any.whl", hash = "sha256:e551fb498759fa3a5384a94ccd4c3c02eb7c00ea424426e212ac0c57be9dfbde"},
+    {file = "xlrd-1.2.0.tar.gz", hash = "sha256:546eb36cee8db40c3eaa46c351e67ffee6eeb5fa2650b71bc4c758a29a1b29b2"},
 ]
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,6 +1,6 @@
 [tool.poetry]
 name = "csv-metadata-quality"
-version = "0.6.0"
+version = "0.6.0-dev"
 description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem."
 authors = ["Alan Orth <alan.orth@gmail.com>"]
 license="GPL-3.0-only"
@ -14,22 +14,24 @@ csv-metadata-quality = 'csv_metadata_quality.__main__:main'
 python = "^3.8"
 pandas = "^1.4.0"
 python-stdnum = "^1.13"
-requests = "^2.28.1"
-requests-cache = "^0.9.6"
-pycountry = "^22.3.5"
+xlrd = "^1.2.0"
+requests = "^2.27.1"
+requests-cache = "^0.9.1"
+pycountry = "^22.1.10"
 langid = "^1.1.6"
-colorama = "^0.4.5"
+colorama = "^0.4.4"
 spdx-license-list = "^0.5.2"
-ftfy = "^6.1.1"
-country-converter = "^0.7.7"
+ftfy = "^6.0"
+SQLAlchemy = ">=1.3.3,<1.4.23"
+country-converter = "^0.7.4"

 [tool.poetry.dev-dependencies]
-pytest = "^7.1.3"
-flake8 = "^5.0.4"
+pytest = "^6.1.1"
+flake8 = "^4.0.1"
 pytest-clarity = "^1.0.1"
-black = "^22.8.0"
-isort = "^5.10.1"
-csvkit = "^1.0.7"
+black = "^22.1.0"
+isort = "^5.5.4"
+csvkit = "^1.0.5"

 [build-system]
 requires = ["poetry>=0.12"]
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@ -3,6 +3,7 @@ agate-excel==0.2.5 ; python_version >= "3.8" and python_version < "4.0"
 agate-sql==0.5.8 ; python_version >= "3.8" and python_version < "4.0"
 agate==1.6.3 ; python_version >= "3.8" and python_version < "4.0"
 appdirs==1.4.4 ; python_version >= "3.8" and python_version < "4.0"
+atomicwrites==1.4.1 ; python_version >= "3.8" and python_version < "4.0" and sys_platform == "win32"
 attrs==22.1.0 ; python_version >= "3.8" and python_version < "4.0"
 babel==2.10.3 ; python_version >= "3.8" and python_version < "4.0"
 black==22.8.0 ; python_version >= "3.8" and python_version < "4.0"
@ -17,17 +18,17 @@ csvkit==1.0.7 ; python_version >= "3.8" and python_version < "4.0"
 dbfread==2.0.7 ; python_version >= "3.8" and python_version < "4.0"
 et-xmlfile==1.1.0 ; python_version >= "3.8" and python_version < "4.0"
 exceptiongroup==1.0.0rc9 ; python_version >= "3.8" and python_version <= "3.10"
-flake8==5.0.4 ; python_version >= "3.8" and python_version < "4.0"
+flake8==4.0.1 ; python_version >= "3.8" and python_version < "4.0"
 ftfy==6.1.1 ; python_version >= "3.8" and python_version < "4"
 future==0.18.2 ; python_version >= "3.8" and python_version < "4.0"
-greenlet==1.1.3 ; python_version >= "3.8" and (platform_machine == "aarch64" or platform_machine == "ppc64le" or platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "AMD64" or platform_machine == "win32" or platform_machine == "WIN32") and python_version < "4.0"
+greenlet==1.1.3 ; python_version >= "3.8" and python_version < "4.0"
 idna==3.3 ; python_version >= "3.8" and python_version < "4"
 iniconfig==1.1.1 ; python_version >= "3.8" and python_version < "4.0"
 isodate==0.6.1 ; python_version >= "3.8" and python_version < "4.0"
 isort==5.10.1 ; python_version >= "3.8" and python_version < "4.0"
 langid==1.1.6 ; python_version >= "3.8" and python_version < "4.0"
 leather==0.3.4 ; python_version >= "3.8" and python_version < "4.0"
-mccabe==0.7.0 ; python_version >= "3.8" and python_version < "4.0"
+mccabe==0.6.1 ; python_version >= "3.8" and python_version < "4.0"
 mypy-extensions==0.4.3 ; python_version >= "3.8" and python_version < "4.0"
 numpy==1.23.2 ; python_version < "4.0" and python_version >= "3.8"
 olefile==0.46 ; python_version >= "3.8" and python_version < "4.0"
@ -35,18 +36,18 @@ openpyxl==3.0.10 ; python_version >= "3.8" and python_version < "4.0"
 packaging==21.3 ; python_version >= "3.8" and python_version < "4.0"
 pandas==1.4.4 ; python_version >= "3.8" and python_version < "4.0"
 parsedatetime==2.4 ; python_version >= "3.8" and python_version < "4.0"
-pathspec==0.10.1 ; python_version >= "3.8" and python_version < "4.0"
+pathspec==0.10.0 ; python_version >= "3.8" and python_version < "4.0"
 platformdirs==2.5.2 ; python_version >= "3.8" and python_version < "4.0"
 pluggy==1.0.0 ; python_version >= "3.8" and python_version < "4.0"
 pprintpp==0.4.0 ; python_version >= "3.8" and python_version < "4.0"
 py==1.11.0 ; python_version >= "3.8" and python_version < "4.0"
-pycodestyle==2.9.1 ; python_version >= "3.8" and python_version < "4.0"
+pycodestyle==2.8.0 ; python_version >= "3.8" and python_version < "4.0"
 pycountry==22.3.5 ; python_version >= "3.8" and python_version < "4"
-pyflakes==2.5.0 ; python_version >= "3.8" and python_version < "4.0"
+pyflakes==2.4.0 ; python_version >= "3.8" and python_version < "4.0"
 pygments==2.13.0 ; python_version >= "3.8" and python_version < "4.0"
 pyparsing==3.0.9 ; python_version >= "3.8" and python_version < "4.0"
 pytest-clarity==1.0.1 ; python_version >= "3.8" and python_version < "4.0"
-pytest==7.1.3 ; python_version >= "3.8" and python_version < "4.0"
+pytest==6.2.5 ; python_version >= "3.8" and python_version < "4.0"
 python-dateutil==2.8.2 ; python_version >= "3.8" and python_version < "4.0"
 python-slugify==6.1.2 ; python_version >= "3.8" and python_version < "4.0"
 python-stdnum==1.17 ; python_version >= "3.8" and python_version < "4.0"
@ -58,11 +59,12 @@ rich==12.5.1 ; python_version >= "3.8" and python_version < "4.0"
 setuptools==65.3.0 ; python_version >= "3.8" and python_version < "4"
 six==1.16.0 ; python_version >= "3.8" and python_version < "4.0"
 spdx-license-list==0.5.2 ; python_version >= "3.8" and python_version < "4.0"
-sqlalchemy==1.4.40 ; python_version >= "3.8" and python_version < "4.0"
+sqlalchemy==1.4.22 ; python_version >= "3.8" and python_version < "4.0"
 text-unidecode==1.3 ; python_version >= "3.8" and python_version < "4.0"
-tomli==2.0.1 ; python_version >= "3.8" and python_version < "4.0"
+toml==0.10.2 ; python_version >= "3.8" and python_version < "4.0"
+tomli==2.0.1 ; python_version >= "3.8" and python_full_version < "3.11.0a7"
 typing-extensions==4.3.0 ; python_version >= "3.8" and python_version < "3.10"
 url-normalize==1.4.3 ; python_version >= "3.8" and python_version < "4.0"
 urllib3==1.26.12 ; python_version >= "3.8" and python_version < "4"
 wcwidth==0.2.5 ; python_version >= "3.8" and python_version < "4"
-xlrd==2.0.1 ; python_version >= "3.8" and python_version < "4.0"
+xlrd==1.2.0 ; python_version >= "3.8" and python_version < "4.0"
--- a/requirements.txt
+++ b/requirements.txt
@ -7,6 +7,7 @@ colorama==0.4.5 ; python_version >= "3.8" and python_version < "4.0"
 country-converter==0.7.7 ; python_version >= "3.8" and python_version < "4.0"
 exceptiongroup==1.0.0rc9 ; python_version >= "3.8" and python_version <= "3.10"
 ftfy==6.1.1 ; python_version >= "3.8" and python_version < "4"
+greenlet==1.1.3 ; python_version >= "3.8" and python_version < "4.0"
 idna==3.3 ; python_version >= "3.8" and python_version < "4"
 langid==1.1.6 ; python_version >= "3.8" and python_version < "4.0"
 numpy==1.23.2 ; python_version < "4.0" and python_version >= "3.8"
@ -20,6 +21,8 @@ requests==2.28.1 ; python_version >= "3.8" and python_version < "4"
 setuptools==65.3.0 ; python_version >= "3.8" and python_version < "4"
 six==1.16.0 ; python_version >= "3.8" and python_version < "4.0"
 spdx-license-list==0.5.2 ; python_version >= "3.8" and python_version < "4.0"
+sqlalchemy==1.4.22 ; python_version >= "3.8" and python_version < "4.0"
 url-normalize==1.4.3 ; python_version >= "3.8" and python_version < "4.0"
 urllib3==1.26.12 ; python_version >= "3.8" and python_version < "4"
 wcwidth==0.2.5 ; python_version >= "3.8" and python_version < "4"
+xlrd==1.2.0 ; python_version >= "3.8" and python_version < "4.0"
--- a/setup.py
+++ b/setup.py
@ -14,7 +14,7 @@ install_requires = [

 setuptools.setup(
    name="csv-metadata-quality",
-    version="0.6.0",
+    version="0.6.0-dev",
    author="Alan Orth",
    author_email="aorth@mjanja.ch",
    description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem.",
@ -23,6 +23,7 @@ setuptools.setup(
    long_description_content_type="text/markdown",
    url="https://github.com/alanorth/csv-metadata-quality",
    classifiers=[
+        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
--- a/tests/test_check.py
+++ b/tests/test_check.py
@ -257,13 +257,12 @@ def test_check_incorrect_iso_639_1_language(capsys):

    title = "A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle"
    language = "es"
-    exclude = list()

    # Create a dictionary to mimic Pandas series
    row = {"dc.title": title, "dc.language.iso": language}
    series = pd.Series(row)

-    experimental.correct_language(series, exclude)
+    experimental.correct_language(series)

    captured = capsys.readouterr()
    assert (
@ -277,13 +276,12 @@ def test_check_incorrect_iso_639_3_language(capsys):

    title = "A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle"
    language = "spa"
-    exclude = list()

    # Create a dictionary to mimic Pandas series
    row = {"dc.title": title, "dc.language.iso": language}
    series = pd.Series(row)

-    experimental.correct_language(series, exclude)
+    experimental.correct_language(series)

    captured = capsys.readouterr()
    assert (
@ -297,13 +295,12 @@ def test_check_correct_iso_639_1_language():

    title = "A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle"
    language = "en"
-    exclude = list()

    # Create a dictionary to mimic Pandas series
    row = {"dc.title": title, "dc.language.iso": language}
    series = pd.Series(row)

-    result = experimental.correct_language(series, exclude)
+    result = experimental.correct_language(series)

    assert result == None

@ -313,13 +310,12 @@ def test_check_correct_iso_639_3_language():

    title = "A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle"
    language = "eng"
-    exclude = list()

    # Create a dictionary to mimic Pandas series
    row = {"dc.title": title, "dc.language.iso": language}
    series = pd.Series(row)

-    result = experimental.correct_language(series, exclude)
+    result = experimental.correct_language(series)

    assert result == None

@ -407,9 +403,8 @@ def test_check_doi_field():
    # the citation and a DOI field.
    d = {"cg.identifier.doi": doi, "dcterms.bibliographicCitation": citation}
    series = pd.Series(data=d)
-    exclude = list()

-    result = check.citation_doi(series, exclude)
+    result = check.citation_doi(series)

    assert result == None

@ -418,14 +413,13 @@ def test_check_doi_only_in_citation(capsys):
    """Test an item with a DOI in its citation, but no DOI field."""

    citation = "Orth, A. 2021. Testing all the things. doi: 10.1186/1743-422X-9-218"
-    exclude = list()

    # Emulate a column in a transposed dataframe (which is just a series), with
    # an empty DOI field and a citation containing a DOI.
    d = {"cg.identifier.doi": None, "dcterms.bibliographicCitation": citation}
    series = pd.Series(data=d)

-    check.citation_doi(series, exclude)
+    check.citation_doi(series)

    captured = capsys.readouterr()
    assert (
@ -439,14 +433,13 @@ def test_title_in_citation():

    title = "Testing all the things"
    citation = "Orth, A. 2021. Testing all the things."
-    exclude = list()

    # Emulate a column in a transposed dataframe (which is just a series), with
    # the title and citation.
    d = {"dc.title": title, "dcterms.bibliographicCitation": citation}
    series = pd.Series(data=d)

-    result = check.title_in_citation(series, exclude)
+    result = check.title_in_citation(series)

    assert result == None

@ -456,14 +449,13 @@ def test_title_not_in_citation(capsys):

    title = "Testing all the things"
    citation = "Orth, A. 2021. Testing all teh things."
-    exclude = list()

    # Emulate a column in a transposed dataframe (which is just a series), with
    # the title and citation.
    d = {"dc.title": title, "dcterms.bibliographicCitation": citation}
    series = pd.Series(data=d)

-    check.title_in_citation(series, exclude)
+    check.title_in_citation(series)

    captured = capsys.readouterr()
    assert (
@ -477,13 +469,12 @@ def test_country_matches_region():

    country = "Kenya"
    region = "Eastern Africa"
-    exclude = list()

    # Emulate a column in a transposed dataframe (which is just a series)
    d = {"cg.coverage.country": country, "cg.coverage.region": region}
    series = pd.Series(data=d)

-    result = check.countries_match_regions(series, exclude)
+    result = check.countries_match_regions(series)

    assert result == None

@ -495,7 +486,6 @@ def test_country_not_matching_region(capsys):
    country = "Kenya"
    region = ""
    missing_region = "Eastern Africa"
-    exclude = list()

    # Emulate a column in a transposed dataframe (which is just a series)
    d = {
@ -505,7 +495,7 @@ def test_country_not_matching_region(capsys):
    }
    series = pd.Series(data=d)

-    check.countries_match_regions(series, exclude)
+    check.countries_match_regions(series)

    captured = capsys.readouterr()
    assert (
--- a/tests/test_fix.py
+++ b/tests/test_fix.py
@ -131,7 +131,6 @@ def test_fix_country_not_matching_region():
    country = "Kenya"
    region = ""
    missing_region = "Eastern Africa"
-    exclude = list()

    # Emulate a column in a transposed dataframe (which is just a series)
    d = {
@ -141,7 +140,7 @@ def test_fix_country_not_matching_region():
    }
    series = pd.Series(data=d)

-    result = fix.countries_match_regions(series, exclude)
+    result = fix.countries_match_regions(series)

    # Emulate the correct series we are expecting
    d_correct = {