1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-11-28 00:28:18 +01:00

Compare commits

..

No commits in common. "25ac290df45f4e139a70973f1013d2b5aa182ccc" and "17d089cc6eae0b050e2e5011b079a0626af6c0dd" have entirely different histories.

4 changed files with 78 additions and 23 deletions

View File

@ -16,30 +16,36 @@ jobs:
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- name: Install poetry - name: Set up Python 3.11
run: pipx install poetry uses: actions/setup-python@v4
- uses: actions/setup-python@v4
with: with:
python-version: '3.11' python-version: '3.11'
cache: 'poetry' cache: 'pip'
- run: poetry install - name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi
- name: Lint with flake8 - name: Lint with flake8
run: | run: |
# stop the build if there are Python syntax errors or undefined names # stop the build if there are Python syntax errors or undefined names
poetry run flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
poetry run flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with pytest - name: Test with pytest
run: poetry run pytest run: |
pytest
- name: Test CLI - name: Test CLI
run: | run: |
python setup.py install
# Basic test # Basic test
poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv csv-metadata-quality -i data/test.csv -o /tmp/test.csv
# Test with unsafe fixes # Test with unsafe fixes
poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u
# Test with experimental checks # Test with experimental checks
poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e
# Test with AGROVOC validation # Test with AGROVOC validation
poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject
# Test with AGROVOC validation (and dropping invalid) # Test with AGROVOC validation (and dropping invalid)
poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject -d csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject -d

29
poetry.lock generated
View File

@ -79,6 +79,17 @@ sqlalchemy = "<2"
docs = ["Sphinx (>=1.2.2)", "sphinx-rtd-theme (>=0.1.6)"] docs = ["Sphinx (>=1.2.2)", "sphinx-rtd-theme (>=0.1.6)"]
test = ["crate", "geojson", "pytest", "pytest-cov"] test = ["crate", "geojson", "pytest", "pytest-cov"]
[[package]]
name = "appdirs"
version = "1.4.4"
description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
optional = false
python-versions = "*"
files = [
{file = "appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128"},
{file = "appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41"},
]
[[package]] [[package]]
name = "appnope" name = "appnope"
version = "0.1.3" version = "0.1.3"
@ -1416,29 +1427,29 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
[[package]] [[package]]
name = "requests-cache" name = "requests-cache"
version = "1.0.1" version = "0.9.8"
description = "A persistent cache for python requests" description = "A transparent persistent cache for the requests library"
optional = false optional = false
python-versions = ">=3.7,<4.0" python-versions = ">=3.7,<4.0"
files = [ files = [
{file = "requests_cache-1.0.1-py3-none-any.whl", hash = "sha256:55c5765c26fd98a38c633d6e3931a507b7708cdd07c0afb48773d0718ac15969"}, {file = "requests_cache-0.9.8-py3-none-any.whl", hash = "sha256:3a16021a4b5014b5b32af9c34f07cb911e99a69074d664dfd4fddb62a2997c21"},
{file = "requests_cache-1.0.1.tar.gz", hash = "sha256:d42e6c2f11de54e6a134c9a00c5ca2a3c8edde3c3f2bdfd942586fafa8990e14"}, {file = "requests_cache-0.9.8.tar.gz", hash = "sha256:eaed4eb5fd5c392ba5e7cfa000d4ab96b1d32c1a1620f37aa558c43741ac362b"},
] ]
[package.dependencies] [package.dependencies]
appdirs = ">=1.4.4"
attrs = ">=21.2" attrs = ">=21.2"
cattrs = ">=22.2" cattrs = ">=22.2"
platformdirs = ">=2.5"
requests = ">=2.22" requests = ">=2.22"
url-normalize = ">=1.4" url-normalize = ">=1.4"
urllib3 = ">=1.25.5" urllib3 = ">=1.25.5"
[package.extras] [package.extras]
all = ["boto3 (>=1.15)", "botocore (>=1.18)", "itsdangerous (>=2.0)", "pymongo (>=3)", "pyyaml (>=5.4)", "redis (>=3)", "ujson (>=5.4)"] all = ["boto3 (>=1.15)", "botocore (>=1.18)", "itsdangerous (>=2.0)", "pymongo (>=3)", "pyyaml (>=5.4)", "redis (>=3)", "ujson (>=4.0)"]
bson = ["bson (>=0.5)"] bson = ["bson (>=0.5)"]
docs = ["furo (>=2022.12.7,<2023.0.0)", "linkify-it-py (>=2.0,<3.0)", "myst-parser (>=1.0,<2.0)", "sphinx (>=5.0.2,<6.0.0)", "sphinx-autodoc-typehints (>=1.19)", "sphinx-automodapi (>=0.14)", "sphinx-copybutton (>=0.5)", "sphinx-design (>=0.2)", "sphinx-notfound-page (>=0.8)", "sphinxcontrib-apidoc (>=0.3)", "sphinxext-opengraph (>=0.6)"] docs = ["furo (>=2021.9.8)", "linkify-it-py (>=1.0.1,<2.0.0)", "myst-parser (>=0.15.1,<0.16.0)", "sphinx (==4.3.0)", "sphinx-autodoc-typehints (>=1.11,<2.0)", "sphinx-automodapi (>=0.13,<0.15)", "sphinx-copybutton (>=0.3,<0.5)", "sphinx-inline-tabs (>=2022.1.2b11)", "sphinx-notfound-page (>=0.8)", "sphinx-panels (>=0.6,<0.7)", "sphinxcontrib-apidoc (>=0.3,<0.4)"]
dynamodb = ["boto3 (>=1.15)", "botocore (>=1.18)"] dynamodb = ["boto3 (>=1.15)", "botocore (>=1.18)"]
json = ["ujson (>=5.4)"] json = ["ujson (>=4.0)"]
mongodb = ["pymongo (>=3)"] mongodb = ["pymongo (>=3)"]
redis = ["redis (>=3)"] redis = ["redis (>=3)"]
security = ["itsdangerous (>=2.0)"] security = ["itsdangerous (>=2.0)"]
@ -1686,4 +1697,4 @@ test = ["pytest", "pytest-cov"]
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = "^3.9" python-versions = "^3.9"
content-hash = "eaaca6180d161500925dc49b864311cc90d1394d05f379e272679d672ba96aab" content-hash = "9b1d8847f81f5a935d7a12a43cbf462173ddd6ebe64764b538a5f7ab53cce58f"

View File

@ -15,7 +15,7 @@ python = "^3.9"
pandas = {version = "^2.0.2", extras = ["feather", "performance"]} pandas = {version = "^2.0.2", extras = ["feather", "performance"]}
python-stdnum = "^1.18" python-stdnum = "^1.18"
requests = "^2.28.2" requests = "^2.28.2"
requests-cache = "^1.0.0" requests-cache = "^0.9.8"
langid = "^1.1.6" langid = "^1.1.6"
colorama = "^0.4.6" colorama = "^0.4.6"
ftfy = "^6.1.1" ftfy = "^6.1.1"

38
setup.py Normal file
View File

@ -0,0 +1,38 @@
import setuptools
# Packaging script: reads README.md for the long description and registers
# the csv-metadata-quality distribution and its console entry point.

# Decode README.md explicitly as UTF-8 so the build does not depend on the
# platform's default locale encoding (which is not UTF-8 everywhere).
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

# Runtime dependencies installed alongside the package.
# NOTE(review): pyproject.toml also lists colorama and ftfy as dependencies;
# confirm whether they should be added here as well.
install_requires = [
    "pandas",
    "python-stdnum",
    "requests",
    "requests-cache",
    "pycountry",
    "langid",
]

setuptools.setup(
    name="csv-metadata-quality",
    version="0.6.1",
    author="Alan Orth",
    author_email="aorth@mjanja.ch",
    description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem.",
    license="GPLv3",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/alanorth/csv-metadata-quality",
    classifiers=[
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
        "Operating System :: OS Independent",
    ],
    packages=["csv_metadata_quality"],
    # Exposes the `csv-metadata-quality` command, dispatching to main() in
    # the package's __main__ module.
    entry_points={
        "console_scripts": ["csv-metadata-quality = csv_metadata_quality.__main__:main"]
    },
    include_package_data=True,
    install_requires=install_requires,
)