mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-11-28 00:28:18 +01:00
Compare commits
4 Commits
17d089cc6e
...
25ac290df4
Author | SHA1 | Date | |
---|---|---|---|
25ac290df4 | |||
3f52bad1e3 | |||
0208ad0ade | |||
|
3632ae0fc9 |
32
.github/workflows/python-app.yml
vendored
32
.github/workflows/python-app.yml
vendored
@ -16,36 +16,30 @@ jobs:
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python 3.11
|
||||
uses: actions/setup-python@v4
|
||||
- name: Install poetry
|
||||
run: pipx install poetry
|
||||
- uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: 'pip'
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install flake8 pytest
|
||||
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
||||
if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi
|
||||
cache: 'poetry'
|
||||
- run: poetry install
|
||||
- name: Lint with flake8
|
||||
run: |
|
||||
# stop the build if there are Python syntax errors or undefined names
|
||||
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
|
||||
poetry run flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
|
||||
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
|
||||
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
|
||||
poetry run flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
|
||||
- name: Test with pytest
|
||||
run: |
|
||||
pytest
|
||||
run: poetry run pytest
|
||||
- name: Test CLI
|
||||
run: |
|
||||
python setup.py install
|
||||
# Basic test
|
||||
csv-metadata-quality -i data/test.csv -o /tmp/test.csv
|
||||
poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv
|
||||
# Test with unsafe fixes
|
||||
csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u
|
||||
poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u
|
||||
# Test with experimental checks
|
||||
csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e
|
||||
poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e
|
||||
# Test with AGROVOC validation
|
||||
csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject
|
||||
poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject
|
||||
# Test with AGROVOC validation (and dropping invalid)
|
||||
csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject -d
|
||||
poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject -d
|
||||
|
29
poetry.lock
generated
29
poetry.lock
generated
@ -79,17 +79,6 @@ sqlalchemy = "<2"
|
||||
docs = ["Sphinx (>=1.2.2)", "sphinx-rtd-theme (>=0.1.6)"]
|
||||
test = ["crate", "geojson", "pytest", "pytest-cov"]
|
||||
|
||||
[[package]]
|
||||
name = "appdirs"
|
||||
version = "1.4.4"
|
||||
description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128"},
|
||||
{file = "appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "appnope"
|
||||
version = "0.1.3"
|
||||
@ -1427,29 +1416,29 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
|
||||
|
||||
[[package]]
|
||||
name = "requests-cache"
|
||||
version = "0.9.8"
|
||||
description = "A transparent persistent cache for the requests library"
|
||||
version = "1.0.1"
|
||||
description = "A persistent cache for python requests"
|
||||
optional = false
|
||||
python-versions = ">=3.7,<4.0"
|
||||
files = [
|
||||
{file = "requests_cache-0.9.8-py3-none-any.whl", hash = "sha256:3a16021a4b5014b5b32af9c34f07cb911e99a69074d664dfd4fddb62a2997c21"},
|
||||
{file = "requests_cache-0.9.8.tar.gz", hash = "sha256:eaed4eb5fd5c392ba5e7cfa000d4ab96b1d32c1a1620f37aa558c43741ac362b"},
|
||||
{file = "requests_cache-1.0.1-py3-none-any.whl", hash = "sha256:55c5765c26fd98a38c633d6e3931a507b7708cdd07c0afb48773d0718ac15969"},
|
||||
{file = "requests_cache-1.0.1.tar.gz", hash = "sha256:d42e6c2f11de54e6a134c9a00c5ca2a3c8edde3c3f2bdfd942586fafa8990e14"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
appdirs = ">=1.4.4"
|
||||
attrs = ">=21.2"
|
||||
cattrs = ">=22.2"
|
||||
platformdirs = ">=2.5"
|
||||
requests = ">=2.22"
|
||||
url-normalize = ">=1.4"
|
||||
urllib3 = ">=1.25.5"
|
||||
|
||||
[package.extras]
|
||||
all = ["boto3 (>=1.15)", "botocore (>=1.18)", "itsdangerous (>=2.0)", "pymongo (>=3)", "pyyaml (>=5.4)", "redis (>=3)", "ujson (>=4.0)"]
|
||||
all = ["boto3 (>=1.15)", "botocore (>=1.18)", "itsdangerous (>=2.0)", "pymongo (>=3)", "pyyaml (>=5.4)", "redis (>=3)", "ujson (>=5.4)"]
|
||||
bson = ["bson (>=0.5)"]
|
||||
docs = ["furo (>=2021.9.8)", "linkify-it-py (>=1.0.1,<2.0.0)", "myst-parser (>=0.15.1,<0.16.0)", "sphinx (==4.3.0)", "sphinx-autodoc-typehints (>=1.11,<2.0)", "sphinx-automodapi (>=0.13,<0.15)", "sphinx-copybutton (>=0.3,<0.5)", "sphinx-inline-tabs (>=2022.1.2b11)", "sphinx-notfound-page (>=0.8)", "sphinx-panels (>=0.6,<0.7)", "sphinxcontrib-apidoc (>=0.3,<0.4)"]
|
||||
docs = ["furo (>=2022.12.7,<2023.0.0)", "linkify-it-py (>=2.0,<3.0)", "myst-parser (>=1.0,<2.0)", "sphinx (>=5.0.2,<6.0.0)", "sphinx-autodoc-typehints (>=1.19)", "sphinx-automodapi (>=0.14)", "sphinx-copybutton (>=0.5)", "sphinx-design (>=0.2)", "sphinx-notfound-page (>=0.8)", "sphinxcontrib-apidoc (>=0.3)", "sphinxext-opengraph (>=0.6)"]
|
||||
dynamodb = ["boto3 (>=1.15)", "botocore (>=1.18)"]
|
||||
json = ["ujson (>=4.0)"]
|
||||
json = ["ujson (>=5.4)"]
|
||||
mongodb = ["pymongo (>=3)"]
|
||||
redis = ["redis (>=3)"]
|
||||
security = ["itsdangerous (>=2.0)"]
|
||||
@ -1697,4 +1686,4 @@ test = ["pytest", "pytest-cov"]
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.9"
|
||||
content-hash = "9b1d8847f81f5a935d7a12a43cbf462173ddd6ebe64764b538a5f7ab53cce58f"
|
||||
content-hash = "eaaca6180d161500925dc49b864311cc90d1394d05f379e272679d672ba96aab"
|
||||
|
@ -15,7 +15,7 @@ python = "^3.9"
|
||||
pandas = {version = "^2.0.2", extras = ["feather", "performance"]}
|
||||
python-stdnum = "^1.18"
|
||||
requests = "^2.28.2"
|
||||
requests-cache = "^0.9.8"
|
||||
requests-cache = "^1.0.0"
|
||||
langid = "^1.1.6"
|
||||
colorama = "^0.4.6"
|
||||
ftfy = "^6.1.1"
|
||||
|
38
setup.py
38
setup.py
@ -1,38 +0,0 @@
|
||||
import setuptools
|
||||
|
||||
with open("README.md", "r") as fh:
|
||||
long_description = fh.read()
|
||||
|
||||
install_requires = [
|
||||
"pandas",
|
||||
"python-stdnum",
|
||||
"requests",
|
||||
"requests-cache",
|
||||
"pycountry",
|
||||
"langid",
|
||||
]
|
||||
|
||||
setuptools.setup(
|
||||
name="csv-metadata-quality",
|
||||
version="0.6.1",
|
||||
author="Alan Orth",
|
||||
author_email="aorth@mjanja.ch",
|
||||
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem.",
|
||||
license="GPLv3",
|
||||
long_description=long_description,
|
||||
long_description_content_type="text/markdown",
|
||||
url="https://github.com/alanorth/csv-metadata-quality",
|
||||
classifiers=[
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
|
||||
"Operating System :: OS Independent",
|
||||
],
|
||||
packages=["csv_metadata_quality"],
|
||||
entry_points={
|
||||
"console_scripts": ["csv-metadata-quality = csv_metadata_quality.__main__:main"]
|
||||
},
|
||||
include_package_data=True,
|
||||
install_requires=install_requires,
|
||||
)
|
Loading…
Reference in New Issue
Block a user