mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2025-05-13 16:37:45 +02:00
Compare commits
77 Commits
v0.6.1
...
b305da3f0b
Author | SHA1 | Date | |
---|---|---|---|
b305da3f0b
|
|||
530cd5863b
|
|||
f6018c51b6
|
|||
80c3f5b45a
|
|||
ba4637ea34 | |||
355428a691 | |||
58d4de973e | |||
e1216dae3c | |||
6b650ff1b3 | |||
fa7bde6fc0 | |||
f89159fe32 | |||
02058c5a65 | |||
8fed6b71ff | |||
b005b28cbe | |||
c626290599 | |||
1a06470b64 | |||
d46a81672e | |||
2a50e75082 | |||
0d45e73983 | |||
3611aab425 | |||
5c4ad0eb41 | |||
f1f39722f6 | |||
1c03999582 | |||
1f637f32cd
|
|||
b8241e919d
|
|||
b8dc19cc3f
|
|||
93c9b739ac
|
|||
4ed2786703
|
|||
8728789183 | |||
bf90464809
|
|||
1878002391 | |||
d21d2621e3 | |||
f3fb1ff7fb | |||
1fa81f7558 | |||
7409193b6b | |||
a84fcf0b7b
|
|||
25ac290df4
|
|||
3f52bad1e3
|
|||
0208ad0ade | |||
3632ae0fc9 | |||
17d089cc6e
|
|||
bc470a4343
|
|||
be609a809d
|
|||
de3387ded7
|
|||
f343e87f0c
|
|||
7d3524fbd5
|
|||
c614b71a52 | |||
d159a839f3 | |||
36e2ebe5f4
|
|||
33f67b7a7c
|
|||
c0e1448439
|
|||
5d0804a08f
|
|||
f01c9edf17
|
|||
8d4295b2b3
|
|||
e2d46e9495
|
|||
1491e1edb0
|
|||
34142c3e6b
|
|||
0c88b96e8d
|
|||
2e55b4d6e3
|
|||
c90aad29f0
|
|||
6fd1e1377f
|
|||
c64b7eb1f1
|
|||
29cbc4f3a3
|
|||
307af1acfc
|
|||
b5106de9df
|
|||
9eeadfc44e
|
|||
d4aed378cf
|
|||
20a2cce34b
|
|||
d661ffe439
|
|||
45a310387a
|
|||
47b03c49ba
|
|||
986b81cbf4
|
|||
d43a47ae32
|
|||
ede37569f1
|
|||
0c53efe60a
|
|||
5f0e25b818
|
|||
4776154d6c
|
70
.drone.yml
70
.drone.yml
@ -1,3 +1,33 @@
|
|||||||
|
---
|
||||||
|
kind: pipeline
|
||||||
|
type: docker
|
||||||
|
name: python311
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: test
|
||||||
|
image: python:3.11-slim
|
||||||
|
commands:
|
||||||
|
- id
|
||||||
|
- python -V
|
||||||
|
- apt update && apt install -y gcc g++ libicu-dev pkg-config git
|
||||||
|
- python -m pip install poetry
|
||||||
|
- poetry install
|
||||||
|
- poetry run pytest
|
||||||
|
# Basic test
|
||||||
|
- poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv
|
||||||
|
# Basic test with unsafe fixes
|
||||||
|
- poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u
|
||||||
|
# Geography test
|
||||||
|
- poetry run csv-metadata-quality -i data/test-geography.csv -o /tmp/test.csv
|
||||||
|
# Geography test with unsafe fixes
|
||||||
|
- poetry run csv-metadata-quality -i data/test-geography.csv -o /tmp/test.csv -u
|
||||||
|
# Test with experimental checks
|
||||||
|
- poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e
|
||||||
|
# Test with AGROVOC validation
|
||||||
|
- poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject
|
||||||
|
# Test with AGROVOC validation (and dropping invalid)
|
||||||
|
- poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject -d
|
||||||
|
|
||||||
---
|
---
|
||||||
kind: pipeline
|
kind: pipeline
|
||||||
type: docker
|
type: docker
|
||||||
@ -10,23 +40,23 @@ steps:
|
|||||||
- id
|
- id
|
||||||
- python -V
|
- python -V
|
||||||
- apt update && apt install -y gcc g++ libicu-dev pkg-config git
|
- apt update && apt install -y gcc g++ libicu-dev pkg-config git
|
||||||
- pip install -r requirements-dev.txt
|
- python -m pip install poetry
|
||||||
- pytest
|
- poetry install
|
||||||
- python setup.py install
|
- poetry run pytest
|
||||||
# Basic test
|
# Basic test
|
||||||
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv
|
- poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv
|
||||||
# Basic test with unsafe fixes
|
# Basic test with unsafe fixes
|
||||||
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u
|
- poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u
|
||||||
# Geography test
|
# Geography test
|
||||||
- csv-metadata-quality -i data/test-geography.csv -o /tmp/test.csv
|
- poetry run csv-metadata-quality -i data/test-geography.csv -o /tmp/test.csv
|
||||||
# Geography test with unsafe fixes
|
# Geography test with unsafe fixes
|
||||||
- csv-metadata-quality -i data/test-geography.csv -o /tmp/test.csv -u
|
- poetry run csv-metadata-quality -i data/test-geography.csv -o /tmp/test.csv -u
|
||||||
# Test with experimental checks
|
# Test with experimental checks
|
||||||
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e
|
- poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e
|
||||||
# Test with AGROVOC validation
|
# Test with AGROVOC validation
|
||||||
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject
|
- poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject
|
||||||
# Test with AGROVOC validation (and dropping invalid)
|
# Test with AGROVOC validation (and dropping invalid)
|
||||||
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject -d
|
- poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject -d
|
||||||
|
|
||||||
---
|
---
|
||||||
kind: pipeline
|
kind: pipeline
|
||||||
@ -40,22 +70,22 @@ steps:
|
|||||||
- id
|
- id
|
||||||
- python -V
|
- python -V
|
||||||
- apt update && apt install -y gcc g++ libicu-dev pkg-config git
|
- apt update && apt install -y gcc g++ libicu-dev pkg-config git
|
||||||
- pip install -r requirements-dev.txt
|
- python -m pip install poetry
|
||||||
- pytest
|
- poetry install
|
||||||
- python setup.py install
|
- poetry run pytest
|
||||||
# Basic test
|
# Basic test
|
||||||
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv
|
- poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv
|
||||||
# Basic test with unsafe fixes
|
# Basic test with unsafe fixes
|
||||||
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u
|
- poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u
|
||||||
# Geography test
|
# Geography test
|
||||||
- csv-metadata-quality -i data/test-geography.csv -o /tmp/test.csv
|
- poetry run csv-metadata-quality -i data/test-geography.csv -o /tmp/test.csv
|
||||||
# Geography test with unsafe fixes
|
# Geography test with unsafe fixes
|
||||||
- csv-metadata-quality -i data/test-geography.csv -o /tmp/test.csv -u
|
- poetry run csv-metadata-quality -i data/test-geography.csv -o /tmp/test.csv -u
|
||||||
# Test with experimental checks
|
# Test with experimental checks
|
||||||
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e
|
- poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e
|
||||||
# Test with AGROVOC validation
|
# Test with AGROVOC validation
|
||||||
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject
|
- poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject
|
||||||
# Test with AGROVOC validation (and dropping invalid)
|
# Test with AGROVOC validation (and dropping invalid)
|
||||||
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject -d
|
- poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject -d
|
||||||
|
|
||||||
# vim: ts=2 sw=2 et
|
# vim: ts=2 sw=2 et
|
||||||
|
36
.github/workflows/python-app.yml
vendored
36
.github/workflows/python-app.yml
vendored
@ -15,37 +15,31 @@ jobs:
|
|||||||
runs-on: ubuntu-22.04
|
runs-on: ubuntu-22.04
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v4
|
||||||
- name: Set up Python 3.10
|
- name: Install poetry
|
||||||
uses: actions/setup-python@v4
|
run: pipx install poetry
|
||||||
|
- uses: actions/setup-python@v4
|
||||||
with:
|
with:
|
||||||
python-version: '3.10'
|
python-version: '3.11'
|
||||||
cache: 'pip'
|
cache: 'poetry'
|
||||||
- name: Install dependencies
|
- run: poetry install
|
||||||
run: |
|
|
||||||
python -m pip install --upgrade pip
|
|
||||||
pip install flake8 pytest
|
|
||||||
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
|
||||||
if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi
|
|
||||||
- name: Lint with flake8
|
- name: Lint with flake8
|
||||||
run: |
|
run: |
|
||||||
# stop the build if there are Python syntax errors or undefined names
|
# stop the build if there are Python syntax errors or undefined names
|
||||||
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
|
poetry run flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
|
||||||
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
|
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
|
||||||
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
|
poetry run flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
|
||||||
- name: Test with pytest
|
- name: Test with pytest
|
||||||
run: |
|
run: poetry run pytest
|
||||||
pytest
|
|
||||||
- name: Test CLI
|
- name: Test CLI
|
||||||
run: |
|
run: |
|
||||||
python setup.py install
|
|
||||||
# Basic test
|
# Basic test
|
||||||
csv-metadata-quality -i data/test.csv -o /tmp/test.csv
|
poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv
|
||||||
# Test with unsafe fixes
|
# Test with unsafe fixes
|
||||||
csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u
|
poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u
|
||||||
# Test with experimental checks
|
# Test with experimental checks
|
||||||
csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e
|
poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e
|
||||||
# Test with AGROVOC validation
|
# Test with AGROVOC validation
|
||||||
csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject
|
poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject
|
||||||
# Test with AGROVOC validation (and dropping invalid)
|
# Test with AGROVOC validation (and dropping invalid)
|
||||||
csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject -d
|
poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject -d
|
||||||
|
15
CHANGELOG.md
15
CHANGELOG.md
@ -4,6 +4,21 @@ All notable changes to this project will be documented in this file.
|
|||||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
|
## Unreleased
|
||||||
|
### Fixed
|
||||||
|
- Fixed regex so we don't run the invalid multi-value separator fix on
|
||||||
|
`dcterms.bibliographicCitation` fields
|
||||||
|
- Fixed regex so we run the comma space fix on `dcterms.bibliographicCitation`
|
||||||
|
fields
|
||||||
|
- Don't crash the country/region checker/fixer when a title field is missing
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
- Don't run newline fix on description fields
|
||||||
|
- Install requests-cache in main run() function instead of check.agrovoc() function so we only incur the overhead once
|
||||||
|
|
||||||
|
### Updated
|
||||||
|
- Python dependencies, including Pandas 2.0.0 and [Arrow-backed dtypes](https://datapythonista.me/blog/pandas-20-and-the-arrow-revolution-part-i)
|
||||||
|
|
||||||
## [0.6.1] - 2023-02-23
|
## [0.6.1] - 2023-02-23
|
||||||
### Fixed
|
### Fixed
|
||||||
- Missing region check should ignore subregion field, if it exists
|
- Missing region check should ignore subregion field, if it exists
|
||||||
|
1
MANIFEST.in
Normal file
1
MANIFEST.in
Normal file
@ -0,0 +1 @@
|
|||||||
|
include csv_metadata_quality/data/licenses.json
|
@ -127,7 +127,6 @@ This currently uses the [Python langid](https://github.com/saffsd/langid.py) lib
|
|||||||
- Warn if an author is shorter than 3 characters?
|
- Warn if an author is shorter than 3 characters?
|
||||||
- Validate DOIs? Normalize to https://doi.org format? Or use just the DOI part: 10.1016/j.worlddev.2010.06.006
|
- Validate DOIs? Normalize to https://doi.org format? Or use just the DOI part: 10.1016/j.worlddev.2010.06.006
|
||||||
- Warn if two items use the same file in `filename` column
|
- Warn if two items use the same file in `filename` column
|
||||||
- Add an option to drop invalid AGROVOC subjects?
|
|
||||||
- Add tests for application invocation, ie `tests/test_app.py`?
|
- Add tests for application invocation, ie `tests/test_app.py`?
|
||||||
- Validate ISSNs or journal titles against CrossRef API?
|
- Validate ISSNs or journal titles against CrossRef API?
|
||||||
- Add configurable field validation, like specify a field name and a validation file?
|
- Add configurable field validation, like specify a field name and a validation file?
|
||||||
@ -137,7 +136,7 @@ This currently uses the [Python langid](https://github.com/saffsd/langid.py) lib
|
|||||||
- Warn if item is Open Access, but missing a license
|
- Warn if item is Open Access, but missing a license
|
||||||
- Warn if item has an ISSN but no journal title
|
- Warn if item has an ISSN but no journal title
|
||||||
- Update journal titles from ISSN
|
- Update journal titles from ISSN
|
||||||
- Migrate to https://github.com/spdx/license-list-data
|
- Migrate from Pandas to Polars
|
||||||
|
|
||||||
## License
|
## License
|
||||||
This work is licensed under the [GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html).
|
This work is licensed under the [GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html).
|
||||||
|
@ -1,11 +1,14 @@
|
|||||||
# SPDX-License-Identifier: GPL-3.0-only
|
# SPDX-License-Identifier: GPL-3.0-only
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
import signal
|
import signal
|
||||||
import sys
|
import sys
|
||||||
|
from datetime import timedelta
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
import requests_cache
|
||||||
from colorama import Fore
|
from colorama import Fore
|
||||||
|
|
||||||
import csv_metadata_quality.check as check
|
import csv_metadata_quality.check as check
|
||||||
@ -74,7 +77,7 @@ def run(argv):
|
|||||||
signal.signal(signal.SIGINT, signal_handler)
|
signal.signal(signal.SIGINT, signal_handler)
|
||||||
|
|
||||||
# Read all fields as strings so dates don't get converted from 1998 to 1998.0
|
# Read all fields as strings so dates don't get converted from 1998 to 1998.0
|
||||||
df = pd.read_csv(args.input_file, dtype=str)
|
df = pd.read_csv(args.input_file, dtype_backend="pyarrow", dtype="str")
|
||||||
|
|
||||||
# Check if the user requested to skip any fields
|
# Check if the user requested to skip any fields
|
||||||
if args.exclude_fields:
|
if args.exclude_fields:
|
||||||
@ -82,7 +85,20 @@ def run(argv):
|
|||||||
# user should be careful to not include spaces here.
|
# user should be careful to not include spaces here.
|
||||||
exclude = args.exclude_fields.split(",")
|
exclude = args.exclude_fields.split(",")
|
||||||
else:
|
else:
|
||||||
exclude = list()
|
exclude = []
|
||||||
|
|
||||||
|
# enable transparent request cache with thirty days expiry
|
||||||
|
expire_after = timedelta(days=30)
|
||||||
|
# Allow overriding the location of the requests cache, just in case we are
|
||||||
|
# running in an environment where we can't write to the current working di-
|
||||||
|
# rectory (for example from csv-metadata-quality-web).
|
||||||
|
REQUESTS_CACHE_DIR = os.environ.get("REQUESTS_CACHE_DIR", ".")
|
||||||
|
requests_cache.install_cache(
|
||||||
|
f"{REQUESTS_CACHE_DIR}/agrovoc-response-cache", expire_after=expire_after
|
||||||
|
)
|
||||||
|
|
||||||
|
# prune old cache entries
|
||||||
|
requests_cache.delete()
|
||||||
|
|
||||||
for column in df.columns:
|
for column in df.columns:
|
||||||
if column in exclude:
|
if column in exclude:
|
||||||
@ -91,7 +107,9 @@ def run(argv):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
if args.unsafe_fixes:
|
if args.unsafe_fixes:
|
||||||
match = re.match(r"^.*?abstract.*$", column)
|
# Skip whitespace and newline fixes on abstracts and descriptions
|
||||||
|
# because there are too many with legitimate multi-line metadata.
|
||||||
|
match = re.match(r"^.*?(abstract|description).*$", column)
|
||||||
if match is None:
|
if match is None:
|
||||||
# Fix: whitespace
|
# Fix: whitespace
|
||||||
df[column] = df[column].apply(fix.whitespace, field_name=column)
|
df[column] = df[column].apply(fix.whitespace, field_name=column)
|
||||||
@ -102,7 +120,7 @@ def run(argv):
|
|||||||
# Fix: missing space after comma. Only run on author and citation
|
# Fix: missing space after comma. Only run on author and citation
|
||||||
# fields for now, as this problem is mostly an issue in names.
|
# fields for now, as this problem is mostly an issue in names.
|
||||||
if args.unsafe_fixes:
|
if args.unsafe_fixes:
|
||||||
match = re.match(r"^.*?(author|citation).*$", column)
|
match = re.match(r"^.*?(author|[Cc]itation).*$", column)
|
||||||
if match is not None:
|
if match is not None:
|
||||||
df[column] = df[column].apply(fix.comma_space, field_name=column)
|
df[column] = df[column].apply(fix.comma_space, field_name=column)
|
||||||
|
|
||||||
@ -126,7 +144,7 @@ def run(argv):
|
|||||||
# Fix: invalid and unnecessary multi-value separators. Skip the title
|
# Fix: invalid and unnecessary multi-value separators. Skip the title
|
||||||
# and abstract fields because "|" is used to indicate something like
|
# and abstract fields because "|" is used to indicate something like
|
||||||
# a subtitle.
|
# a subtitle.
|
||||||
match = re.match(r"^.*?(abstract|title).*$", column)
|
match = re.match(r"^.*?(abstract|[Cc]itation|title).*$", column)
|
||||||
if match is None:
|
if match is None:
|
||||||
df[column] = df[column].apply(fix.separators, field_name=column)
|
df[column] = df[column].apply(fix.separators, field_name=column)
|
||||||
# Run whitespace fix again after fixing invalid separators
|
# Run whitespace fix again after fixing invalid separators
|
||||||
|
@ -1,14 +1,12 @@
|
|||||||
# SPDX-License-Identifier: GPL-3.0-only
|
# SPDX-License-Identifier: GPL-3.0-only
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
|
||||||
import re
|
import re
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
import country_converter as coco
|
import country_converter as coco
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import requests
|
import requests
|
||||||
import requests_cache
|
|
||||||
from colorama import Fore
|
from colorama import Fore
|
||||||
from pycountry import languages
|
from pycountry import languages
|
||||||
from stdnum import isbn as stdnum_isbn
|
from stdnum import isbn as stdnum_isbn
|
||||||
@ -203,25 +201,12 @@ def agrovoc(field, field_name, drop):
|
|||||||
if pd.isna(field):
|
if pd.isna(field):
|
||||||
return
|
return
|
||||||
|
|
||||||
# enable transparent request cache with thirty days expiry
|
|
||||||
expire_after = timedelta(days=30)
|
|
||||||
# Allow overriding the location of the requests cache, just in case we are
|
|
||||||
# running in an environment where we can't write to the current working di-
|
|
||||||
# rectory (for example from csv-metadata-quality-web).
|
|
||||||
REQUESTS_CACHE_DIR = os.environ.get("REQUESTS_CACHE_DIR", ".")
|
|
||||||
requests_cache.install_cache(
|
|
||||||
f"{REQUESTS_CACHE_DIR}/agrovoc-response-cache", expire_after=expire_after
|
|
||||||
)
|
|
||||||
|
|
||||||
# prune old cache entries
|
|
||||||
# requests_cache.remove_expired_responses()
|
|
||||||
|
|
||||||
# Initialize an empty list to hold the validated AGROVOC values
|
# Initialize an empty list to hold the validated AGROVOC values
|
||||||
values = list()
|
values = []
|
||||||
|
|
||||||
# Try to split multi-value field on "||" separator
|
# Try to split multi-value field on "||" separator
|
||||||
for value in field.split("||"):
|
for value in field.split("||"):
|
||||||
request_url = "http://agrovoc.uniroma2.it/agrovoc/rest/v1/agrovoc/search"
|
request_url = "https://agrovoc.uniroma2.it/agrovoc/rest/v1/agrovoc/search"
|
||||||
request_params = {"query": value}
|
request_params = {"query": value}
|
||||||
|
|
||||||
request = requests.get(request_url, params=request_params)
|
request = requests.get(request_url, params=request_params)
|
||||||
@ -373,7 +358,7 @@ def duplicate_items(df):
|
|||||||
|
|
||||||
if items_count_unique < items_count_total:
|
if items_count_unique < items_count_total:
|
||||||
# Create a list to hold our items while we check for duplicates
|
# Create a list to hold our items while we check for duplicates
|
||||||
items = list()
|
items = []
|
||||||
|
|
||||||
for index, row in df.iterrows():
|
for index, row in df.iterrows():
|
||||||
item_title_type_date = f"{row[title_column_name]}{row[type_column_name]}{row[date_column_name]}"
|
item_title_type_date = f"{row[title_column_name]}{row[type_column_name]}{row[date_column_name]}"
|
||||||
@ -554,7 +539,7 @@ def countries_match_regions(row, exclude):
|
|||||||
if row[region_column_name] is not None:
|
if row[region_column_name] is not None:
|
||||||
regions = row[region_column_name].split("||")
|
regions = row[region_column_name].split("||")
|
||||||
else:
|
else:
|
||||||
regions = list()
|
regions = []
|
||||||
|
|
||||||
for country in countries:
|
for country in countries:
|
||||||
# Look up the UN M.49 regions for this country code. CoCo seems to
|
# Look up the UN M.49 regions for this country code. CoCo seems to
|
||||||
@ -563,8 +548,13 @@ def countries_match_regions(row, exclude):
|
|||||||
un_region = cc.convert(names=country, to="UNRegion")
|
un_region = cc.convert(names=country, to="UNRegion")
|
||||||
|
|
||||||
if un_region != "not found" and un_region not in regions:
|
if un_region != "not found" and un_region not in regions:
|
||||||
print(
|
try:
|
||||||
f"{Fore.YELLOW}Missing region ({country} → {un_region}): {Fore.RESET}{row[title_column_name]}"
|
print(
|
||||||
)
|
f"{Fore.YELLOW}Missing region ({country} → {un_region}): {Fore.RESET}{row[title_column_name]}"
|
||||||
|
)
|
||||||
|
except KeyError:
|
||||||
|
print(
|
||||||
|
f"{Fore.YELLOW}Missing region ({country} → {un_region}): {Fore.RESET}<title field not present>"
|
||||||
|
)
|
||||||
|
|
||||||
return
|
return
|
||||||
|
@ -20,7 +20,7 @@ def correct_language(row, exclude):
|
|||||||
# Initialize some variables at function scope so that we can set them in the
|
# Initialize some variables at function scope so that we can set them in the
|
||||||
# loop scope below and still be able to access them afterwards.
|
# loop scope below and still be able to access them afterwards.
|
||||||
language = ""
|
language = ""
|
||||||
sample_strings = list()
|
sample_strings = []
|
||||||
title = None
|
title = None
|
||||||
|
|
||||||
# Iterate over the labels of the current row's values. Before we transposed
|
# Iterate over the labels of the current row's values. Before we transposed
|
||||||
|
@ -23,7 +23,7 @@ def whitespace(field, field_name):
|
|||||||
return
|
return
|
||||||
|
|
||||||
# Initialize an empty list to hold the cleaned values
|
# Initialize an empty list to hold the cleaned values
|
||||||
values = list()
|
values = []
|
||||||
|
|
||||||
# Try to split multi-value field on "||" separator
|
# Try to split multi-value field on "||" separator
|
||||||
for value in field.split("||"):
|
for value in field.split("||"):
|
||||||
@ -64,7 +64,7 @@ def separators(field, field_name):
|
|||||||
return
|
return
|
||||||
|
|
||||||
# Initialize an empty list to hold the cleaned values
|
# Initialize an empty list to hold the cleaned values
|
||||||
values = list()
|
values = []
|
||||||
|
|
||||||
# Try to split multi-value field on "||" separator
|
# Try to split multi-value field on "||" separator
|
||||||
for value in field.split("||"):
|
for value in field.split("||"):
|
||||||
@ -175,7 +175,7 @@ def duplicates(field, field_name):
|
|||||||
values = field.split("||")
|
values = field.split("||")
|
||||||
|
|
||||||
# Initialize an empty list to hold the de-duplicated values
|
# Initialize an empty list to hold the de-duplicated values
|
||||||
new_values = list()
|
new_values = []
|
||||||
|
|
||||||
# Iterate over all values
|
# Iterate over all values
|
||||||
for value in values:
|
for value in values:
|
||||||
@ -355,10 +355,10 @@ def countries_match_regions(row, exclude):
|
|||||||
if row[region_column_name] is not None:
|
if row[region_column_name] is not None:
|
||||||
regions = row[region_column_name].split("||")
|
regions = row[region_column_name].split("||")
|
||||||
else:
|
else:
|
||||||
regions = list()
|
regions = []
|
||||||
|
|
||||||
# An empty list for our regions so we can keep track for all countries
|
# An empty list for our regions so we can keep track for all countries
|
||||||
missing_regions = list()
|
missing_regions = []
|
||||||
|
|
||||||
for country in countries:
|
for country in countries:
|
||||||
# Look up the UN M.49 regions for this country code. CoCo seems to
|
# Look up the UN M.49 regions for this country code. CoCo seems to
|
||||||
@ -370,9 +370,17 @@ def countries_match_regions(row, exclude):
|
|||||||
# it doesn't already exist in regions.
|
# it doesn't already exist in regions.
|
||||||
if un_region != "not found" and un_region not in regions:
|
if un_region != "not found" and un_region not in regions:
|
||||||
if un_region not in missing_regions:
|
if un_region not in missing_regions:
|
||||||
print(
|
try:
|
||||||
f"{Fore.YELLOW}Adding missing region ({un_region}): {Fore.RESET}{row[title_column_name]}"
|
print(
|
||||||
)
|
f"{Fore.YELLOW}Adding missing region ({un_region}): {Fore.RESET}{row[title_column_name]}"
|
||||||
|
)
|
||||||
|
except KeyError:
|
||||||
|
# If there is no title column in the CSV we will print
|
||||||
|
# the fix without the title instead of crashing.
|
||||||
|
print(
|
||||||
|
f"{Fore.YELLOW}Adding missing region ({un_region}): {Fore.RESET}<title field not present>"
|
||||||
|
)
|
||||||
|
|
||||||
missing_regions.append(un_region)
|
missing_regions.append(un_region)
|
||||||
|
|
||||||
if len(missing_regions) > 0:
|
if len(missing_regions) > 0:
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
|
|
||||||
import json
|
import json
|
||||||
from importlib.resources import files
|
import os
|
||||||
|
|
||||||
from ftfy.badness import is_bad
|
from ftfy.badness import is_bad
|
||||||
|
|
||||||
@ -58,7 +58,7 @@ def is_mojibake(field):
|
|||||||
def load_spdx_licenses():
|
def load_spdx_licenses():
|
||||||
"""Returns a Python list of SPDX short license identifiers."""
|
"""Returns a Python list of SPDX short license identifiers."""
|
||||||
|
|
||||||
with open(files("csv_metadata_quality").joinpath("data/licenses.json")) as f:
|
with open(os.path.join(os.path.dirname(__file__), "data/licenses.json")) as f:
|
||||||
licenses = json.load(f)
|
licenses = json.load(f)
|
||||||
|
|
||||||
# List comprehension to extract the license ID for each license
|
# List comprehension to extract the license ID for each license
|
||||||
|
1684
poetry.lock
generated
1684
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@ -12,26 +12,25 @@ csv-metadata-quality = 'csv_metadata_quality.__main__:main'
|
|||||||
|
|
||||||
[tool.poetry.dependencies]
|
[tool.poetry.dependencies]
|
||||||
python = "^3.9"
|
python = "^3.9"
|
||||||
pandas = "^1.5.2"
|
pandas = {version = "^2.0.2", extras = ["feather", "performance"]}
|
||||||
python-stdnum = "^1.18"
|
python-stdnum = "^1.18"
|
||||||
requests = "^2.28.2"
|
requests = "^2.28.2"
|
||||||
requests-cache = "^0.9.8"
|
requests-cache = "^1.0.0"
|
||||||
langid = "^1.1.6"
|
langid = "^1.1.6"
|
||||||
colorama = "^0.4.6"
|
colorama = "^0.4.6"
|
||||||
ftfy = "^6.1.1"
|
ftfy = "^6.1.1"
|
||||||
country-converter = {git = "https://github.com/alanorth/country_converter.git", rev = "myanmar-region"}
|
country-converter = "~1.1.0"
|
||||||
pycountry = {git = "https://github.com/alanorth/pycountry", rev = "iso-codes-4.12.0"}
|
pycountry = {git = "https://github.com/alanorth/pycountry", rev = "iso-codes-4.15.0"}
|
||||||
|
|
||||||
[tool.poetry.dev-dependencies]
|
[tool.poetry.group.dev.dependencies]
|
||||||
pytest = "^7.2.1"
|
pytest = "^7.2.1"
|
||||||
flake8 = "^6.0.0"
|
flake8 = "^6.0.0"
|
||||||
pytest-clarity = "^1.0.1"
|
pytest-clarity = "^1.0.1"
|
||||||
black = "^23.1.0"
|
black = "^23.1.0"
|
||||||
isort = "^5.12.0"
|
isort = "^5.12.0"
|
||||||
csvkit = "^1.1.0"
|
csvkit = "^1.1.0"
|
||||||
|
|
||||||
[tool.poetry.group.dev.dependencies]
|
|
||||||
ipython = "^8.10.0"
|
ipython = "^8.10.0"
|
||||||
|
fixit = "^2.1.0"
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = ["poetry>=0.12"]
|
requires = ["poetry>=0.12"]
|
||||||
|
9
renovate.json
Normal file
9
renovate.json
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"$schema": "https://docs.renovatebot.com/renovate-schema.json",
|
||||||
|
"extends": [
|
||||||
|
"config:base"
|
||||||
|
],
|
||||||
|
"pip_requirements": {
|
||||||
|
"enabled": false
|
||||||
|
}
|
||||||
|
}
|
@ -5,28 +5,28 @@ agate==1.7.1 ; python_version >= "3.9" and python_version < "4.0"
|
|||||||
appdirs==1.4.4 ; python_version >= "3.9" and python_version < "4.0"
|
appdirs==1.4.4 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
appnope==0.1.3 ; python_version >= "3.9" and python_version < "4.0" and sys_platform == "darwin"
|
appnope==0.1.3 ; python_version >= "3.9" and python_version < "4.0" and sys_platform == "darwin"
|
||||||
asttokens==2.2.1 ; python_version >= "3.9" and python_version < "4.0"
|
asttokens==2.2.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
attrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
attrs==23.1.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
babel==2.11.0 ; python_version >= "3.9" and python_version < "4.0"
|
babel==2.12.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
backcall==0.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
backcall==0.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
black==23.1.0 ; python_version >= "3.9" and python_version < "4.0"
|
black==23.3.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
cattrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
cattrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
certifi==2022.12.7 ; python_version >= "3.9" and python_version < "4"
|
certifi==2022.12.7 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
charset-normalizer==3.0.1 ; python_version >= "3.9" and python_version < "4"
|
charset-normalizer==3.1.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
click==8.1.3 ; python_version >= "3.9" and python_version < "4.0"
|
click==8.1.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
colorama==0.4.6 ; python_version >= "3.9" and python_version < "4.0"
|
colorama==0.4.6 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
country-converter @ git+https://github.com/alanorth/country_converter.git@myanmar-region ; python_version >= "3.9" and python_version < "4.0"
|
country-converter==1.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
csvkit==1.1.1 ; python_version >= "3.9" and python_version < "4.0"
|
csvkit==1.1.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
dbfread==2.0.7 ; python_version >= "3.9" and python_version < "4.0"
|
dbfread==2.0.7 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
decorator==5.1.1 ; python_version >= "3.9" and python_version < "4.0"
|
decorator==5.1.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
et-xmlfile==1.1.0 ; python_version >= "3.9" and python_version < "4.0"
|
et-xmlfile==1.1.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
exceptiongroup==1.1.0 ; python_version >= "3.9" and python_version < "3.11"
|
exceptiongroup==1.1.1 ; python_version >= "3.9" and python_version < "3.11"
|
||||||
executing==1.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
executing==1.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
flake8==6.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
flake8==6.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
ftfy==6.1.1 ; python_version >= "3.9" and python_version < "4"
|
ftfy==6.1.1 ; python_version >= "3.9" and python_version < "4"
|
||||||
greenlet==2.0.2 ; python_version >= "3.9" and (platform_machine == "aarch64" or platform_machine == "ppc64le" or platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "AMD64" or platform_machine == "win32" or platform_machine == "WIN32") and python_version < "4.0"
|
greenlet==2.0.2 ; python_version >= "3.9" and platform_machine == "aarch64" and python_version < "4.0" or python_version >= "3.9" and platform_machine == "ppc64le" and python_version < "4.0" or python_version >= "3.9" and platform_machine == "x86_64" and python_version < "4.0" or python_version >= "3.9" and platform_machine == "amd64" and python_version < "4.0" or python_version >= "3.9" and platform_machine == "AMD64" and python_version < "4.0" or python_version >= "3.9" and platform_machine == "win32" and python_version < "4.0" or python_version >= "3.9" and platform_machine == "WIN32" and python_version < "4.0"
|
||||||
idna==3.4 ; python_version >= "3.9" and python_version < "4"
|
idna==3.4 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
iniconfig==2.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
iniconfig==2.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
ipython==8.10.0 ; python_version >= "3.9" and python_version < "4.0"
|
ipython==8.13.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
isodate==0.6.1 ; python_version >= "3.9" and python_version < "4.0"
|
isodate==0.6.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
isort==5.12.0 ; python_version >= "3.9" and python_version < "4.0"
|
isort==5.12.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
jedi==0.18.2 ; python_version >= "3.9" and python_version < "4.0"
|
jedi==0.18.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
@ -37,44 +37,46 @@ matplotlib-inline==0.1.6 ; python_version >= "3.9" and python_version < "4.0"
|
|||||||
mccabe==0.7.0 ; python_version >= "3.9" and python_version < "4.0"
|
mccabe==0.7.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
mdurl==0.1.2 ; python_version >= "3.9" and python_version < "4.0"
|
mdurl==0.1.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
mypy-extensions==1.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
mypy-extensions==1.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
numpy==1.24.2 ; python_version < "4.0" and python_version >= "3.9"
|
numpy==1.24.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
olefile==0.46 ; python_version >= "3.9" and python_version < "4.0"
|
olefile==0.46 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
openpyxl==3.1.1 ; python_version >= "3.9" and python_version < "4.0"
|
openpyxl==3.1.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
packaging==23.0 ; python_version >= "3.9" and python_version < "4.0"
|
packaging==23.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pandas==1.5.3 ; python_version >= "3.9" and python_version < "4.0"
|
pandas==2.0.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
parsedatetime==2.6 ; python_version >= "3.9" and python_version < "4.0"
|
parsedatetime==2.6 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
parso==0.8.3 ; python_version >= "3.9" and python_version < "4.0"
|
parso==0.8.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pathspec==0.11.0 ; python_version >= "3.9" and python_version < "4.0"
|
pathspec==0.11.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pexpect==4.8.0 ; python_version >= "3.9" and python_version < "4.0" and sys_platform != "win32"
|
pexpect==4.8.0 ; python_version >= "3.9" and python_version < "4.0" and sys_platform != "win32"
|
||||||
pickleshare==0.7.5 ; python_version >= "3.9" and python_version < "4.0"
|
pickleshare==0.7.5 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
platformdirs==3.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
platformdirs==3.5.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pluggy==1.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
pluggy==1.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pprintpp==0.4.0 ; python_version >= "3.9" and python_version < "4.0"
|
pprintpp==0.4.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
prompt-toolkit==3.0.37 ; python_version >= "3.9" and python_version < "4.0"
|
prompt-toolkit==3.0.38 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
ptyprocess==0.7.0 ; python_version >= "3.9" and python_version < "4.0" and sys_platform != "win32"
|
ptyprocess==0.7.0 ; python_version >= "3.9" and python_version < "4.0" and sys_platform != "win32"
|
||||||
pure-eval==0.2.2 ; python_version >= "3.9" and python_version < "4.0"
|
pure-eval==0.2.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
|
pyarrow==11.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pycodestyle==2.10.0 ; python_version >= "3.9" and python_version < "4.0"
|
pycodestyle==2.10.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pycountry @ git+https://github.com/alanorth/pycountry@iso-codes-4.12.0 ; python_version >= "3.9" and python_version < "4.0"
|
pycountry @ git+https://github.com/alanorth/pycountry@iso-codes-4.13.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pyflakes==3.0.1 ; python_version >= "3.9" and python_version < "4.0"
|
pyflakes==3.0.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pygments==2.14.0 ; python_version >= "3.9" and python_version < "4.0"
|
pygments==2.15.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pytest-clarity==1.0.1 ; python_version >= "3.9" and python_version < "4.0"
|
pytest-clarity==1.0.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pytest==7.2.1 ; python_version >= "3.9" and python_version < "4.0"
|
pytest==7.3.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "4.0"
|
python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
python-slugify==8.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
python-slugify==8.0.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
python-stdnum==1.18 ; python_version >= "3.9" and python_version < "4.0"
|
python-stdnum==1.18 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pytimeparse==1.1.8 ; python_version >= "3.9" and python_version < "4.0"
|
pytimeparse==1.1.8 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pytz==2022.7.1 ; python_version >= "3.9" and python_version < "4.0"
|
pytz==2023.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
requests-cache==0.9.8 ; python_version >= "3.9" and python_version < "4.0"
|
requests-cache==0.9.8 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
requests==2.28.2 ; python_version >= "3.9" and python_version < "4"
|
requests==2.29.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
rich==13.3.1 ; python_version >= "3.9" and python_version < "4.0"
|
rich==13.3.5 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
six==1.16.0 ; python_version >= "3.9" and python_version < "4.0"
|
six==1.16.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
sqlalchemy==1.4.46 ; python_version >= "3.9" and python_version < "4.0"
|
sqlalchemy==1.4.48 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
stack-data==0.6.2 ; python_version >= "3.9" and python_version < "4.0"
|
stack-data==0.6.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
text-unidecode==1.3 ; python_version >= "3.9" and python_version < "4.0"
|
text-unidecode==1.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
tomli==2.0.1 ; python_version >= "3.9" and python_version < "3.11"
|
tomli==2.0.1 ; python_version >= "3.9" and python_version < "3.11"
|
||||||
traitlets==5.9.0 ; python_version >= "3.9" and python_version < "4.0"
|
traitlets==5.9.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
typing-extensions==4.5.0 ; python_version >= "3.9" and python_version < "3.10"
|
typing-extensions==4.5.0 ; python_version >= "3.9" and python_version < "3.10"
|
||||||
|
tzdata==2023.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
url-normalize==1.4.3 ; python_version >= "3.9" and python_version < "4.0"
|
url-normalize==1.4.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
urllib3==1.26.14 ; python_version >= "3.9" and python_version < "4"
|
urllib3==1.26.15 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
wcwidth==0.2.6 ; python_version >= "3.9" and python_version < "4"
|
wcwidth==0.2.6 ; python_version >= "3.9" and python_version < "4"
|
||||||
xlrd==2.0.1 ; python_version >= "3.9" and python_version < "4.0"
|
xlrd==2.0.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
|
@ -1,23 +1,25 @@
|
|||||||
appdirs==1.4.4 ; python_version >= "3.9" and python_version < "4.0"
|
appdirs==1.4.4 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
attrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
attrs==23.1.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
cattrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
cattrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
certifi==2022.12.7 ; python_version >= "3.9" and python_version < "4"
|
certifi==2022.12.7 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
charset-normalizer==3.0.1 ; python_version >= "3.9" and python_version < "4"
|
charset-normalizer==3.1.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
colorama==0.4.6 ; python_version >= "3.9" and python_version < "4.0"
|
colorama==0.4.6 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
country-converter @ git+https://github.com/alanorth/country_converter.git@myanmar-region ; python_version >= "3.9" and python_version < "4.0"
|
country-converter==1.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
exceptiongroup==1.1.0 ; python_version >= "3.9" and python_version < "3.11"
|
exceptiongroup==1.1.1 ; python_version >= "3.9" and python_version < "3.11"
|
||||||
ftfy==6.1.1 ; python_version >= "3.9" and python_version < "4"
|
ftfy==6.1.1 ; python_version >= "3.9" and python_version < "4"
|
||||||
idna==3.4 ; python_version >= "3.9" and python_version < "4"
|
idna==3.4 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
langid==1.1.6 ; python_version >= "3.9" and python_version < "4.0"
|
langid==1.1.6 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
numpy==1.24.2 ; python_version < "4.0" and python_version >= "3.9"
|
numpy==1.24.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pandas==1.5.3 ; python_version >= "3.9" and python_version < "4.0"
|
pandas==2.0.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pycountry @ git+https://github.com/alanorth/pycountry@iso-codes-4.12.0 ; python_version >= "3.9" and python_version < "4.0"
|
pyarrow==11.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
|
pycountry @ git+https://github.com/alanorth/pycountry@iso-codes-4.13.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "4.0"
|
python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
python-stdnum==1.18 ; python_version >= "3.9" and python_version < "4.0"
|
python-stdnum==1.18 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pytz==2022.7.1 ; python_version >= "3.9" and python_version < "4.0"
|
pytz==2023.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
requests-cache==0.9.8 ; python_version >= "3.9" and python_version < "4.0"
|
requests-cache==0.9.8 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
requests==2.28.2 ; python_version >= "3.9" and python_version < "4"
|
requests==2.29.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
six==1.16.0 ; python_version >= "3.9" and python_version < "4.0"
|
six==1.16.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
|
tzdata==2023.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
url-normalize==1.4.3 ; python_version >= "3.9" and python_version < "4.0"
|
url-normalize==1.4.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
urllib3==1.26.14 ; python_version >= "3.9" and python_version < "4"
|
urllib3==1.26.15 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
wcwidth==0.2.6 ; python_version >= "3.9" and python_version < "4"
|
wcwidth==0.2.6 ; python_version >= "3.9" and python_version < "4"
|
||||||
|
36
setup.py
36
setup.py
@ -1,36 +0,0 @@
|
|||||||
import setuptools
|
|
||||||
|
|
||||||
with open("README.md", "r") as fh:
|
|
||||||
long_description = fh.read()
|
|
||||||
|
|
||||||
install_requires = [
|
|
||||||
"pandas",
|
|
||||||
"python-stdnum",
|
|
||||||
"requests",
|
|
||||||
"requests-cache",
|
|
||||||
"pycountry",
|
|
||||||
"langid",
|
|
||||||
]
|
|
||||||
|
|
||||||
setuptools.setup(
|
|
||||||
name="csv-metadata-quality",
|
|
||||||
version="0.6.1",
|
|
||||||
author="Alan Orth",
|
|
||||||
author_email="aorth@mjanja.ch",
|
|
||||||
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem.",
|
|
||||||
license="GPLv3",
|
|
||||||
long_description=long_description,
|
|
||||||
long_description_content_type="text/markdown",
|
|
||||||
url="https://github.com/alanorth/csv-metadata-quality",
|
|
||||||
classifiers=[
|
|
||||||
"Programming Language :: Python :: 3.9",
|
|
||||||
"Programming Language :: Python :: 3.10",
|
|
||||||
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
|
|
||||||
"Operating System :: OS Independent",
|
|
||||||
],
|
|
||||||
packages=["csv_metadata_quality"],
|
|
||||||
entry_points={
|
|
||||||
"console_scripts": ["csv-metadata-quality = csv_metadata_quality.__main__:main"]
|
|
||||||
},
|
|
||||||
install_requires=install_requires,
|
|
||||||
)
|
|
Reference in New Issue
Block a user