mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2025-05-09 14:46:00 +02:00
Compare commits
56 Commits
Author | SHA1 | Date | |
---|---|---|---|
d76e72532a
|
|||
13980d2dde
|
|||
9aaaa62461
|
|||
a7fc5a246c
|
|||
7fb8acb866
|
|||
9f5d2c2c4f
|
|||
202abf140c
|
|||
0cd6d3dfe6
|
|||
a458beac55
|
|||
e62ecb0a8f
|
|||
de92f32ab6
|
|||
dbbbc0944a
|
|||
d17bf3033c
|
|||
2ec52f1b73
|
|||
aa1abf15a7
|
|||
cbf94490f2
|
|||
f3d0d5ef07
|
|||
4b7b99c94c
|
|||
df670e81b9
|
|||
ae357d8c6c
|
|||
ca80340f7a
|
|||
cc1743b86d
|
|||
bcb9885c6b
|
|||
b484b75178
|
|||
d3880a9dfa
|
|||
7edb8b19d7
|
|||
a6709c7f82
|
|||
d489ea4609
|
|||
96634cbb67
|
|||
29e67a0887
|
|||
32cea2055f
|
|||
0dc66c5c4e
|
|||
c26ad83534
|
|||
72ca9d99bf
|
|||
ae33a9b793
|
|||
fc0367bfc8
|
|||
e33b285034
|
|||
349fca03b8
|
|||
52d8904870
|
|||
971c69e535 | |||
f8cc233e25
|
|||
aa7b7a9592
|
|||
57b455bde7
|
|||
23b95fa368
|
|||
6985f76aa3
|
|||
98a6a19e12
|
|||
f4914c414f
|
|||
d352fe8017
|
|||
f13c360084
|
|||
7cfd4c0b59
|
|||
826509ddcf
|
|||
22b5c0f7a1
|
|||
774e274b32
|
|||
db474a802f
|
|||
e241f8461b
|
|||
431e6331c8
|
15
.build.yml
15
.build.yml
@ -1,15 +0,0 @@
|
||||
image: archlinux
|
||||
packages:
|
||||
- python-poetry
|
||||
sources:
|
||||
- https://git.sr.ht/~alanorth/csv-metadata-quality
|
||||
tasks:
|
||||
- setup: |
|
||||
cd csv-metadata-quality
|
||||
poetry install
|
||||
- pytest: |
|
||||
cd csv-metadata-quality
|
||||
poetry run pytest
|
||||
- testcli: |
|
||||
cd csv-metadata-quality
|
||||
poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e -u --agrovoc-fields dc.subject,cg.coverage.country
|
49
.drone.yml
Normal file
49
.drone.yml
Normal file
@ -0,0 +1,49 @@
|
||||
---
|
||||
kind: pipeline
|
||||
type: docker
|
||||
name: python39
|
||||
|
||||
steps:
|
||||
- name: test
|
||||
image: python:3.9-slim
|
||||
commands:
|
||||
- id
|
||||
- python -V
|
||||
- pip install -r requirements-dev.txt
|
||||
- pytest
|
||||
- python setup.py install
|
||||
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e -u --agrovoc-fields dc.subject,cg.coverage.country
|
||||
|
||||
---
|
||||
kind: pipeline
|
||||
type: docker
|
||||
name: python38
|
||||
|
||||
steps:
|
||||
- name: test
|
||||
image: python:3.8-slim
|
||||
commands:
|
||||
- id
|
||||
- python -V
|
||||
- pip install -r requirements-dev.txt
|
||||
- pytest
|
||||
- python setup.py install
|
||||
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e -u --agrovoc-fields dc.subject,cg.coverage.country
|
||||
|
||||
---
|
||||
kind: pipeline
|
||||
type: docker
|
||||
name: python37
|
||||
|
||||
steps:
|
||||
- name: test
|
||||
image: python:3.7-slim
|
||||
commands:
|
||||
- id
|
||||
- python -V
|
||||
- pip install -r requirements-dev.txt
|
||||
- pytest
|
||||
- python setup.py install
|
||||
- csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e -u --agrovoc-fields dc.subject,cg.coverage.country
|
||||
|
||||
# vim: ts=2 sw=2 et
|
41
.github/workflows/python-app.yml
vendored
Normal file
41
.github/workflows/python-app.yml
vendored
Normal file
@ -0,0 +1,41 @@
|
||||
# This workflow will install Python dependencies, run tests and lint with a single version of Python
|
||||
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
|
||||
|
||||
name: Build and Test
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python 3.8
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: 3.8
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install flake8 pytest
|
||||
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
||||
if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi
|
||||
- name: Lint with flake8
|
||||
run: |
|
||||
# stop the build if there are Python syntax errors or undefined names
|
||||
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
|
||||
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
|
||||
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
|
||||
- name: Test with pytest
|
||||
run: |
|
||||
pytest
|
||||
- name: Test CLI
|
||||
run: |
|
||||
python setup.py install
|
||||
csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e -u --agrovoc-fields dc.subject,cg.coverage.country
|
16
.travis.yml
16
.travis.yml
@ -1,16 +0,0 @@
|
||||
dist: bionic
|
||||
language: python
|
||||
python:
|
||||
- "3.6"
|
||||
- "3.7"
|
||||
- "3.8"
|
||||
- "3.8-dev" # 3.8 development branch
|
||||
jobs:
|
||||
allow_failures:
|
||||
- python: "3.8-dev"
|
||||
install:
|
||||
- "pip install -r requirements.txt"
|
||||
- "pip install -r requirements-dev.txt"
|
||||
script: pytest
|
||||
|
||||
# vim: ts=2 sw=2 et
|
20
CHANGELOG.md
20
CHANGELOG.md
@ -4,6 +4,26 @@ All notable changes to this project will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [0.4.4] - 2021-02-21
|
||||
### Added
|
||||
- Accept dates formatted in ISO 8601 extended with combined date and time, for
|
||||
example: 2020-08-31T11:04:56Z
|
||||
- Colorized output: red for errors, yellow for warnings and information, green
|
||||
for changes
|
||||
|
||||
### Updated
|
||||
- Run `poetry update` to update project dependencies
|
||||
|
||||
## [0.4.3] - 2021-01-26
|
||||
### Changed
|
||||
- Reformat with black
|
||||
- Requires Python 3.7+ for pandas 1.2.0
|
||||
|
||||
### Updated
|
||||
- Run `poetry update`
|
||||
- Expand check/fix for multi-value separators to include metadata with invalid
|
||||
separators at the end, for example "Kenya||Tanzania||"
|
||||
|
||||
## [0.4.2] - 2020-07-06
|
||||
### Changed
|
||||
- Add field name to the output for more fixes and checks to help identify where
|
||||
|
17
README.md
17
README.md
@ -1,7 +1,11 @@
|
||||
# CSV Metadata Quality [](https://travis-ci.org/ilri/csv-metadata-quality) [](https://builds.sr.ht/~alanorth/csv-metadata-quality?)
|
||||
A simple, but opinionated metadata quality checker and fixer designed to work with CSVs in the DSpace ecosystem (though it could theoretically work on any CSV that uses Dublin Core fields as columns). The implementation is essentially a pipeline of checks and fixes that begins with splitting multi-value fields on the standard DSpace "||" separator, trimming leading/trailing whitespace, and then proceeding to more specialized cases like ISSNs, ISBNs, languages, etc.
|
||||
# DSpace CSV Metadata Quality Checker [Build Status](https://ci.mjanja.ch/alanorth/csv-metadata-quality)
|
||||
A simple, but opinionated metadata quality checker and fixer designed to work with CSVs in the DSpace ecosystem (though it could theoretically work on any CSV that uses Dublin Core fields as columns). The implementation is essentially a pipeline of checks and fixes that begins with splitting multi-value fields on the standard DSpace "||" separator, trimming leading/trailing whitespace, and then proceeding to more specialized cases like ISSNs, ISBNs, languages, unnecessary Unicode, AGROVOC terms, etc.
|
||||
|
||||
Requires Python 3.8 or greater. CSV and Excel support comes from the [Pandas](https://pandas.pydata.org/) library, though your mileage may vary with Excel because this is much less tested.
|
||||
Requires Python 3.7 or greater (3.8 recommended). CSV and Excel support comes from the [Pandas](https://pandas.pydata.org/) library, though your mileage may vary with Excel because this is much less tested.
|
||||
|
||||
If you use the DSpace CSV metadata quality checker please cite:
|
||||
|
||||
*Orth, A. 2019. DSpace CSV metadata quality checker. Nairobi, Kenya: ILRI. https://hdl.handle.net/10568/110997.*
|
||||
|
||||
## Functionality
|
||||
|
||||
@ -10,7 +14,7 @@ Requires Python 3.8 or greater. CSV and Excel support comes from the [Pandas](ht
|
||||
- Experimental validation of titles and abstracts against item's Dublin Core language field
|
||||
- Validate subjects against the AGROVOC REST API (see the `--agrovoc-fields` option)
|
||||
- Fix leading, trailing, and excessive (ie, more than one) whitespace
|
||||
- Fix invalid multi-value separators (`|`) using `--unsafe-fixes`
|
||||
- Fix invalid and unnecessary multi-value separators (`|`) using `--unsafe-fixes`
|
||||
- Fix problematic newlines (line feeds) using `--unsafe-fixes`
|
||||
- Remove unnecessary Unicode like [non-breaking spaces](https://en.wikipedia.org/wiki/Non-breaking_space), [replacement characters](https://en.wikipedia.org/wiki/Specials_(Unicode_block)#Replacement_character), etc
|
||||
- Check for "suspicious" characters that indicate encoding or copy/paste issues, for example "foreˆt" should be "forêt"
|
||||
@ -56,6 +60,8 @@ You can enable several "unsafe" fixes with the `--unsafe-fixes` option. Currentl
|
||||
### Invalid Multi-Value Separators
|
||||
This is considered "unsafe" because it is *theoretically* possible for a single `|` character to be used legitimately in a metadata value, though in my experience it is always a typo. For example, if a user mistakenly writes `Kenya|Tanzania` when attempting to indicate two countries, the result will be one metadata value with the literal text `Kenya|Tanzania`. The `--unsafe-fixes` option will correct the invalid multi-value separator so that there are two metadata values, ie `Kenya||Tanzania`.
|
||||
|
||||
This will also remove unnecessary trailing multi-value separators, for example `Kenya||Tanzania||`.
|
||||
|
||||
### Newlines
|
||||
This is considered "unsafe" because some systems give special importance to vertical space and render it properly. DSpace does not support rendering newlines in its XMLUI and has, at times, suffered from parsing errors that cause the import process to fail if an input file had newlines. The `--unsafe-fixes` option strips Unix line feeds (U+000A).
|
||||
|
||||
@ -102,6 +108,9 @@ This currently uses the [Python langid](https://github.com/saffsd/langid.py) lib
|
||||
- Warn if two items use the same file in `filename` column
|
||||
- Add an option to drop invalid AGROVOC subjects?
|
||||
- Add tests for application invocation, ie `tests/test_app.py`?
|
||||
- Validate ISSNs or journal titles against CrossRef API?
|
||||
- Better ISO 8601 date parsing (currently only supports simple dates, perhaps we need to use dateutil.parser.isoparse())
|
||||
- Fix lazy date check (assumes field name has "date" but could be dcterms.issued etc!)
|
||||
|
||||
## License
|
||||
This work is licensed under the [GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html).
|
||||
|
@ -4,6 +4,7 @@ import signal
|
||||
import sys
|
||||
|
||||
import pandas as pd
|
||||
from colorama import Fore
|
||||
|
||||
import csv_metadata_quality.check as check
|
||||
import csv_metadata_quality.experimental as experimental
|
||||
@ -77,7 +78,7 @@ def run(argv):
|
||||
if column == exclude and skip is False:
|
||||
skip = True
|
||||
if skip:
|
||||
print(f"Skipping {column}")
|
||||
print(f"{Fore.YELLOW}Skipping {Fore.RESET}{column}")
|
||||
|
||||
continue
|
||||
|
||||
@ -103,13 +104,13 @@ def run(argv):
|
||||
# Fix: unnecessary Unicode
|
||||
df[column] = df[column].apply(fix.unnecessary_unicode)
|
||||
|
||||
# Check: invalid multi-value separator
|
||||
# Check: invalid and unnecessary multi-value separators
|
||||
df[column] = df[column].apply(check.separators, field_name=column)
|
||||
|
||||
# Check: suspicious characters
|
||||
df[column] = df[column].apply(check.suspicious_characters, field_name=column)
|
||||
|
||||
# Fix: invalid multi-value separator
|
||||
# Fix: invalid and unnecessary multi-value separators
|
||||
if args.unsafe_fixes:
|
||||
df[column] = df[column].apply(fix.separators, field_name=column)
|
||||
# Run whitespace fix again after fixing invalid separators
|
||||
|
@ -1,4 +1,10 @@
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import pandas as pd
|
||||
import requests
|
||||
import requests_cache
|
||||
from colorama import Fore
|
||||
from pycountry import languages
|
||||
|
||||
|
||||
def issn(field):
|
||||
@ -21,7 +27,7 @@ def issn(field):
|
||||
for value in field.split("||"):
|
||||
|
||||
if not issn.is_valid(value):
|
||||
print(f"Invalid ISSN: {value}")
|
||||
print(f"{Fore.RED}Invalid ISSN: {Fore.RESET}{value}")
|
||||
|
||||
return field
|
||||
|
||||
@ -46,13 +52,17 @@ def isbn(field):
|
||||
for value in field.split("||"):
|
||||
|
||||
if not isbn.is_valid(value):
|
||||
print(f"Invalid ISBN: {value}")
|
||||
print(f"{Fore.RED}Invalid ISBN: {Fore.RESET}{value}")
|
||||
|
||||
return field
|
||||
|
||||
|
||||
def separators(field, field_name):
|
||||
"""Check for invalid multi-value separators (ie "|" or "|||").
|
||||
"""Check for invalid and unnecessary multi-value separators, for example:
|
||||
|
||||
value|value
|
||||
value|||value
|
||||
value||value||
|
||||
|
||||
Prints the field with the invalid multi-value separator.
|
||||
"""
|
||||
@ -65,12 +75,22 @@ def separators(field, field_name):
|
||||
|
||||
# Try to split multi-value field on "||" separator
|
||||
for value in field.split("||"):
|
||||
# Check if the current value is blank
|
||||
if value == "":
|
||||
print(
|
||||
f"{Fore.RED}Unnecessary multi-value separator ({field_name}): {Fore.RESET}{field}"
|
||||
)
|
||||
|
||||
continue
|
||||
|
||||
# After splitting, see if there are any remaining "|" characters
|
||||
match = re.findall(r"^.*?\|.*$", value)
|
||||
|
||||
# Check if there was a match
|
||||
if match:
|
||||
print(f"Invalid multi-value separator ({field_name}): {field}")
|
||||
print(
|
||||
f"{Fore.RED}Invalid multi-value separator ({field_name}): {Fore.RESET}{field}"
|
||||
)
|
||||
|
||||
return field
|
||||
|
||||
@ -85,10 +105,9 @@ def date(field, field_name):
|
||||
|
||||
Prints the date if invalid.
|
||||
"""
|
||||
from datetime import datetime
|
||||
|
||||
if pd.isna(field):
|
||||
print(f"Missing date ({field_name}).")
|
||||
print(f"{Fore.RED}Missing date ({field_name}).{Fore.RESET}")
|
||||
|
||||
return
|
||||
|
||||
@ -97,7 +116,9 @@ def date(field, field_name):
|
||||
|
||||
# We don't allow multi-value date fields
|
||||
if len(multiple_dates) > 1:
|
||||
print(f"Multiple dates not allowed ({field_name}): {field}")
|
||||
print(
|
||||
f"{Fore.RED}Multiple dates not allowed ({field_name}): {Fore.RESET}{field}"
|
||||
)
|
||||
|
||||
return field
|
||||
|
||||
@ -123,7 +144,15 @@ def date(field, field_name):
|
||||
|
||||
return field
|
||||
except ValueError:
|
||||
print(f"Invalid date ({field_name}): {field}")
|
||||
pass
|
||||
|
||||
try:
|
||||
# Check if date is valid YYYY-MM-DDTHH:MM:SSZ format
|
||||
datetime.strptime(field, "%Y-%m-%dT%H:%M:%SZ")
|
||||
|
||||
return field
|
||||
except ValueError:
|
||||
print(f"{Fore.RED}Invalid date ({field_name}): {Fore.RESET}{field}")
|
||||
|
||||
return field
|
||||
|
||||
@ -156,9 +185,7 @@ def suspicious_characters(field, field_name):
|
||||
# character and spanning enough of the rest to give a preview,
|
||||
# but not too much to cause the line to break in terminals with
|
||||
# a default of 80 characters width.
|
||||
suspicious_character_msg = (
|
||||
f"Suspicious character ({field_name}): {field_subset}"
|
||||
)
|
||||
suspicious_character_msg = f"{Fore.YELLOW}Suspicious character ({field_name}): {Fore.RESET}{field_subset}"
|
||||
print(f"{suspicious_character_msg:1.80}")
|
||||
|
||||
return field
|
||||
@ -170,8 +197,6 @@ def language(field):
|
||||
Prints the value if it is invalid.
|
||||
"""
|
||||
|
||||
from pycountry import languages
|
||||
|
||||
# Skip fields with missing values
|
||||
if pd.isna(field):
|
||||
return
|
||||
@ -185,16 +210,16 @@ def language(field):
|
||||
# can check it against ISO 639-1 or ISO 639-3 accordingly.
|
||||
if len(value) == 2:
|
||||
if not languages.get(alpha_2=value):
|
||||
print(f"Invalid ISO 639-1 language: {value}")
|
||||
print(f"{Fore.RED}Invalid ISO 639-1 language: {Fore.RESET}{value}")
|
||||
|
||||
pass
|
||||
elif len(value) == 3:
|
||||
if not languages.get(alpha_3=value):
|
||||
print(f"Invalid ISO 639-3 language: {value}")
|
||||
print(f"{Fore.RED}Invalid ISO 639-3 language: {Fore.RESET}{value}")
|
||||
|
||||
pass
|
||||
else:
|
||||
print(f"Invalid language: {value}")
|
||||
print(f"{Fore.RED}Invalid language: {Fore.RESET}{value}")
|
||||
|
||||
return field
|
||||
|
||||
@ -213,19 +238,13 @@ def agrovoc(field, field_name):
|
||||
Prints a warning if the value is invalid.
|
||||
"""
|
||||
|
||||
from datetime import timedelta
|
||||
import requests
|
||||
import requests_cache
|
||||
|
||||
# Skip fields with missing values
|
||||
if pd.isna(field):
|
||||
return
|
||||
|
||||
# enable transparent request cache with thirty days expiry
|
||||
expire_after = timedelta(days=30)
|
||||
requests_cache.install_cache(
|
||||
"agrovoc-response-cache", expire_after=expire_after
|
||||
)
|
||||
requests_cache.install_cache("agrovoc-response-cache", expire_after=expire_after)
|
||||
|
||||
# prune old cache entries
|
||||
requests_cache.core.remove_expired_responses()
|
||||
@ -242,7 +261,7 @@ def agrovoc(field, field_name):
|
||||
|
||||
# check if there are any results
|
||||
if len(data["results"]) == 0:
|
||||
print(f"Invalid AGROVOC ({field_name}): {value}")
|
||||
print(f"{Fore.RED}Invalid AGROVOC ({field_name}): {Fore.RESET}{value}")
|
||||
|
||||
return field
|
||||
|
||||
@ -295,6 +314,6 @@ def filename_extension(field):
|
||||
break
|
||||
|
||||
if filename_extension_match is False:
|
||||
print(f"Filename with uncommon extension: {value}")
|
||||
print(f"{Fore.YELLOW}Filename with uncommon extension: {Fore.RESET}{value}")
|
||||
|
||||
return field
|
||||
|
@ -1,4 +1,5 @@
|
||||
import pandas as pd
|
||||
from colorama import Fore
|
||||
|
||||
|
||||
def correct_language(row):
|
||||
@ -10,10 +11,11 @@ def correct_language(row):
|
||||
language and returns the value in the language field if it does match.
|
||||
"""
|
||||
|
||||
from pycountry import languages
|
||||
import langid
|
||||
import re
|
||||
|
||||
import langid
|
||||
from pycountry import languages
|
||||
|
||||
# Initialize some variables at function scope so that we can set them in the
|
||||
# loop scope below and still be able to access them afterwards.
|
||||
language = ""
|
||||
@ -83,12 +85,12 @@ def correct_language(row):
|
||||
detected_language = languages.get(alpha_2=langid_classification[0])
|
||||
if len(language) == 2 and language != detected_language.alpha_2:
|
||||
print(
|
||||
f"Possibly incorrect language {language} (detected {detected_language.alpha_2}): {title}"
|
||||
f"{Fore.YELLOW}Possibly incorrect language {language} (detected {detected_language.alpha_2}): {Fore.RESET}{title}"
|
||||
)
|
||||
|
||||
elif len(language) == 3 and language != detected_language.alpha_3:
|
||||
print(
|
||||
f"Possibly incorrect language {language} (detected {detected_language.alpha_3}): {title}"
|
||||
f"{Fore.YELLOW}Possibly incorrect language {language} (detected {detected_language.alpha_3}): {Fore.RESET}{title}"
|
||||
)
|
||||
|
||||
else:
|
||||
|
@ -1,6 +1,10 @@
|
||||
import re
|
||||
from unicodedata import normalize
|
||||
|
||||
import pandas as pd
|
||||
from colorama import Fore
|
||||
|
||||
from csv_metadata_quality.util import is_nfc
|
||||
|
||||
|
||||
def whitespace(field, field_name):
|
||||
@ -26,7 +30,9 @@ def whitespace(field, field_name):
|
||||
match = re.findall(pattern, value)
|
||||
|
||||
if match:
|
||||
print(f"Removing excessive whitespace ({field_name}): {value}")
|
||||
print(
|
||||
f"{Fore.GREEN}Removing excessive whitespace ({field_name}): {Fore.RESET}{value}"
|
||||
)
|
||||
value = re.sub(pattern, " ", value)
|
||||
|
||||
# Save cleaned value
|
||||
@ -39,7 +45,14 @@ def whitespace(field, field_name):
|
||||
|
||||
|
||||
def separators(field, field_name):
|
||||
"""Fix for invalid multi-value separators (ie "|")."""
|
||||
"""Fix for invalid and unnecessary multi-value separators, for example:
|
||||
|
||||
value|value
|
||||
value|||value
|
||||
value||value||
|
||||
|
||||
Prints the field with the invalid multi-value separator.
|
||||
"""
|
||||
|
||||
# Skip fields with missing values
|
||||
if pd.isna(field):
|
||||
@ -50,12 +63,22 @@ def separators(field, field_name):
|
||||
|
||||
# Try to split multi-value field on "||" separator
|
||||
for value in field.split("||"):
|
||||
# Check if the value is blank and skip it
|
||||
if value == "":
|
||||
print(
|
||||
f"{Fore.GREEN}Fixing unnecessary multi-value separator ({field_name}): {Fore.RESET}{field}"
|
||||
)
|
||||
|
||||
continue
|
||||
|
||||
# After splitting, see if there are any remaining "|" characters
|
||||
pattern = re.compile(r"\|")
|
||||
match = re.findall(pattern, value)
|
||||
|
||||
if match:
|
||||
print(f"Fixing invalid multi-value separator ({field_name}): {value}")
|
||||
print(
|
||||
f"{Fore.RED}Fixing invalid multi-value separator ({field_name}): {Fore.RESET}{value}"
|
||||
)
|
||||
|
||||
value = re.sub(pattern, "||", value)
|
||||
|
||||
@ -91,7 +114,7 @@ def unnecessary_unicode(field):
|
||||
match = re.findall(pattern, field)
|
||||
|
||||
if match:
|
||||
print(f"Removing unnecessary Unicode (U+200B): {field}")
|
||||
print(f"{Fore.GREEN}Removing unnecessary Unicode (U+200B): {Fore.RESET}{field}")
|
||||
field = re.sub(pattern, "", field)
|
||||
|
||||
# Check for replacement characters (U+FFFD)
|
||||
@ -99,7 +122,7 @@ def unnecessary_unicode(field):
|
||||
match = re.findall(pattern, field)
|
||||
|
||||
if match:
|
||||
print(f"Removing unnecessary Unicode (U+FFFD): {field}")
|
||||
print(f"{Fore.GREEN}Removing unnecessary Unicode (U+FFFD): {Fore.RESET}{field}")
|
||||
field = re.sub(pattern, "", field)
|
||||
|
||||
# Check for no-break spaces (U+00A0)
|
||||
@ -107,7 +130,9 @@ def unnecessary_unicode(field):
|
||||
match = re.findall(pattern, field)
|
||||
|
||||
if match:
|
||||
print(f"Replacing unnecessary Unicode (U+00A0): {field}")
|
||||
print(
|
||||
f"{Fore.GREEN}Replacing unnecessary Unicode (U+00A0): {Fore.RESET}{field}"
|
||||
)
|
||||
field = re.sub(pattern, " ", field)
|
||||
|
||||
# Check for soft hyphens (U+00AD), sometimes preceded by a normal hyphen
|
||||
@ -115,7 +140,9 @@ def unnecessary_unicode(field):
|
||||
match = re.findall(pattern, field)
|
||||
|
||||
if match:
|
||||
print(f"Replacing unnecessary Unicode (U+00AD): {field}")
|
||||
print(
|
||||
f"{Fore.GREEN}Replacing unnecessary Unicode (U+00AD): {Fore.RESET}{field}"
|
||||
)
|
||||
field = re.sub(pattern, "-", field)
|
||||
|
||||
return field
|
||||
@ -140,7 +167,9 @@ def duplicates(field, field_name):
|
||||
if value not in new_values:
|
||||
new_values.append(value)
|
||||
else:
|
||||
print(f"Removing duplicate value ({field_name}): {value}")
|
||||
print(
|
||||
f"{Fore.GREEN}Removing duplicate value ({field_name}): {Fore.RESET}{value}"
|
||||
)
|
||||
|
||||
# Create a new field consisting of all values joined with "||"
|
||||
new_field = "||".join(new_values)
|
||||
@ -173,7 +202,7 @@ def newlines(field):
|
||||
match = re.findall(r"\n", field)
|
||||
|
||||
if match:
|
||||
print(f"Removing newline: {field}")
|
||||
print(f"{Fore.GREEN}Removing newline: {Fore.RESET}{field}")
|
||||
field = field.replace("\n", "")
|
||||
|
||||
return field
|
||||
@ -197,7 +226,9 @@ def comma_space(field, field_name):
|
||||
match = re.findall(r",\w", field)
|
||||
|
||||
if match:
|
||||
print(f"Adding space after comma ({field_name}): {field}")
|
||||
print(
|
||||
f"{Fore.GREEN}Adding space after comma ({field_name}): {Fore.RESET}{field}"
|
||||
)
|
||||
field = re.sub(r",(\w)", r", \1", field)
|
||||
|
||||
return field
|
||||
@ -212,16 +243,13 @@ def normalize_unicode(field, field_name):
|
||||
Return normalized string.
|
||||
"""
|
||||
|
||||
from csv_metadata_quality.util import is_nfc
|
||||
from unicodedata import normalize
|
||||
|
||||
# Skip fields with missing values
|
||||
if pd.isna(field):
|
||||
return
|
||||
|
||||
# Check if the current string is using normalized Unicode (NFC)
|
||||
if not is_nfc(field):
|
||||
print(f"Normalizing Unicode ({field_name}): {field}")
|
||||
print(f"{Fore.GREEN}Normalizing Unicode ({field_name}): {Fore.RESET}{field}")
|
||||
field = normalize("NFC", field)
|
||||
|
||||
return field
|
||||
|
@ -1 +1 @@
|
||||
VERSION = "0.4.2"
|
||||
VERSION = "0.4.4"
|
||||
|
@ -28,3 +28,4 @@ Incorrect ISO 639-1 language,2019-09-26,,,es,,,
|
||||
Incorrect ISO 639-3 language,2019-09-26,,,spa,,,
|
||||
Composéd Unicode,2020-01-14,,,,,,
|
||||
Decomposéd Unicode,2020-01-14,,,,,,
|
||||
Unnecessary multi-value separator,2021-01-03,0378-5955||,,,,,
|
||||
|
|
973
poetry.lock
generated
973
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "csv-metadata-quality"
|
||||
version = "0.4.2"
|
||||
version = "0.4.4"
|
||||
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem."
|
||||
authors = ["Alan Orth <alan.orth@gmail.com>"]
|
||||
license="GPL-3.0-only"
|
||||
@ -16,14 +16,15 @@ requests = "^2.23.0"
|
||||
requests-cache = "^0.5.2"
|
||||
pycountry = "^19.8.18"
|
||||
langid = "^1.1.6"
|
||||
colorama = "^0.4.4"
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
pytest = "^5.4.2"
|
||||
ipython = "^7.15.0"
|
||||
flake8 = "^3.8.2"
|
||||
pytest = "^6.1.1"
|
||||
ipython = { version = "^7.18.1", python = "^3.7" }
|
||||
flake8 = "^3.8.4"
|
||||
pytest-clarity = "^0.3.0-alpha.0"
|
||||
black = "^19.10b0"
|
||||
isort = "^4.3.21"
|
||||
black = "20.8b1"
|
||||
isort = "^5.5.4"
|
||||
csvkit = "^1.0.5"
|
||||
|
||||
[build-system]
|
||||
|
@ -1,5 +1,5 @@
|
||||
[pytest]
|
||||
addopts= -rsxX -s -v --strict --capture=sys
|
||||
addopts= -rsxX -s -v --strict-markers --capture=sys
|
||||
filterwarnings =
|
||||
error::UserWarning
|
||||
ignore:.*U.* is deprecated:DeprecationWarning
|
||||
|
@ -1,300 +1,71 @@
|
||||
agate==1.6.1 \
|
||||
--hash=sha256:48d6f80b35611c1ba25a642cbc5b90fcbdeeb2a54711c4a8d062ee2809334d1c \
|
||||
--hash=sha256:c93aaa500b439d71e4a5cf088d0006d2ce2c76f1950960c8843114e5f361dfd3
|
||||
agate-dbf==0.2.1 \
|
||||
--hash=sha256:00c93c498ec9a04cc587bf63dd7340e67e2541f0df4c9a7259d7cb3dd4ce372f \
|
||||
--hash=sha256:f618fadb413d41468c90d72fca945681d82d9e4d1b3d89f9bda52e607b828c0b
|
||||
agate-excel==0.2.3 \
|
||||
--hash=sha256:8f255ef2c87c436b7132049e1dd86c8e08bf82d8c773aea86f3069b461a17d52
|
||||
agate-sql==0.5.4 \
|
||||
--hash=sha256:9277490ba8b8e7c747a9ae3671f52fe486784b48d4a14e78ca197fb0e36f281b
|
||||
appdirs==1.4.4 \
|
||||
--hash=sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128 \
|
||||
--hash=sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41
|
||||
appnope==0.1.0; sys_platform == "darwin" \
|
||||
--hash=sha256:5b26757dc6f79a3b7dc9fab95359328d5747fcb2409d331ea66d0272b90ab2a0 \
|
||||
--hash=sha256:8b995ffe925347a2138d7ac0fe77155e4311a0ea6d6da4f5128fe4b3cbe5ed71
|
||||
atomicwrites==1.4.0; sys_platform == "win32" \
|
||||
--hash=sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197 \
|
||||
--hash=sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a
|
||||
attrs==19.3.0 \
|
||||
--hash=sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c \
|
||||
--hash=sha256:f7b7ce16570fe9965acd6d30101a28f62fb4a7f9e926b3bbc9b61f8b04247e72
|
||||
babel==2.8.0 \
|
||||
--hash=sha256:d670ea0b10f8b723672d3a6abeb87b565b244da220d76b4dba1b66269ec152d4 \
|
||||
--hash=sha256:1aac2ae2d0d8ea368fa90906567f5c08463d98ade155c0c4bfedd6a0f7160e38
|
||||
backcall==0.2.0 \
|
||||
--hash=sha256:fbbce6a29f263178a1f7915c1940bde0ec2b2a967566fe1c65c1dfb7422bd255 \
|
||||
--hash=sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e
|
||||
black==19.10b0 \
|
||||
--hash=sha256:1b30e59be925fafc1ee4565e5e08abef6b03fe455102883820fe5ee2e4734e0b \
|
||||
--hash=sha256:c2edb73a08e9e0e6f65a0e6af18b059b8b1cdd5bef997d7a0b181df93dc81539
|
||||
certifi==2020.6.20 \
|
||||
--hash=sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41 \
|
||||
--hash=sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3
|
||||
chardet==3.0.4 \
|
||||
--hash=sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691 \
|
||||
--hash=sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae
|
||||
click==7.1.2 \
|
||||
--hash=sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc \
|
||||
--hash=sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a
|
||||
colorama==0.4.3; sys_platform == "win32" \
|
||||
--hash=sha256:7d73d2a99753107a36ac6b455ee49046802e59d9d076ef8e47b61499fa29afff \
|
||||
--hash=sha256:e96da0d330793e2cb9485e9ddfd918d456036c7149416295932478192f4436a1
|
||||
csvkit==1.0.5 \
|
||||
--hash=sha256:7bd390f4d300e45dc9ed67a32af762a916bae7d9a85087a10fd4f64ce65fd5b9
|
||||
dbfread==2.0.7 \
|
||||
--hash=sha256:f604def58c59694fa0160d7be5d0b8d594467278d2bb6a47d46daf7162c84cec \
|
||||
--hash=sha256:07c8a9af06ffad3f6f03e8fe91ad7d2733e31a26d2b72c4dd4cfbae07ee3b73d
|
||||
decorator==4.4.2 \
|
||||
--hash=sha256:41fa54c2a0cc4ba648be4fd43cff00aedf5b9465c9bf18d64325bc225f08f760 \
|
||||
--hash=sha256:e3a62f0520172440ca0dcc823749319382e377f37f140a0b99ef45fecb84bfe7
|
||||
et-xmlfile==1.0.1 \
|
||||
--hash=sha256:614d9722d572f6246302c4491846d2c393c199cfa4edc9af593437691683335b
|
||||
flake8==3.8.3 \
|
||||
--hash=sha256:15e351d19611c887e482fb960eae4d44845013cc142d42896e9862f775d8cf5c \
|
||||
--hash=sha256:f04b9fcbac03b0a3e58c0ab3a0ecc462e023a9faf046d57794184028123aa208
|
||||
idna==2.10 \
|
||||
--hash=sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0 \
|
||||
--hash=sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6
|
||||
ipython==7.16.1 \
|
||||
--hash=sha256:2dbcc8c27ca7d3cfe4fcdff7f45b27f9a8d3edfa70ff8024a71c7a8eb5f09d64 \
|
||||
--hash=sha256:9f4fcb31d3b2c533333893b9172264e4821c1ac91839500f31bd43f2c59b3ccf
|
||||
ipython-genutils==0.2.0 \
|
||||
--hash=sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8 \
|
||||
--hash=sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8
|
||||
isodate==0.6.0 \
|
||||
--hash=sha256:aa4d33c06640f5352aca96e4b81afd8ab3b47337cc12089822d6f322ac772c81 \
|
||||
--hash=sha256:2e364a3d5759479cdb2d37cce6b9376ea504db2ff90252a2e5b7cc89cc9ff2d8
|
||||
isort==4.3.21 \
|
||||
--hash=sha256:6e811fcb295968434526407adb8796944f1988c5b65e8139058f2014cbe100fd \
|
||||
--hash=sha256:54da7e92468955c4fceacd0c86bd0ec997b0e1ee80d97f67c35a78b719dccab1
|
||||
jdcal==1.4.1 \
|
||||
--hash=sha256:1abf1305fce18b4e8aa248cf8fe0c56ce2032392bc64bbd61b5dff2a19ec8bba \
|
||||
--hash=sha256:472872e096eb8df219c23f2689fc336668bdb43d194094b5cc1707e1640acfc8
|
||||
jedi==0.17.1 \
|
||||
--hash=sha256:1ddb0ec78059e8e27ec9eb5098360b4ea0a3dd840bedf21415ea820c21b40a22 \
|
||||
--hash=sha256:807d5d4f96711a2bcfdd5dfa3b1ae6d09aa53832b182090b222b5efb81f52f63
|
||||
langid==1.1.6 \
|
||||
--hash=sha256:044bcae1912dab85c33d8e98f2811b8f4ff1213e5e9a9e9510137b84da2cb293
|
||||
leather==0.3.3 \
|
||||
--hash=sha256:e0bb36a6d5f59fbf3c1a6e75e7c8bee29e67f06f5b48c0134407dde612eba5e2 \
|
||||
--hash=sha256:076d1603b5281488285718ce1a5ce78cf1027fe1e76adf9c548caf83c519b988
|
||||
mccabe==0.6.1 \
|
||||
--hash=sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42 \
|
||||
--hash=sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f
|
||||
more-itertools==8.4.0 \
|
||||
--hash=sha256:68c70cc7167bdf5c7c9d8f6954a7837089c6a36bf565383919bb595efb8a17e5 \
|
||||
--hash=sha256:b78134b2063dd214000685165d81c154522c3ee0a1c0d4d113c80361c234c5a2
|
||||
numpy==1.19.0 \
|
||||
--hash=sha256:63d971bb211ad3ca37b2adecdd5365f40f3b741a455beecba70fd0dde8b2a4cb \
|
||||
--hash=sha256:b6aaeadf1e4866ca0fdf7bb4eed25e521ae21a7947c59f78154b24fc7abbe1dd \
|
||||
--hash=sha256:13af0184177469192d80db9bd02619f6fa8b922f9f327e077d6f2a6acb1ce1c0 \
|
||||
--hash=sha256:356f96c9fbec59974a592452ab6a036cd6f180822a60b529a975c9467fcd5f23 \
|
||||
--hash=sha256:fa1fe75b4a9e18b66ae7f0b122543c42debcf800aaafa0212aaff3ad273c2596 \
|
||||
--hash=sha256:cbe326f6d364375a8e5a8ccb7e9cd73f4b2f6dc3b2ed205633a0db8243e2a96a \
|
||||
--hash=sha256:a2e3a39f43f0ce95204beb8fe0831199542ccab1e0c6e486a0b4947256215632 \
|
||||
--hash=sha256:7b852817800eb02e109ae4a9cef2beda8dd50d98b76b6cfb7b5c0099d27b52d4 \
|
||||
--hash=sha256:d97a86937cf9970453c3b62abb55a6475f173347b4cde7f8dcdb48c8e1b9952d \
|
||||
--hash=sha256:a86c962e211f37edd61d6e11bb4df7eddc4a519a38a856e20a6498c319efa6b0 \
|
||||
--hash=sha256:d34fbb98ad0d6b563b95de852a284074514331e6b9da0a9fc894fb1cdae7a79e \
|
||||
--hash=sha256:658624a11f6e1c252b2cd170d94bf28c8f9410acab9f2fd4369e11e1cd4e1aaf \
|
||||
--hash=sha256:4d054f013a1983551254e2379385e359884e5af105e3efe00418977d02f634a7 \
|
||||
--hash=sha256:26a45798ca2a4e168d00de75d4a524abf5907949231512f372b217ede3429e98 \
|
||||
--hash=sha256:3c40c827d36c6d1c3cf413694d7dc843d50997ebffbc7c87d888a203ed6403a7 \
|
||||
--hash=sha256:be62aeff8f2f054eff7725f502f6228298891fd648dc2630e03e44bf63e8cee0 \
|
||||
--hash=sha256:dd53d7c4a69e766e4900f29db5872f5824a06827d594427cf1a4aa542818b796 \
|
||||
--hash=sha256:30a59fb41bb6b8c465ab50d60a1b298d1cd7b85274e71f38af5a75d6c475d2d2 \
|
||||
--hash=sha256:df1889701e2dfd8ba4dc9b1a010f0a60950077fb5242bb92c8b5c7f1a6f2668a \
|
||||
--hash=sha256:33c623ef9ca5e19e05991f127c1be5aeb1ab5cdf30cb1c5cf3960752e58b599b \
|
||||
--hash=sha256:26f509450db547e4dfa3ec739419b31edad646d21fb8d0ed0734188b35ff6b27 \
|
||||
--hash=sha256:7b57f26e5e6ee2f14f960db46bd58ffdca25ca06dd997729b1b179fddd35f5a3 \
|
||||
--hash=sha256:a8705c5073fe3fcc297fb8e0b31aa794e05af6a329e81b7ca4ffecab7f2b95ef \
|
||||
--hash=sha256:c2edbb783c841e36ca0fa159f0ae97a88ce8137fb3a6cd82eae77349ba4b607b \
|
||||
--hash=sha256:8cde829f14bd38f6da7b2954be0f2837043e8b8d7a9110ec5e318ae6bf706610 \
|
||||
--hash=sha256:76766cc80d6128750075378d3bb7812cf146415bd29b588616f72c943c00d598
|
||||
openpyxl==3.0.4 \
|
||||
--hash=sha256:6e62f058d19b09b95d20ebfbfb04857ad08d0833190516c1660675f699c6186f \
|
||||
--hash=sha256:d88dd1480668019684c66cfff3e52a5de4ed41e9df5dd52e008cbf27af0dbf87
|
||||
packaging==20.4 \
|
||||
--hash=sha256:998416ba6962ae7fbd6596850b80e17859a5753ba17c32284f67bfff33784181 \
|
||||
--hash=sha256:4357f74f47b9c12db93624a82154e9b120fa8293699949152b22065d556079f8
|
||||
pandas==1.0.5 \
|
||||
--hash=sha256:faa42a78d1350b02a7d2f0dbe3c80791cf785663d6997891549d0f86dc49125e \
|
||||
--hash=sha256:9c31d52f1a7dd2bb4681d9f62646c7aa554f19e8e9addc17e8b1b20011d7522d \
|
||||
--hash=sha256:8778a5cc5a8437a561e3276b85367412e10ae9fff07db1eed986e427d9a674f8 \
|
||||
--hash=sha256:9871ef5ee17f388f1cb35f76dc6106d40cb8165c562d573470672f4cdefa59ef \
|
||||
--hash=sha256:35b670b0abcfed7cad76f2834041dcf7ae47fd9b22b63622d67cdc933d79f453 \
|
||||
--hash=sha256:c9410ce8a3dee77653bc0684cfa1535a7f9c291663bd7ad79e39f5ab58f67ab3 \
|
||||
--hash=sha256:02f1e8f71cd994ed7fcb9a35b6ddddeb4314822a0e09a9c5b2d278f8cb5d4096 \
|
||||
--hash=sha256:b3c4f93fcb6e97d993bf87cdd917883b7dab7d20c627699f360a8fb49e9e0b91 \
|
||||
--hash=sha256:5759edf0b686b6f25a5d4a447ea588983a33afc8a0081a0954184a4a87fd0dd7 \
|
||||
--hash=sha256:ab8173a8efe5418bbe50e43f321994ac6673afc5c7c4839014cf6401bbdd0705 \
|
||||
--hash=sha256:13f75fb18486759da3ff40f5345d9dd20e7d78f2a39c5884d013456cec9876f0 \
|
||||
--hash=sha256:5a7cf6044467c1356b2b49ef69e50bf4d231e773c3ca0558807cdba56b76820b \
|
||||
--hash=sha256:ae961f1f0e270f1e4e2273f6a539b2ea33248e0e3a11ffb479d757918a5e03a9 \
|
||||
--hash=sha256:f69e0f7b7c09f1f612b1f8f59e2df72faa8a6b41c5a436dde5b615aaf948f107 \
|
||||
--hash=sha256:4c73f373b0800eb3062ffd13d4a7a2a6d522792fa6eb204d67a4fad0a40f03dc \
|
||||
--hash=sha256:69c5d920a0b2a9838e677f78f4dde506b95ea8e4d30da25859db6469ded84fa8
|
||||
parsedatetime==2.6 \
|
||||
--hash=sha256:cb96edd7016872f58479e35879294258c71437195760746faffedb692aef000b \
|
||||
--hash=sha256:4cb368fbb18a0b7231f4d76119165451c8d2e35951455dfee97c62a87b04d455
|
||||
parso==0.7.0 \
|
||||
--hash=sha256:158c140fc04112dc45bca311633ae5033c2c2a7b732fa33d0955bad8152a8dd0 \
|
||||
--hash=sha256:908e9fae2144a076d72ae4e25539143d40b8e3eafbaeae03c1bfe226f4cdf12c
|
||||
pathspec==0.8.0 \
|
||||
--hash=sha256:7d91249d21749788d07a2d0f94147accd8f845507400749ea19c1ec9054a12b0 \
|
||||
--hash=sha256:da45173eb3a6f2a5a487efba21f050af2b41948be6ab52b6a1e3ff22bb8b7061
|
||||
pexpect==4.8.0; sys_platform != "win32" \
|
||||
--hash=sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937 \
|
||||
--hash=sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c
|
||||
pickleshare==0.7.5 \
|
||||
--hash=sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56 \
|
||||
--hash=sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca
|
||||
pluggy==0.13.1 \
|
||||
--hash=sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d \
|
||||
--hash=sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0
|
||||
prompt-toolkit==3.0.5 \
|
||||
--hash=sha256:df7e9e63aea609b1da3a65641ceaf5bc7d05e0a04de5bd45d05dbeffbabf9e04 \
|
||||
--hash=sha256:563d1a4140b63ff9dd587bda9557cffb2fe73650205ab6f4383092fb882e7dc8
|
||||
ptyprocess==0.6.0; sys_platform != "win32" \
|
||||
--hash=sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f \
|
||||
--hash=sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0
|
||||
py==1.9.0 \
|
||||
--hash=sha256:366389d1db726cd2fcfc79732e75410e5fe4d31db13692115529d34069a043c2 \
|
||||
--hash=sha256:9ca6883ce56b4e8da7e79ac18787889fa5206c79dcc67fb065376cd2fe03f342
|
||||
pycodestyle==2.6.0 \
|
||||
--hash=sha256:2295e7b2f6b5bd100585ebcb1f616591b652db8a741695b3d8f5d28bdc934367 \
|
||||
--hash=sha256:c58a7d2815e0e8d7972bf1803331fb0152f867bd89adf8a01dfd55085434192e
|
||||
pycountry==19.8.18 \
|
||||
--hash=sha256:3c57aa40adcf293d59bebaffbe60d8c39976fba78d846a018dc0c2ec9c6cb3cb
|
||||
pyflakes==2.2.0 \
|
||||
--hash=sha256:0d94e0e05a19e57a99444b6ddcf9a6eb2e5c68d3ca1e98e90707af8152c90a92 \
|
||||
--hash=sha256:35b2d75ee967ea93b55750aa9edbbf72813e06a66ba54438df2cfac9e3c27fc8
|
||||
pygments==2.6.1 \
|
||||
--hash=sha256:ff7a40b4860b727ab48fad6360eb351cc1b33cbf9b15a0f689ca5353e9463324 \
|
||||
--hash=sha256:647344a061c249a3b74e230c739f434d7ea4d8b1d5f3721bc0f3558049b38f44
|
||||
pyparsing==2.4.7 \
|
||||
--hash=sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b \
|
||||
--hash=sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1
|
||||
pytest==5.4.3 \
|
||||
--hash=sha256:5c0db86b698e8f170ba4582a492248919255fcd4c79b1ee64ace34301fb589a1 \
|
||||
--hash=sha256:7979331bfcba207414f5e1263b5a0f8f521d0f457318836a7355531ed1a4c7d8
|
||||
pytest-clarity==0.3.0a0 \
|
||||
--hash=sha256:5cc99e3d9b7969dfe17e5f6072d45a917c59d363b679686d3c958a1ded2e4dcf
|
||||
python-dateutil==2.8.1 \
|
||||
--hash=sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c \
|
||||
--hash=sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a
|
||||
python-slugify==4.0.1 \
|
||||
--hash=sha256:69a517766e00c1268e5bbfc0d010a0a8508de0b18d30ad5a1ff357f8ae724270
|
||||
python-stdnum==1.13 \
|
||||
--hash=sha256:120f83d33fb8b8be1b282f20dd755a892d5facf84f54fa21f75bbd2633128160 \
|
||||
--hash=sha256:3d5d4430579cba88211d3ba4855a16faff235352a25a01d6ab70024686a75823
|
||||
pytimeparse==1.1.8 \
|
||||
--hash=sha256:04b7be6cc8bd9f5647a6325444926c3ac34ee6bc7e69da4367ba282f076036bd \
|
||||
--hash=sha256:e86136477be924d7e670646a98561957e8ca7308d44841e21f5ddea757556a0a
|
||||
pytz==2020.1 \
|
||||
--hash=sha256:a494d53b6d39c3c6e44c3bec237336e14305e4f29bbf800b599253057fbb79ed \
|
||||
--hash=sha256:c35965d010ce31b23eeb663ed3cc8c906275d6be1a34393a1d73a41febf4a048
|
||||
regex==2020.6.8 \
|
||||
--hash=sha256:fbff901c54c22425a5b809b914a3bfaf4b9570eee0e5ce8186ac71eb2025191c \
|
||||
--hash=sha256:112e34adf95e45158c597feea65d06a8124898bdeac975c9087fe71b572bd938 \
|
||||
--hash=sha256:92d8a043a4241a710c1cf7593f5577fbb832cf6c3a00ff3fc1ff2052aff5dd89 \
|
||||
--hash=sha256:bae83f2a56ab30d5353b47f9b2a33e4aac4de9401fb582b55c42b132a8ac3868 \
|
||||
--hash=sha256:b2ba0f78b3ef375114856cbdaa30559914d081c416b431f2437f83ce4f8b7f2f \
|
||||
--hash=sha256:95fa7726d073c87141f7bbfb04c284901f8328e2d430eeb71b8ffdd5742a5ded \
|
||||
--hash=sha256:e3cdc9423808f7e1bb9c2e0bdb1c9dc37b0607b30d646ff6faf0d4e41ee8fee3 \
|
||||
--hash=sha256:c78e66a922de1c95a208e4ec02e2e5cf0bb83a36ceececc10a72841e53fbf2bd \
|
||||
--hash=sha256:08997a37b221a3e27d68ffb601e45abfb0093d39ee770e4257bd2f5115e8cb0a \
|
||||
--hash=sha256:2f6f211633ee8d3f7706953e9d3edc7ce63a1d6aad0be5dcee1ece127eea13ae \
|
||||
--hash=sha256:55b4c25cbb3b29f8d5e63aeed27b49fa0f8476b0d4e1b3171d85db891938cc3a \
|
||||
--hash=sha256:89cda1a5d3e33ec9e231ece7307afc101b5217523d55ef4dc7fb2abd6de71ba3 \
|
||||
--hash=sha256:690f858d9a94d903cf5cada62ce069b5d93b313d7d05456dbcd99420856562d9 \
|
||||
--hash=sha256:1700419d8a18c26ff396b3b06ace315b5f2a6e780dad387e4c48717a12a22c29 \
|
||||
--hash=sha256:654cb773b2792e50151f0e22be0f2b6e1c3a04c5328ff1d9d59c0398d37ef610 \
|
||||
--hash=sha256:52e1b4bef02f4040b2fd547357a170fc1146e60ab310cdbdd098db86e929b387 \
|
||||
--hash=sha256:cf59bbf282b627130f5ba68b7fa3abdb96372b24b66bdf72a4920e8153fc7910 \
|
||||
--hash=sha256:5aaa5928b039ae440d775acea11d01e42ff26e1561c0ffcd3d805750973c6baf \
|
||||
--hash=sha256:97712e0d0af05febd8ab63d2ef0ab2d0cd9deddf4476f7aa153f76feef4b2754 \
|
||||
--hash=sha256:6ad8663c17db4c5ef438141f99e291c4d4edfeaacc0ce28b5bba2b0bf273d9b5 \
|
||||
--hash=sha256:e9b64e609d37438f7d6e68c2546d2cb8062f3adb27e6336bc129b51be20773ac
|
||||
requests==2.24.0 \
|
||||
--hash=sha256:fe75cc94a9443b9246fc7049224f75604b113c36acb93f87b80ed42c44cbb898 \
|
||||
--hash=sha256:b3559a131db72c33ee969480840fff4bb6dd111de7dd27c8ee1f820f4f00231b
|
||||
requests-cache==0.5.2 \
|
||||
--hash=sha256:813023269686045f8e01e2289cc1e7e9ae5ab22ddd1e2849a9093ab3ab7270eb \
|
||||
--hash=sha256:81e13559baee64677a7d73b85498a5a8f0639e204517b5d05ff378e44a57831a
|
||||
six==1.15.0 \
|
||||
--hash=sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced \
|
||||
--hash=sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259
|
||||
sqlalchemy==1.3.18 \
|
||||
--hash=sha256:f11c2437fb5f812d020932119ba02d9e2bc29a6eca01a055233a8b449e3e1e7d \
|
||||
--hash=sha256:0ec575db1b54909750332c2e335c2bb11257883914a03bc5a3306a4488ecc772 \
|
||||
--hash=sha256:f57be5673e12763dd400fea568608700a63ce1c6bd5bdbc3cc3a2c5fdb045274 \
|
||||
--hash=sha256:8cac7bb373a5f1423e28de3fd5fc8063b9c8ffe8957dc1b1a59cb90453db6da1 \
|
||||
--hash=sha256:adad60eea2c4c2a1875eb6305a0b6e61a83163f8e233586a4d6a55221ef984fe \
|
||||
--hash=sha256:57aa843b783179ab72e863512e14bdcba186641daf69e4e3a5761d705dcc35b1 \
|
||||
--hash=sha256:621f58cd921cd71ba6215c42954ffaa8a918eecd8c535d97befa1a8acad986dd \
|
||||
--hash=sha256:fc728ece3d5c772c196fd338a99798e7efac7a04f9cb6416299a3638ee9a94cd \
|
||||
--hash=sha256:736d41cfebedecc6f159fc4ac0769dc89528a989471dc1d378ba07d29a60ba1c \
|
||||
--hash=sha256:427273b08efc16a85aa2b39892817e78e3ed074fcb89b2a51c4979bae7e7ba98 \
|
||||
--hash=sha256:cbe1324ef52ff26ccde2cb84b8593c8bf930069dfc06c1e616f1bfd4e47f48a3 \
|
||||
--hash=sha256:8fd452dc3d49b3cc54483e033de6c006c304432e6f84b74d7b2c68afa2569ae5 \
|
||||
--hash=sha256:e89e0d9e106f8a9180a4ca92a6adde60c58b1b0299e1b43bd5e0312f535fbf33 \
|
||||
--hash=sha256:6ac2558631a81b85e7fb7a44e5035347938b0a73f5fdc27a8566777d0792a6a4 \
|
||||
--hash=sha256:87fad64529cde4f1914a5b9c383628e1a8f9e3930304c09cf22c2ae118a1280e \
|
||||
--hash=sha256:e4624d7edb2576cd72bb83636cd71c8ce544d8e272f308bd80885056972ca299 \
|
||||
--hash=sha256:89494df7f93b1836cae210c42864b292f9b31eeabca4810193761990dc689cce \
|
||||
--hash=sha256:716754d0b5490bdcf68e1e4925edc02ac07209883314ad01a137642ddb2056f1 \
|
||||
--hash=sha256:50c4ee32f0e1581828843267d8de35c3298e86ceecd5e9017dc45788be70a864 \
|
||||
--hash=sha256:d98bc827a1293ae767c8f2f18be3bb5151fd37ddcd7da2a5f9581baeeb7a3fa1 \
|
||||
--hash=sha256:0942a3a0df3f6131580eddd26d99071b48cfe5aaf3eab2783076fbc5a1c1882e \
|
||||
--hash=sha256:16593fd748944726540cd20f7e83afec816c2ac96b082e26ae226e8f7e9688cf \
|
||||
--hash=sha256:c26f95e7609b821b5f08a72dab929baa0d685406b953efd7c89423a511d5c413 \
|
||||
--hash=sha256:512a85c3c8c3995cc91af3e90f38f460da5d3cade8dc3a229c8e0879037547c9 \
|
||||
--hash=sha256:d05c4adae06bd0c7f696ae3ec8d993ed8ffcc4e11a76b1b35a5af8a099bd2284 \
|
||||
--hash=sha256:109581ccc8915001e8037b73c29590e78ce74be49ca0a3630a23831f9e3ed6c7 \
|
||||
--hash=sha256:8619b86cb68b185a778635be5b3e6018623c0761dde4df2f112896424aa27bd8 \
|
||||
--hash=sha256:da2fb75f64792c1fc64c82313a00c728a7c301efe6a60b7a9fe35b16b4368ce7
|
||||
termcolor==1.1.0 \
|
||||
--hash=sha256:1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b
|
||||
text-unidecode==1.3 \
|
||||
--hash=sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93 \
|
||||
--hash=sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8
|
||||
toml==0.10.1 \
|
||||
--hash=sha256:bda89d5935c2eac546d648028b9901107a595863cb36bae0c73ac804a9b4ce88 \
|
||||
--hash=sha256:926b612be1e5ce0634a2ca03470f95169cf16f939018233a670519cb4ac58b0f
|
||||
traitlets==4.3.3 \
|
||||
--hash=sha256:70b4c6a1d9019d7b4f6846832288f86998aa3b9207c6821f3578a6a6a467fe44 \
|
||||
--hash=sha256:d023ee369ddd2763310e4c3eae1ff649689440d4ae59d7485eb4cfbbe3e359f7
|
||||
typed-ast==1.4.1 \
|
||||
--hash=sha256:73d785a950fc82dd2a25897d525d003f6378d1cb23ab305578394694202a58c3 \
|
||||
--hash=sha256:aaee9905aee35ba5905cfb3c62f3e83b3bec7b39413f0a7f19be4e547ea01ebb \
|
||||
--hash=sha256:0c2c07682d61a629b68433afb159376e24e5b2fd4641d35424e462169c0a7919 \
|
||||
--hash=sha256:4083861b0aa07990b619bd7ddc365eb7fa4b817e99cf5f8d9cf21a42780f6e01 \
|
||||
--hash=sha256:269151951236b0f9a6f04015a9004084a5ab0d5f19b57de779f908621e7d8b75 \
|
||||
--hash=sha256:24995c843eb0ad11a4527b026b4dde3da70e1f2d8806c99b7b4a7cf491612652 \
|
||||
--hash=sha256:fe460b922ec15dd205595c9b5b99e2f056fd98ae8f9f56b888e7a17dc2b757e7 \
|
||||
--hash=sha256:4e3e5da80ccbebfff202a67bf900d081906c358ccc3d5e3c8aea42fdfdfd51c1 \
|
||||
--hash=sha256:249862707802d40f7f29f6e1aad8d84b5aa9e44552d2cc17384b209f091276aa \
|
||||
--hash=sha256:8ce678dbaf790dbdb3eba24056d5364fb45944f33553dd5869b7580cdbb83614 \
|
||||
--hash=sha256:c9e348e02e4d2b4a8b2eedb48210430658df6951fa484e59de33ff773fbd4b41 \
|
||||
--hash=sha256:bcd3b13b56ea479b3650b82cabd6b5343a625b0ced5429e4ccad28a8973f301b \
|
||||
--hash=sha256:d5d33e9e7af3b34a40dc05f498939f0ebf187f07c385fd58d591c533ad8562fe \
|
||||
--hash=sha256:0666aa36131496aed8f7be0410ff974562ab7eeac11ef351def9ea6fa28f6355 \
|
||||
--hash=sha256:d205b1b46085271b4e15f670058ce182bd1199e56b317bf2ec004b6a44f911f6 \
|
||||
--hash=sha256:6daac9731f172c2a22ade6ed0c00197ee7cc1221aa84cfdf9c31defeb059a907 \
|
||||
--hash=sha256:498b0f36cc7054c1fead3d7fc59d2150f4d5c6c56ba7fb150c013fbc683a8d2d \
|
||||
--hash=sha256:715ff2f2df46121071622063fc7543d9b1fd19ebfc4f5c8895af64a77a8c852c \
|
||||
--hash=sha256:fc0fea399acb12edbf8a628ba8d2312f583bdbdb3335635db062fa98cf71fca4 \
|
||||
--hash=sha256:d43943ef777f9a1c42bf4e552ba23ac77a6351de620aa9acf64ad54933ad4d34 \
|
||||
--hash=sha256:8c8aaad94455178e3187ab22c8b01a3837f8ee50e09cf31f1ba129eb293ec30b
|
||||
urllib3==1.25.9 \
|
||||
--hash=sha256:88206b0eb87e6d677d424843ac5209e3fb9d0190d0ee169599165ec25e9d9115 \
|
||||
--hash=sha256:3018294ebefce6572a474f0604c2021e33b3fd8006ecd11d62107a5d2a963527
|
||||
wcwidth==0.2.5 \
|
||||
--hash=sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784 \
|
||||
--hash=sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83
|
||||
xlrd==1.2.0 \
|
||||
--hash=sha256:e551fb498759fa3a5384a94ccd4c3c02eb7c00ea424426e212ac0c57be9dfbde \
|
||||
--hash=sha256:546eb36cee8db40c3eaa46c351e67ffee6eeb5fa2650b71bc4c758a29a1b29b2
|
||||
agate-dbf==0.2.2
|
||||
agate-excel==0.2.3
|
||||
agate-sql==0.5.5
|
||||
agate==1.6.1
|
||||
appdirs==1.4.4; python_version >= "3.6"
|
||||
appnope==0.1.2; python_version >= "3.7" and python_version < "4.0" and sys_platform == "darwin"
|
||||
atomicwrites==1.4.0; python_version >= "3.6" and python_full_version < "3.0.0" and sys_platform == "win32" and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6") or sys_platform == "win32" and python_version >= "3.6" and python_full_version >= "3.4.0" and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6")
|
||||
attrs==20.3.0; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
|
||||
babel==2.9.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
|
||||
backcall==0.2.0; python_version >= "3.7" and python_version < "4.0"
|
||||
black==20.8b1; python_version >= "3.6"
|
||||
certifi==2020.12.5; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
|
||||
chardet==4.0.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
|
||||
click==7.1.2; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6"
|
||||
colorama==0.4.4; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
|
||||
csvkit==1.0.5
|
||||
dbfread==2.0.7
|
||||
decorator==4.4.2; python_version >= "3.7" and python_full_version < "3.0.0" and python_version < "4.0" or python_version >= "3.7" and python_version < "4.0" and python_full_version >= "3.2.0"
|
||||
et-xmlfile==1.0.1; python_version >= "3.6"
|
||||
flake8==3.8.4; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.4.0")
|
||||
idna==2.10; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
|
||||
iniconfig==1.1.1; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
|
||||
ipython-genutils==0.2.0; python_version >= "3.7" and python_version < "4.0"
|
||||
ipython==7.20.0; python_version >= "3.7" and python_version < "4.0"
|
||||
isodate==0.6.0
|
||||
isort==5.7.0; python_version >= "3.6" and python_version < "4.0"
|
||||
jdcal==1.4.1; python_version >= "3.6"
|
||||
jedi==0.18.0; python_version >= "3.7" and python_version < "4.0"
|
||||
langid==1.1.6
|
||||
leather==0.3.3
|
||||
mccabe==0.6.1; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
|
||||
mypy-extensions==0.4.3; python_version >= "3.6"
|
||||
numpy==1.20.1; python_version >= "3.7" and python_full_version >= "3.7.1"
|
||||
openpyxl==3.0.6; python_version >= "3.6"
|
||||
packaging==20.9; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
|
||||
pandas==1.2.2; python_full_version >= "3.7.1"
|
||||
parsedatetime==2.6
|
||||
parso==0.8.1; python_version >= "3.7" and python_version < "4.0"
|
||||
pathspec==0.8.1; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6"
|
||||
pexpect==4.8.0; python_version >= "3.7" and python_version < "4.0" and sys_platform != "win32"
|
||||
pickleshare==0.7.5; python_version >= "3.7" and python_version < "4.0"
|
||||
pluggy==0.13.1; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
|
||||
prompt-toolkit==3.0.16; python_version >= "3.7" and python_version < "4.0" and python_full_version >= "3.6.1"
|
||||
ptyprocess==0.7.0; python_version >= "3.7" and python_version < "4.0" and sys_platform != "win32"
|
||||
py==1.10.0; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
|
||||
pycodestyle==2.6.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
|
||||
pycountry==19.8.18
|
||||
pyflakes==2.2.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
|
||||
pygments==2.8.0; python_version >= "3.7" and python_version < "4.0"
|
||||
pyparsing==2.4.7; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
|
||||
pytest-clarity==0.3.0a0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.4.0")
|
||||
pytest==6.2.2; python_version >= "3.6"
|
||||
python-dateutil==2.8.1; python_full_version >= "3.7.1"
|
||||
python-slugify==4.0.1; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
|
||||
python-stdnum==1.16
|
||||
pytimeparse==1.1.8
|
||||
pytz==2021.1; python_full_version >= "3.7.1"
|
||||
regex==2020.11.13; python_version >= "3.6"
|
||||
requests-cache==0.5.2
|
||||
requests==2.25.1; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
|
||||
six==1.15.0; python_full_version >= "3.7.1"
|
||||
sqlalchemy==1.3.23; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
|
||||
termcolor==1.1.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
|
||||
text-unidecode==1.3; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
|
||||
toml==0.10.2; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
|
||||
traitlets==5.0.5; python_version >= "3.7" and python_version < "4.0"
|
||||
typed-ast==1.4.2; python_version >= "3.6"
|
||||
typing-extensions==3.7.4.3; python_version >= "3.6"
|
||||
urllib3==1.26.3; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version < "4"
|
||||
wcwidth==0.2.5; python_version >= "3.7" and python_version < "4.0" and python_full_version >= "3.6.1"
|
||||
xlrd==1.2.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.4.0")
|
||||
|
@ -1,81 +1,16 @@
|
||||
certifi==2020.6.20 \
|
||||
--hash=sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41 \
|
||||
--hash=sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3
|
||||
chardet==3.0.4 \
|
||||
--hash=sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691 \
|
||||
--hash=sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae
|
||||
idna==2.10 \
|
||||
--hash=sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0 \
|
||||
--hash=sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6
|
||||
langid==1.1.6 \
|
||||
--hash=sha256:044bcae1912dab85c33d8e98f2811b8f4ff1213e5e9a9e9510137b84da2cb293
|
||||
numpy==1.19.0 \
|
||||
--hash=sha256:63d971bb211ad3ca37b2adecdd5365f40f3b741a455beecba70fd0dde8b2a4cb \
|
||||
--hash=sha256:b6aaeadf1e4866ca0fdf7bb4eed25e521ae21a7947c59f78154b24fc7abbe1dd \
|
||||
--hash=sha256:13af0184177469192d80db9bd02619f6fa8b922f9f327e077d6f2a6acb1ce1c0 \
|
||||
--hash=sha256:356f96c9fbec59974a592452ab6a036cd6f180822a60b529a975c9467fcd5f23 \
|
||||
--hash=sha256:fa1fe75b4a9e18b66ae7f0b122543c42debcf800aaafa0212aaff3ad273c2596 \
|
||||
--hash=sha256:cbe326f6d364375a8e5a8ccb7e9cd73f4b2f6dc3b2ed205633a0db8243e2a96a \
|
||||
--hash=sha256:a2e3a39f43f0ce95204beb8fe0831199542ccab1e0c6e486a0b4947256215632 \
|
||||
--hash=sha256:7b852817800eb02e109ae4a9cef2beda8dd50d98b76b6cfb7b5c0099d27b52d4 \
|
||||
--hash=sha256:d97a86937cf9970453c3b62abb55a6475f173347b4cde7f8dcdb48c8e1b9952d \
|
||||
--hash=sha256:a86c962e211f37edd61d6e11bb4df7eddc4a519a38a856e20a6498c319efa6b0 \
|
||||
--hash=sha256:d34fbb98ad0d6b563b95de852a284074514331e6b9da0a9fc894fb1cdae7a79e \
|
||||
--hash=sha256:658624a11f6e1c252b2cd170d94bf28c8f9410acab9f2fd4369e11e1cd4e1aaf \
|
||||
--hash=sha256:4d054f013a1983551254e2379385e359884e5af105e3efe00418977d02f634a7 \
|
||||
--hash=sha256:26a45798ca2a4e168d00de75d4a524abf5907949231512f372b217ede3429e98 \
|
||||
--hash=sha256:3c40c827d36c6d1c3cf413694d7dc843d50997ebffbc7c87d888a203ed6403a7 \
|
||||
--hash=sha256:be62aeff8f2f054eff7725f502f6228298891fd648dc2630e03e44bf63e8cee0 \
|
||||
--hash=sha256:dd53d7c4a69e766e4900f29db5872f5824a06827d594427cf1a4aa542818b796 \
|
||||
--hash=sha256:30a59fb41bb6b8c465ab50d60a1b298d1cd7b85274e71f38af5a75d6c475d2d2 \
|
||||
--hash=sha256:df1889701e2dfd8ba4dc9b1a010f0a60950077fb5242bb92c8b5c7f1a6f2668a \
|
||||
--hash=sha256:33c623ef9ca5e19e05991f127c1be5aeb1ab5cdf30cb1c5cf3960752e58b599b \
|
||||
--hash=sha256:26f509450db547e4dfa3ec739419b31edad646d21fb8d0ed0734188b35ff6b27 \
|
||||
--hash=sha256:7b57f26e5e6ee2f14f960db46bd58ffdca25ca06dd997729b1b179fddd35f5a3 \
|
||||
--hash=sha256:a8705c5073fe3fcc297fb8e0b31aa794e05af6a329e81b7ca4ffecab7f2b95ef \
|
||||
--hash=sha256:c2edbb783c841e36ca0fa159f0ae97a88ce8137fb3a6cd82eae77349ba4b607b \
|
||||
--hash=sha256:8cde829f14bd38f6da7b2954be0f2837043e8b8d7a9110ec5e318ae6bf706610 \
|
||||
--hash=sha256:76766cc80d6128750075378d3bb7812cf146415bd29b588616f72c943c00d598
|
||||
pandas==1.0.5 \
|
||||
--hash=sha256:faa42a78d1350b02a7d2f0dbe3c80791cf785663d6997891549d0f86dc49125e \
|
||||
--hash=sha256:9c31d52f1a7dd2bb4681d9f62646c7aa554f19e8e9addc17e8b1b20011d7522d \
|
||||
--hash=sha256:8778a5cc5a8437a561e3276b85367412e10ae9fff07db1eed986e427d9a674f8 \
|
||||
--hash=sha256:9871ef5ee17f388f1cb35f76dc6106d40cb8165c562d573470672f4cdefa59ef \
|
||||
--hash=sha256:35b670b0abcfed7cad76f2834041dcf7ae47fd9b22b63622d67cdc933d79f453 \
|
||||
--hash=sha256:c9410ce8a3dee77653bc0684cfa1535a7f9c291663bd7ad79e39f5ab58f67ab3 \
|
||||
--hash=sha256:02f1e8f71cd994ed7fcb9a35b6ddddeb4314822a0e09a9c5b2d278f8cb5d4096 \
|
||||
--hash=sha256:b3c4f93fcb6e97d993bf87cdd917883b7dab7d20c627699f360a8fb49e9e0b91 \
|
||||
--hash=sha256:5759edf0b686b6f25a5d4a447ea588983a33afc8a0081a0954184a4a87fd0dd7 \
|
||||
--hash=sha256:ab8173a8efe5418bbe50e43f321994ac6673afc5c7c4839014cf6401bbdd0705 \
|
||||
--hash=sha256:13f75fb18486759da3ff40f5345d9dd20e7d78f2a39c5884d013456cec9876f0 \
|
||||
--hash=sha256:5a7cf6044467c1356b2b49ef69e50bf4d231e773c3ca0558807cdba56b76820b \
|
||||
--hash=sha256:ae961f1f0e270f1e4e2273f6a539b2ea33248e0e3a11ffb479d757918a5e03a9 \
|
||||
--hash=sha256:f69e0f7b7c09f1f612b1f8f59e2df72faa8a6b41c5a436dde5b615aaf948f107 \
|
||||
--hash=sha256:4c73f373b0800eb3062ffd13d4a7a2a6d522792fa6eb204d67a4fad0a40f03dc \
|
||||
--hash=sha256:69c5d920a0b2a9838e677f78f4dde506b95ea8e4d30da25859db6469ded84fa8
|
||||
pycountry==19.8.18 \
|
||||
--hash=sha256:3c57aa40adcf293d59bebaffbe60d8c39976fba78d846a018dc0c2ec9c6cb3cb
|
||||
python-dateutil==2.8.1 \
|
||||
--hash=sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c \
|
||||
--hash=sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a
|
||||
python-stdnum==1.13 \
|
||||
--hash=sha256:120f83d33fb8b8be1b282f20dd755a892d5facf84f54fa21f75bbd2633128160 \
|
||||
--hash=sha256:3d5d4430579cba88211d3ba4855a16faff235352a25a01d6ab70024686a75823
|
||||
pytz==2020.1 \
|
||||
--hash=sha256:a494d53b6d39c3c6e44c3bec237336e14305e4f29bbf800b599253057fbb79ed \
|
||||
--hash=sha256:c35965d010ce31b23eeb663ed3cc8c906275d6be1a34393a1d73a41febf4a048
|
||||
requests==2.24.0 \
|
||||
--hash=sha256:fe75cc94a9443b9246fc7049224f75604b113c36acb93f87b80ed42c44cbb898 \
|
||||
--hash=sha256:b3559a131db72c33ee969480840fff4bb6dd111de7dd27c8ee1f820f4f00231b
|
||||
requests-cache==0.5.2 \
|
||||
--hash=sha256:813023269686045f8e01e2289cc1e7e9ae5ab22ddd1e2849a9093ab3ab7270eb \
|
||||
--hash=sha256:81e13559baee64677a7d73b85498a5a8f0639e204517b5d05ff378e44a57831a
|
||||
six==1.15.0 \
|
||||
--hash=sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced \
|
||||
--hash=sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259
|
||||
urllib3==1.25.9 \
|
||||
--hash=sha256:88206b0eb87e6d677d424843ac5209e3fb9d0190d0ee169599165ec25e9d9115 \
|
||||
--hash=sha256:3018294ebefce6572a474f0604c2021e33b3fd8006ecd11d62107a5d2a963527
|
||||
xlrd==1.2.0 \
|
||||
--hash=sha256:e551fb498759fa3a5384a94ccd4c3c02eb7c00ea424426e212ac0c57be9dfbde \
|
||||
--hash=sha256:546eb36cee8db40c3eaa46c351e67ffee6eeb5fa2650b71bc4c758a29a1b29b2
|
||||
certifi==2020.12.5; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
|
||||
chardet==4.0.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
|
||||
colorama==0.4.4; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
|
||||
idna==2.10; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
|
||||
langid==1.1.6
|
||||
numpy==1.20.1; python_version >= "3.7" and python_full_version >= "3.7.1"
|
||||
pandas==1.2.2; python_full_version >= "3.7.1"
|
||||
pycountry==19.8.18
|
||||
python-dateutil==2.8.1; python_full_version >= "3.7.1"
|
||||
python-stdnum==1.16
|
||||
pytz==2021.1; python_full_version >= "3.7.1"
|
||||
requests-cache==0.5.2
|
||||
requests==2.25.1; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
|
||||
six==1.15.0; python_full_version >= "3.7.1"
|
||||
urllib3==1.26.3; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version < "4"
|
||||
xlrd==1.2.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.4.0")
|
||||
|
4
setup.py
4
setup.py
@ -14,7 +14,7 @@ install_requires = [
|
||||
|
||||
setuptools.setup(
|
||||
name="csv-metadata-quality",
|
||||
version="0.4.2",
|
||||
version="0.4.3",
|
||||
author="Alan Orth",
|
||||
author_email="aorth@mjanja.ch",
|
||||
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem.",
|
||||
@ -23,9 +23,9 @@ setuptools.setup(
|
||||
long_description_content_type="text/markdown",
|
||||
url="https://github.com/alanorth/csv-metadata-quality",
|
||||
classifiers=[
|
||||
"Programming Language :: Python :: 3.6",
|
||||
"Programming Language :: Python :: 3.7",
|
||||
"Programming Language :: Python :: 3.8",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
|
||||
"Operating System :: OS Independent",
|
||||
"Development Status :: 4 - Beta",
|
||||
|
@ -1,4 +1,5 @@
|
||||
import pandas as pd
|
||||
from colorama import Fore
|
||||
|
||||
import csv_metadata_quality.check as check
|
||||
import csv_metadata_quality.experimental as experimental
|
||||
@ -12,7 +13,7 @@ def test_check_invalid_issn(capsys):
|
||||
check.issn(value)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f"Invalid ISSN: {value}\n"
|
||||
assert captured.out == f"{Fore.RED}Invalid ISSN: {Fore.RESET}{value}\n"
|
||||
|
||||
|
||||
def test_check_valid_issn():
|
||||
@ -33,7 +34,7 @@ def test_check_invalid_isbn(capsys):
|
||||
check.isbn(value)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f"Invalid ISBN: {value}\n"
|
||||
assert captured.out == f"{Fore.RED}Invalid ISBN: {Fore.RESET}{value}\n"
|
||||
|
||||
|
||||
def test_check_valid_isbn():
|
||||
@ -56,7 +57,26 @@ def test_check_invalid_separators(capsys):
|
||||
check.separators(value, field_name)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f"Invalid multi-value separator ({field_name}): {value}\n"
|
||||
assert (
|
||||
captured.out
|
||||
== f"{Fore.RED}Invalid multi-value separator ({field_name}): {Fore.RESET}{value}\n"
|
||||
)
|
||||
|
||||
|
||||
def test_check_unnecessary_separators(capsys):
|
||||
"""Test checking unnecessary multi-value separators."""
|
||||
|
||||
field = "Alan||Orth||"
|
||||
|
||||
field_name = "dc.contributor.author"
|
||||
|
||||
check.separators(field, field_name)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert (
|
||||
captured.out
|
||||
== f"{Fore.RED}Unnecessary multi-value separator ({field_name}): {Fore.RESET}{field}\n"
|
||||
)
|
||||
|
||||
|
||||
def test_check_valid_separators():
|
||||
@ -81,7 +101,7 @@ def test_check_missing_date(capsys):
|
||||
check.date(value, field_name)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f"Missing date ({field_name}).\n"
|
||||
assert captured.out == f"{Fore.RED}Missing date ({field_name}).{Fore.RESET}\n"
|
||||
|
||||
|
||||
def test_check_multiple_dates(capsys):
|
||||
@ -94,7 +114,10 @@ def test_check_multiple_dates(capsys):
|
||||
check.date(value, field_name)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f"Multiple dates not allowed ({field_name}): {value}\n"
|
||||
assert (
|
||||
captured.out
|
||||
== f"{Fore.RED}Multiple dates not allowed ({field_name}): {Fore.RESET}{value}\n"
|
||||
)
|
||||
|
||||
|
||||
def test_check_invalid_date(capsys):
|
||||
@ -107,7 +130,9 @@ def test_check_invalid_date(capsys):
|
||||
check.date(value, field_name)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f"Invalid date ({field_name}): {value}\n"
|
||||
assert (
|
||||
captured.out == f"{Fore.RED}Invalid date ({field_name}): {Fore.RESET}{value}\n"
|
||||
)
|
||||
|
||||
|
||||
def test_check_valid_date():
|
||||
@ -132,7 +157,10 @@ def test_check_suspicious_characters(capsys):
|
||||
check.suspicious_characters(value, field_name)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f"Suspicious character ({field_name}): ˆt\n"
|
||||
assert (
|
||||
captured.out
|
||||
== f"{Fore.YELLOW}Suspicious character ({field_name}): {Fore.RESET}ˆt\n"
|
||||
)
|
||||
|
||||
|
||||
def test_check_valid_iso639_1_language():
|
||||
@ -163,7 +191,9 @@ def test_check_invalid_iso639_1_language(capsys):
|
||||
check.language(value)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f"Invalid ISO 639-1 language: {value}\n"
|
||||
assert (
|
||||
captured.out == f"{Fore.RED}Invalid ISO 639-1 language: {Fore.RESET}{value}\n"
|
||||
)
|
||||
|
||||
|
||||
def test_check_invalid_iso639_3_language(capsys):
|
||||
@ -174,7 +204,9 @@ def test_check_invalid_iso639_3_language(capsys):
|
||||
check.language(value)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f"Invalid ISO 639-3 language: {value}\n"
|
||||
assert (
|
||||
captured.out == f"{Fore.RED}Invalid ISO 639-3 language: {Fore.RESET}{value}\n"
|
||||
)
|
||||
|
||||
|
||||
def test_check_invalid_language(capsys):
|
||||
@ -185,7 +217,7 @@ def test_check_invalid_language(capsys):
|
||||
check.language(value)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f"Invalid language: {value}\n"
|
||||
assert captured.out == f"{Fore.RED}Invalid language: {Fore.RESET}{value}\n"
|
||||
|
||||
|
||||
def test_check_invalid_agrovoc(capsys):
|
||||
@ -197,7 +229,10 @@ def test_check_invalid_agrovoc(capsys):
|
||||
check.agrovoc(value, field_name)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f"Invalid AGROVOC ({field_name}): {value}\n"
|
||||
assert (
|
||||
captured.out
|
||||
== f"{Fore.RED}Invalid AGROVOC ({field_name}): {Fore.RESET}{value}\n"
|
||||
)
|
||||
|
||||
|
||||
def test_check_valid_agrovoc():
|
||||
@ -219,7 +254,10 @@ def test_check_uncommon_filename_extension(capsys):
|
||||
check.filename_extension(value)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f"Filename with uncommon extension: {value}\n"
|
||||
assert (
|
||||
captured.out
|
||||
== f"{Fore.YELLOW}Filename with uncommon extension: {Fore.RESET}{value}\n"
|
||||
)
|
||||
|
||||
|
||||
def test_check_common_filename_extension():
|
||||
@ -247,7 +285,7 @@ def test_check_incorrect_iso_639_1_language(capsys):
|
||||
captured = capsys.readouterr()
|
||||
assert (
|
||||
captured.out
|
||||
== f"Possibly incorrect language {language} (detected en): {title}\n"
|
||||
== f"{Fore.YELLOW}Possibly incorrect language {language} (detected en): {Fore.RESET}{title}\n"
|
||||
)
|
||||
|
||||
|
||||
@ -266,7 +304,7 @@ def test_check_incorrect_iso_639_3_language(capsys):
|
||||
captured = capsys.readouterr()
|
||||
assert (
|
||||
captured.out
|
||||
== f"Possibly incorrect language {language} (detected eng): {title}\n"
|
||||
== f"{Fore.YELLOW}Possibly incorrect language {language} (detected eng): {Fore.RESET}{title}\n"
|
||||
)
|
||||
|
||||
|
||||
|
@ -41,6 +41,16 @@ def test_fix_invalid_separators():
|
||||
assert fix.separators(value, field_name) == "Alan||Orth"
|
||||
|
||||
|
||||
def test_fix_unnecessary_separators():
|
||||
"""Test fixing unnecessary multi-value separators."""
|
||||
|
||||
field = "Alan||Orth||"
|
||||
|
||||
field_name = "dc.contributor.author"
|
||||
|
||||
assert fix.separators(field, field_name) == "Alan||Orth"
|
||||
|
||||
|
||||
def test_fix_unnecessary_unicode():
|
||||
"""Test fixing unnecessary Unicode."""
|
||||
|
||||
|
Reference in New Issue
Block a user