mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2025-05-10 07:06:00 +02:00
Compare commits
25 Commits
Author | SHA1 | Date | |
---|---|---|---|
cb07d357d4
|
|||
65cd48a26f
|
|||
0f883f640c
|
|||
f4c5c5781e
|
|||
6aa784ad8c
|
|||
7b8da94f41
|
|||
2a1566af62
|
|||
5fcaa63bd5
|
|||
aa9e23b46c
|
|||
73acb1661f
|
|||
2a068fddc4
|
|||
c6c2f13e88
|
|||
56f16e37ed
|
|||
0c44b967b6
|
|||
8a267bb40b
|
|||
8fda8f1ef1
|
|||
5e471813e8
|
|||
79244b9ac3
|
|||
5e81a33482
|
|||
28b5996aa6
|
|||
40ba9bae6c
|
|||
0b2d211455
|
|||
7f1df0b47c
|
|||
365ecda324
|
|||
550ce7fb7e
|
12
.build.yml
12
.build.yml
@ -1,19 +1,15 @@
|
||||
image: archlinux
|
||||
packages:
|
||||
- python-pipenv
|
||||
- python-poetry
|
||||
sources:
|
||||
- https://git.sr.ht/~alanorth/csv-metadata-quality
|
||||
tasks:
|
||||
- setup: |
|
||||
cd csv-metadata-quality
|
||||
pipenv install --dev
|
||||
poetry install
|
||||
- pytest: |
|
||||
cd csv-metadata-quality
|
||||
pipenv run pytest
|
||||
poetry run pytest
|
||||
- testcli: |
|
||||
cd csv-metadata-quality
|
||||
pipenv run pip install .
|
||||
pipenv run csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e -u --agrovoc-fields dc.subject,cg.coverage.country
|
||||
environment:
|
||||
PIPENV_NOSPIN: 'True'
|
||||
PIPENV_HIDE_EMOJIS: 'True'
|
||||
poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e -u --agrovoc-fields dc.subject,cg.coverage.country
|
||||
|
@ -4,6 +4,10 @@ python:
|
||||
- "3.6"
|
||||
- "3.7"
|
||||
- "3.8"
|
||||
- "3.8-dev" # 3.8 development branch
|
||||
jobs:
|
||||
allow_failures:
|
||||
- python: "3.8-dev"
|
||||
install:
|
||||
- "pip install -r requirements.txt"
|
||||
- "pip install -r requirements-dev.txt"
|
||||
|
15
CHANGELOG.md
15
CHANGELOG.md
@ -4,6 +4,21 @@ All notable changes to this project will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [0.4.2] - 2020-07-06
|
||||
### Changed
|
||||
- Add field name to the output for more fixes and checks to help identify where
|
||||
the error is
|
||||
- Minor optimizations to AGROVOC subject lookup
|
||||
- Use Poetry instead of Pipenv
|
||||
|
||||
### Updated
|
||||
- Update python dependencies to latest versions
|
||||
|
||||
## [0.4.1] - 2020-01-15
|
||||
### Changed
|
||||
- Reduce minimum Python version to 3.6 by working around the `is_normalized()`
|
||||
that only works in Python >= 3.8
|
||||
|
||||
## [0.4.0] - 2020-01-15
|
||||
### Added
|
||||
- Unicode normalization (enable with `--unsafe-fixes`, see README.md)
|
||||
|
29
Pipfile
29
Pipfile
@ -1,29 +0,0 @@
|
||||
[[source]]
|
||||
name = "pypi"
|
||||
url = "https://pypi.org/simple"
|
||||
verify_ssl = true
|
||||
|
||||
[dev-packages]
|
||||
pytest = "*"
|
||||
ipython = "*"
|
||||
flake8 = "*"
|
||||
pytest-clarity = "*"
|
||||
black = "==19.10b0"
|
||||
isort = "*"
|
||||
csvkit = "*"
|
||||
|
||||
[packages]
|
||||
pandas = "*"
|
||||
python-stdnum = "*"
|
||||
xlrd = "*"
|
||||
requests = "*"
|
||||
requests-cache = "*"
|
||||
pycountry = "*"
|
||||
csv-metadata-quality = {editable = true,path = "."}
|
||||
langid = "*"
|
||||
|
||||
[requires]
|
||||
python_version = "3.8"
|
||||
|
||||
[pipenv]
|
||||
allow_prereleases = true
|
589
Pipfile.lock
generated
589
Pipfile.lock
generated
@ -1,589 +0,0 @@
|
||||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "bc933a2deb26ed095c46d6ccddf0f305f84157bdd95548c6b6a4356537951890"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {
|
||||
"python_version": "3.8"
|
||||
},
|
||||
"sources": [
|
||||
{
|
||||
"name": "pypi",
|
||||
"url": "https://pypi.org/simple",
|
||||
"verify_ssl": true
|
||||
}
|
||||
]
|
||||
},
|
||||
"default": {
|
||||
"certifi": {
|
||||
"hashes": [
|
||||
"sha256:017c25db2a153ce562900032d5bc68e9f191e44e9a0f762f373977de9df1fbb3",
|
||||
"sha256:25b64c7da4cd7479594d035c08c2d809eb4aab3a26e5a990ea98cc450c320f1f"
|
||||
],
|
||||
"version": "==2019.11.28"
|
||||
},
|
||||
"chardet": {
|
||||
"hashes": [
|
||||
"sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
|
||||
"sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
|
||||
],
|
||||
"version": "==3.0.4"
|
||||
},
|
||||
"csv-metadata-quality": {
|
||||
"editable": true,
|
||||
"path": "."
|
||||
},
|
||||
"idna": {
|
||||
"hashes": [
|
||||
"sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407",
|
||||
"sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c"
|
||||
],
|
||||
"version": "==2.8"
|
||||
},
|
||||
"langid": {
|
||||
"hashes": [
|
||||
"sha256:044bcae1912dab85c33d8e98f2811b8f4ff1213e5e9a9e9510137b84da2cb293"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.1.6"
|
||||
},
|
||||
"numpy": {
|
||||
"hashes": [
|
||||
"sha256:1786a08236f2c92ae0e70423c45e1e62788ed33028f94ca99c4df03f5be6b3c6",
|
||||
"sha256:17aa7a81fe7599a10f2b7d95856dc5cf84a4eefa45bc96123cbbc3ebc568994e",
|
||||
"sha256:20b26aaa5b3da029942cdcce719b363dbe58696ad182aff0e5dcb1687ec946dc",
|
||||
"sha256:2d75908ab3ced4223ccba595b48e538afa5ecc37405923d1fea6906d7c3a50bc",
|
||||
"sha256:39d2c685af15d3ce682c99ce5925cc66efc824652e10990d2462dfe9b8918c6a",
|
||||
"sha256:56bc8ded6fcd9adea90f65377438f9fea8c05fcf7c5ba766bef258d0da1554aa",
|
||||
"sha256:590355aeade1a2eaba17617c19edccb7db8d78760175256e3cf94590a1a964f3",
|
||||
"sha256:70a840a26f4e61defa7bdf811d7498a284ced303dfbc35acb7be12a39b2aa121",
|
||||
"sha256:77c3bfe65d8560487052ad55c6998a04b654c2fbc36d546aef2b2e511e760971",
|
||||
"sha256:9537eecf179f566fd1c160a2e912ca0b8e02d773af0a7a1120ad4f7507cd0d26",
|
||||
"sha256:9acdf933c1fd263c513a2df3dceecea6f3ff4419d80bf238510976bf9bcb26cd",
|
||||
"sha256:ae0975f42ab1f28364dcda3dde3cf6c1ddab3e1d4b2909da0cb0191fa9ca0480",
|
||||
"sha256:b3af02ecc999c8003e538e60c89a2b37646b39b688d4e44d7373e11c2debabec",
|
||||
"sha256:b6ff59cee96b454516e47e7721098e6ceebef435e3e21ac2d6c3b8b02628eb77",
|
||||
"sha256:b765ed3930b92812aa698a455847141869ef755a87e099fddd4ccf9d81fffb57",
|
||||
"sha256:c98c5ffd7d41611407a1103ae11c8b634ad6a43606eca3e2a5a269e5d6e8eb07",
|
||||
"sha256:cf7eb6b1025d3e169989416b1adcd676624c2dbed9e3bcb7137f51bfc8cc2572",
|
||||
"sha256:d92350c22b150c1cae7ebb0ee8b5670cc84848f6359cf6b5d8f86617098a9b73",
|
||||
"sha256:e422c3152921cece8b6a2fb6b0b4d73b6579bd20ae075e7d15143e711f3ca2ca",
|
||||
"sha256:e840f552a509e3380b0f0ec977e8124d0dc34dc0e68289ca28f4d7c1d0d79474",
|
||||
"sha256:f3d0a94ad151870978fb93538e95411c83899c9dc63e6fb65542f769568ecfa5"
|
||||
],
|
||||
"version": "==1.18.1"
|
||||
},
|
||||
"pandas": {
|
||||
"hashes": [
|
||||
"sha256:0f52d8a2358de840eca388f50bcab137d9d2f161f55c9c32e888387ac2e4505b",
|
||||
"sha256:111d77cac6c0e2d8bb76bdad75b3a416729f5f31f705276becbf8035b26ac5e0",
|
||||
"sha256:223f97e52a4d82cf918da5dcbdc92c69ab00686e2b6adeb3012326ace3dc1aee",
|
||||
"sha256:3b09cae3d39e71187fcc6817c3f60a8c9bad5f503e6aa8d72e4cbb2e1cd7a585",
|
||||
"sha256:4a37ab58d7c3017d71650a7d9b44d056005c1d0d9be931d8af9c8b2ca2c8a8b8",
|
||||
"sha256:57628cd142f09165bca3ce0b2f82f14568ae14a6c2c125a29d167c9b9df6f76e",
|
||||
"sha256:5c42b463d25780d5d5addc79b1cfb1b8d8db44d4184186da8e2a25f2c794ad43",
|
||||
"sha256:656443bf914f5e9307fcc694d5f400d19e616d7aafa4faf57711e0449093272f",
|
||||
"sha256:7e5dc9137b9fc2e3ccd00df092fa3af6e01430dcba747f5f063b33ea1ed0999c",
|
||||
"sha256:8305fb7b2817e3da6071f0032b6ca1402cbe303094ab5594f552d7052782b8de",
|
||||
"sha256:a98b46eec0e245fd3dc0d11012109f41aa37c96066aa642d65f4a4c332d193c1",
|
||||
"sha256:b254f0c4308ff0c8c896a9de980642a55b716dff4d1fc8a730657e6d4711e35d",
|
||||
"sha256:cce070caeb357ef89267482c7dd1a9adaa57444be5663ea294675ab0cdb5f033",
|
||||
"sha256:f4e74a38cc48453bceda51c0d13122c38f0a49dd4c737f8091b8cdc88f47eb8c"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.0.0rc0"
|
||||
},
|
||||
"pycountry": {
|
||||
"hashes": [
|
||||
"sha256:3c57aa40adcf293d59bebaffbe60d8c39976fba78d846a018dc0c2ec9c6cb3cb"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==19.8.18"
|
||||
},
|
||||
"python-dateutil": {
|
||||
"hashes": [
|
||||
"sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c",
|
||||
"sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a"
|
||||
],
|
||||
"version": "==2.8.1"
|
||||
},
|
||||
"python-stdnum": {
|
||||
"hashes": [
|
||||
"sha256:4c1347c414d7bdffb454924998f62c04d907a5c01faff0e35df659b0b52acba5",
|
||||
"sha256:bb58877dafc2e590dbfddc63fa04876ab2005c3f35c8356a2dd01f62a9bdc4d6"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.12"
|
||||
},
|
||||
"pytz": {
|
||||
"hashes": [
|
||||
"sha256:1c557d7d0e871de1f5ccd5833f60fb2550652da6be2693c1e02300743d21500d",
|
||||
"sha256:b02c06db6cf09c12dd25137e563b31700d3b80fcc4ad23abb7a315f2789819be"
|
||||
],
|
||||
"version": "==2019.3"
|
||||
},
|
||||
"requests": {
|
||||
"hashes": [
|
||||
"sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4",
|
||||
"sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==2.22.0"
|
||||
},
|
||||
"requests-cache": {
|
||||
"hashes": [
|
||||
"sha256:813023269686045f8e01e2289cc1e7e9ae5ab22ddd1e2849a9093ab3ab7270eb",
|
||||
"sha256:81e13559baee64677a7d73b85498a5a8f0639e204517b5d05ff378e44a57831a"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==0.5.2"
|
||||
},
|
||||
"six": {
|
||||
"hashes": [
|
||||
"sha256:1f1b7d42e254082a9db6279deae68afb421ceba6158efa6131de7b3003ee93fd",
|
||||
"sha256:30f610279e8b2578cab6db20741130331735c781b56053c59c4076da27f06b66"
|
||||
],
|
||||
"version": "==1.13.0"
|
||||
},
|
||||
"urllib3": {
|
||||
"hashes": [
|
||||
"sha256:a8a318824cc77d1fd4b2bec2ded92646630d7fe8619497b142c84a9e6f5a7293",
|
||||
"sha256:f3c5fd51747d450d4dcf6f923c81f78f811aab8205fda64b0aba34a4e48b0745"
|
||||
],
|
||||
"version": "==1.25.7"
|
||||
},
|
||||
"xlrd": {
|
||||
"hashes": [
|
||||
"sha256:546eb36cee8db40c3eaa46c351e67ffee6eeb5fa2650b71bc4c758a29a1b29b2",
|
||||
"sha256:e551fb498759fa3a5384a94ccd4c3c02eb7c00ea424426e212ac0c57be9dfbde"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.2.0"
|
||||
}
|
||||
},
|
||||
"develop": {
|
||||
"agate": {
|
||||
"hashes": [
|
||||
"sha256:48d6f80b35611c1ba25a642cbc5b90fcbdeeb2a54711c4a8d062ee2809334d1c",
|
||||
"sha256:c93aaa500b439d71e4a5cf088d0006d2ce2c76f1950960c8843114e5f361dfd3"
|
||||
],
|
||||
"version": "==1.6.1"
|
||||
},
|
||||
"agate-dbf": {
|
||||
"hashes": [
|
||||
"sha256:00c93c498ec9a04cc587bf63dd7340e67e2541f0df4c9a7259d7cb3dd4ce372f"
|
||||
],
|
||||
"version": "==0.2.1"
|
||||
},
|
||||
"agate-excel": {
|
||||
"hashes": [
|
||||
"sha256:8f255ef2c87c436b7132049e1dd86c8e08bf82d8c773aea86f3069b461a17d52"
|
||||
],
|
||||
"version": "==0.2.3"
|
||||
},
|
||||
"agate-sql": {
|
||||
"hashes": [
|
||||
"sha256:9277490ba8b8e7c747a9ae3671f52fe486784b48d4a14e78ca197fb0e36f281b"
|
||||
],
|
||||
"version": "==0.5.4"
|
||||
},
|
||||
"appdirs": {
|
||||
"hashes": [
|
||||
"sha256:9e5896d1372858f8dd3344faf4e5014d21849c756c8d5701f78f8a103b372d92",
|
||||
"sha256:d8b24664561d0d34ddfaec54636d502d7cea6e29c3eaf68f3df6180863e2166e"
|
||||
],
|
||||
"version": "==1.4.3"
|
||||
},
|
||||
"attrs": {
|
||||
"hashes": [
|
||||
"sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c",
|
||||
"sha256:f7b7ce16570fe9965acd6d30101a28f62fb4a7f9e926b3bbc9b61f8b04247e72"
|
||||
],
|
||||
"version": "==19.3.0"
|
||||
},
|
||||
"babel": {
|
||||
"hashes": [
|
||||
"sha256:1aac2ae2d0d8ea368fa90906567f5c08463d98ade155c0c4bfedd6a0f7160e38",
|
||||
"sha256:d670ea0b10f8b723672d3a6abeb87b565b244da220d76b4dba1b66269ec152d4"
|
||||
],
|
||||
"version": "==2.8.0"
|
||||
},
|
||||
"backcall": {
|
||||
"hashes": [
|
||||
"sha256:38ecd85be2c1e78f77fd91700c76e14667dc21e2713b63876c0eb901196e01e4",
|
||||
"sha256:bbbf4b1e5cd2bdb08f915895b51081c041bac22394fdfcfdfbe9f14b77c08bf2"
|
||||
],
|
||||
"version": "==0.1.0"
|
||||
},
|
||||
"black": {
|
||||
"hashes": [
|
||||
"sha256:1b30e59be925fafc1ee4565e5e08abef6b03fe455102883820fe5ee2e4734e0b",
|
||||
"sha256:c2edb73a08e9e0e6f65a0e6af18b059b8b1cdd5bef997d7a0b181df93dc81539"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==19.10b0"
|
||||
},
|
||||
"click": {
|
||||
"hashes": [
|
||||
"sha256:2335065e6395b9e67ca716de5f7526736bfa6ceead690adf616d925bdc622b13",
|
||||
"sha256:5b94b49521f6456670fdb30cd82a4eca9412788a93fa6dd6df72c94d5a8ff2d7"
|
||||
],
|
||||
"version": "==7.0"
|
||||
},
|
||||
"csvkit": {
|
||||
"hashes": [
|
||||
"sha256:1353a383531bee191820edfb88418c13dfe1cdfa9dd3dc46f431c05cd2a260a0"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.0.4"
|
||||
},
|
||||
"dbfread": {
|
||||
"hashes": [
|
||||
"sha256:07c8a9af06ffad3f6f03e8fe91ad7d2733e31a26d2b72c4dd4cfbae07ee3b73d",
|
||||
"sha256:f604def58c59694fa0160d7be5d0b8d594467278d2bb6a47d46daf7162c84cec"
|
||||
],
|
||||
"version": "==2.0.7"
|
||||
},
|
||||
"decorator": {
|
||||
"hashes": [
|
||||
"sha256:54c38050039232e1db4ad7375cfce6748d7b41c29e95a081c8a6d2c30364a2ce",
|
||||
"sha256:5d19b92a3c8f7f101c8dd86afd86b0f061a8ce4540ab8cd401fa2542756bce6d"
|
||||
],
|
||||
"version": "==4.4.1"
|
||||
},
|
||||
"entrypoints": {
|
||||
"hashes": [
|
||||
"sha256:589f874b313739ad35be6e0cd7efde2a4e9b6fea91edcc34e58ecbb8dbe56d19",
|
||||
"sha256:c70dd71abe5a8c85e55e12c19bd91ccfeec11a6e99044204511f9ed547d48451"
|
||||
],
|
||||
"version": "==0.3"
|
||||
},
|
||||
"et-xmlfile": {
|
||||
"hashes": [
|
||||
"sha256:614d9722d572f6246302c4491846d2c393c199cfa4edc9af593437691683335b"
|
||||
],
|
||||
"version": "==1.0.1"
|
||||
},
|
||||
"flake8": {
|
||||
"hashes": [
|
||||
"sha256:45681a117ecc81e870cbf1262835ae4af5e7a8b08e40b944a8a6e6b895914cfb",
|
||||
"sha256:49356e766643ad15072a789a20915d3c91dc89fd313ccd71802303fd67e4deca"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==3.7.9"
|
||||
},
|
||||
"ipython": {
|
||||
"hashes": [
|
||||
"sha256:0f4bcf18293fb666df8511feec0403bdb7e061a5842ea6e88a3177b0ceb34ead",
|
||||
"sha256:387686dd7fc9caf29d2fddcf3116c4b07a11d9025701d220c589a430b0171d8a"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==7.11.1"
|
||||
},
|
||||
"ipython-genutils": {
|
||||
"hashes": [
|
||||
"sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8",
|
||||
"sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8"
|
||||
],
|
||||
"version": "==0.2.0"
|
||||
},
|
||||
"isodate": {
|
||||
"hashes": [
|
||||
"sha256:2e364a3d5759479cdb2d37cce6b9376ea504db2ff90252a2e5b7cc89cc9ff2d8",
|
||||
"sha256:aa4d33c06640f5352aca96e4b81afd8ab3b47337cc12089822d6f322ac772c81"
|
||||
],
|
||||
"version": "==0.6.0"
|
||||
},
|
||||
"isort": {
|
||||
"hashes": [
|
||||
"sha256:54da7e92468955c4fceacd0c86bd0ec997b0e1ee80d97f67c35a78b719dccab1",
|
||||
"sha256:6e811fcb295968434526407adb8796944f1988c5b65e8139058f2014cbe100fd"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==4.3.21"
|
||||
},
|
||||
"jdcal": {
|
||||
"hashes": [
|
||||
"sha256:1abf1305fce18b4e8aa248cf8fe0c56ce2032392bc64bbd61b5dff2a19ec8bba",
|
||||
"sha256:472872e096eb8df219c23f2689fc336668bdb43d194094b5cc1707e1640acfc8"
|
||||
],
|
||||
"version": "==1.4.1"
|
||||
},
|
||||
"jedi": {
|
||||
"hashes": [
|
||||
"sha256:1349c1e8c107095a55386628bb3b2a79422f3a2cab8381e34ce19909e0cf5064",
|
||||
"sha256:e909527104a903606dd63bea6e8e888833f0ef087057829b89a18364a856f807"
|
||||
],
|
||||
"version": "==0.15.2"
|
||||
},
|
||||
"leather": {
|
||||
"hashes": [
|
||||
"sha256:076d1603b5281488285718ce1a5ce78cf1027fe1e76adf9c548caf83c519b988",
|
||||
"sha256:e0bb36a6d5f59fbf3c1a6e75e7c8bee29e67f06f5b48c0134407dde612eba5e2"
|
||||
],
|
||||
"version": "==0.3.3"
|
||||
},
|
||||
"mccabe": {
|
||||
"hashes": [
|
||||
"sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42",
|
||||
"sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"
|
||||
],
|
||||
"version": "==0.6.1"
|
||||
},
|
||||
"more-itertools": {
|
||||
"hashes": [
|
||||
"sha256:1a2a32c72400d365000412fe08eb4a24ebee89997c18d3d147544f70f5403b39",
|
||||
"sha256:c468adec578380b6281a114cb8a5db34eb1116277da92d7c46f904f0b52d3288"
|
||||
],
|
||||
"version": "==8.1.0"
|
||||
},
|
||||
"openpyxl": {
|
||||
"hashes": [
|
||||
"sha256:547a9fc6aafcf44abe358b89ed4438d077e9d92e4f182c87e2dc294186dc4b64"
|
||||
],
|
||||
"version": "==3.0.3"
|
||||
},
|
||||
"packaging": {
|
||||
"hashes": [
|
||||
"sha256:aec3fdbb8bc9e4bb65f0634b9f551ced63983a529d6a8931817d52fdd0816ddb",
|
||||
"sha256:fe1d8331dfa7cc0a883b49d75fc76380b2ab2734b220fbb87d774e4fd4b851f8"
|
||||
],
|
||||
"version": "==20.0"
|
||||
},
|
||||
"parsedatetime": {
|
||||
"hashes": [
|
||||
"sha256:3b835fc54e472c17ef447be37458b400e3fefdf14bb1ffdedb5d2c853acf4ba1",
|
||||
"sha256:d2e9ddb1e463de871d32088a3f3cea3dc8282b1b2800e081bd0ef86900451667"
|
||||
],
|
||||
"version": "==2.5"
|
||||
},
|
||||
"parso": {
|
||||
"hashes": [
|
||||
"sha256:55cf25df1a35fd88b878715874d2c4dc1ad3f0eebd1e0266a67e1f55efccfbe1",
|
||||
"sha256:5c1f7791de6bd5dbbeac8db0ef5594b36799de198b3f7f7014643b0c5536b9d3"
|
||||
],
|
||||
"version": "==0.5.2"
|
||||
},
|
||||
"pathspec": {
|
||||
"hashes": [
|
||||
"sha256:163b0632d4e31cef212976cf57b43d9fd6b0bac6e67c26015d611a647d5e7424",
|
||||
"sha256:562aa70af2e0d434367d9790ad37aed893de47f1693e4201fd1d3dca15d19b96"
|
||||
],
|
||||
"version": "==0.7.0"
|
||||
},
|
||||
"pexpect": {
|
||||
"hashes": [
|
||||
"sha256:2094eefdfcf37a1fdbfb9aa090862c1a4878e5c7e0e7e7088bdb511c558e5cd1",
|
||||
"sha256:9e2c1fd0e6ee3a49b28f95d4b33bc389c89b20af6a1255906e90ff1262ce62eb"
|
||||
],
|
||||
"markers": "sys_platform != 'win32'",
|
||||
"version": "==4.7.0"
|
||||
},
|
||||
"pickleshare": {
|
||||
"hashes": [
|
||||
"sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca",
|
||||
"sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"
|
||||
],
|
||||
"version": "==0.7.5"
|
||||
},
|
||||
"pluggy": {
|
||||
"hashes": [
|
||||
"sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0",
|
||||
"sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"
|
||||
],
|
||||
"version": "==0.13.1"
|
||||
},
|
||||
"prompt-toolkit": {
|
||||
"hashes": [
|
||||
"sha256:0278d2f51b5ceba6ea8da39f76d15684e84c996b325475f6e5720edc584326a7",
|
||||
"sha256:63daee79aa8366c8f1c637f1a4876b890da5fc92a19ebd2f7080ebacb901e990"
|
||||
],
|
||||
"version": "==3.0.2"
|
||||
},
|
||||
"ptyprocess": {
|
||||
"hashes": [
|
||||
"sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0",
|
||||
"sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f"
|
||||
],
|
||||
"version": "==0.6.0"
|
||||
},
|
||||
"py": {
|
||||
"hashes": [
|
||||
"sha256:5e27081401262157467ad6e7f851b7aa402c5852dbcb3dae06768434de5752aa",
|
||||
"sha256:c20fdd83a5dbc0af9efd622bee9a5564e278f6380fffcacc43ba6f43db2813b0"
|
||||
],
|
||||
"version": "==1.8.1"
|
||||
},
|
||||
"pycodestyle": {
|
||||
"hashes": [
|
||||
"sha256:95a2219d12372f05704562a14ec30bc76b05a5b297b21a5dfe3f6fac3491ae56",
|
||||
"sha256:e40a936c9a450ad81df37f549d676d127b1b66000a6c500caa2b085bc0ca976c"
|
||||
],
|
||||
"version": "==2.5.0"
|
||||
},
|
||||
"pyflakes": {
|
||||
"hashes": [
|
||||
"sha256:17dbeb2e3f4d772725c777fabc446d5634d1038f234e77343108ce445ea69ce0",
|
||||
"sha256:d976835886f8c5b31d47970ed689944a0262b5f3afa00a5a7b4dc81e5449f8a2"
|
||||
],
|
||||
"version": "==2.1.1"
|
||||
},
|
||||
"pygments": {
|
||||
"hashes": [
|
||||
"sha256:2a3fe295e54a20164a9df49c75fa58526d3be48e14aceba6d6b1e8ac0bfd6f1b",
|
||||
"sha256:98c8aa5a9f778fcd1026a17361ddaf7330d1b7c62ae97c3bb0ae73e0b9b6b0fe"
|
||||
],
|
||||
"version": "==2.5.2"
|
||||
},
|
||||
"pyparsing": {
|
||||
"hashes": [
|
||||
"sha256:4c830582a84fb022400b85429791bc551f1f4871c33f23e44f353119e92f969f",
|
||||
"sha256:c342dccb5250c08d45fd6f8b4a559613ca603b57498511740e65cd11a2e7dcec"
|
||||
],
|
||||
"version": "==2.4.6"
|
||||
},
|
||||
"pytest": {
|
||||
"hashes": [
|
||||
"sha256:6b571215b5a790f9b41f19f3531c53a45cf6bb8ef2988bc1ff9afb38270b25fa",
|
||||
"sha256:e41d489ff43948babd0fad7ad5e49b8735d5d55e26628a58673c39ff61d95de4"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==5.3.2"
|
||||
},
|
||||
"pytest-clarity": {
|
||||
"hashes": [
|
||||
"sha256:3f40d5ae7cb21cc95e622fc4f50d9466f80ae0f91460225b8c95c07afbf93e20"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==0.2.0a1"
|
||||
},
|
||||
"python-slugify": {
|
||||
"hashes": [
|
||||
"sha256:a8fc3433821140e8f409a9831d13ae5deccd0b033d4744d94b31fea141bdd84c"
|
||||
],
|
||||
"version": "==4.0.0"
|
||||
},
|
||||
"pytimeparse": {
|
||||
"hashes": [
|
||||
"sha256:04b7be6cc8bd9f5647a6325444926c3ac34ee6bc7e69da4367ba282f076036bd",
|
||||
"sha256:e86136477be924d7e670646a98561957e8ca7308d44841e21f5ddea757556a0a"
|
||||
],
|
||||
"version": "==1.1.8"
|
||||
},
|
||||
"pytz": {
|
||||
"hashes": [
|
||||
"sha256:1c557d7d0e871de1f5ccd5833f60fb2550652da6be2693c1e02300743d21500d",
|
||||
"sha256:b02c06db6cf09c12dd25137e563b31700d3b80fcc4ad23abb7a315f2789819be"
|
||||
],
|
||||
"version": "==2019.3"
|
||||
},
|
||||
"regex": {
|
||||
"hashes": [
|
||||
"sha256:07b39bf943d3d2fe63d46281d8504f8df0ff3fe4c57e13d1656737950e53e525",
|
||||
"sha256:0932941cdfb3afcbc26cc3bcf7c3f3d73d5a9b9c56955d432dbf8bbc147d4c5b",
|
||||
"sha256:0e182d2f097ea8549a249040922fa2b92ae28be4be4895933e369a525ba36576",
|
||||
"sha256:10671601ee06cf4dc1bc0b4805309040bb34c9af423c12c379c83d7895622bb5",
|
||||
"sha256:23e2c2c0ff50f44877f64780b815b8fd2e003cda9ce817a7fd00dea5600c84a0",
|
||||
"sha256:26ff99c980f53b3191d8931b199b29d6787c059f2e029b2b0c694343b1708c35",
|
||||
"sha256:27429b8d74ba683484a06b260b7bb00f312e7c757792628ea251afdbf1434003",
|
||||
"sha256:3e77409b678b21a056415da3a56abfd7c3ad03da71f3051bbcdb68cf44d3c34d",
|
||||
"sha256:4e8f02d3d72ca94efc8396f8036c0d3bcc812aefc28ec70f35bb888c74a25161",
|
||||
"sha256:4eae742636aec40cf7ab98171ab9400393360b97e8f9da67b1867a9ee0889b26",
|
||||
"sha256:6a6ae17bf8f2d82d1e8858a47757ce389b880083c4ff2498dba17c56e6c103b9",
|
||||
"sha256:6a6ba91b94427cd49cd27764679024b14a96874e0dc638ae6bdd4b1a3ce97be1",
|
||||
"sha256:7bcd322935377abcc79bfe5b63c44abd0b29387f267791d566bbb566edfdd146",
|
||||
"sha256:98b8ed7bb2155e2cbb8b76f627b2fd12cf4b22ab6e14873e8641f266e0fb6d8f",
|
||||
"sha256:bd25bb7980917e4e70ccccd7e3b5740614f1c408a642c245019cff9d7d1b6149",
|
||||
"sha256:d0f424328f9822b0323b3b6f2e4b9c90960b24743d220763c7f07071e0778351",
|
||||
"sha256:d58e4606da2a41659c84baeb3cfa2e4c87a74cec89a1e7c56bee4b956f9d7461",
|
||||
"sha256:e3cd21cc2840ca67de0bbe4071f79f031c81418deb544ceda93ad75ca1ee9f7b",
|
||||
"sha256:e6c02171d62ed6972ca8631f6f34fa3281d51db8b326ee397b9c83093a6b7242",
|
||||
"sha256:e7c7661f7276507bce416eaae22040fd91ca471b5b33c13f8ff21137ed6f248c",
|
||||
"sha256:ecc6de77df3ef68fee966bb8cb4e067e84d4d1f397d0ef6fce46913663540d77"
|
||||
],
|
||||
"version": "==2020.1.8"
|
||||
},
|
||||
"six": {
|
||||
"hashes": [
|
||||
"sha256:1f1b7d42e254082a9db6279deae68afb421ceba6158efa6131de7b3003ee93fd",
|
||||
"sha256:30f610279e8b2578cab6db20741130331735c781b56053c59c4076da27f06b66"
|
||||
],
|
||||
"version": "==1.13.0"
|
||||
},
|
||||
"sqlalchemy": {
|
||||
"hashes": [
|
||||
"sha256:bfb8f464a5000b567ac1d350b9090cf081180ec1ab4aa87e7bca12dab25320ec"
|
||||
],
|
||||
"version": "==1.3.12"
|
||||
},
|
||||
"termcolor": {
|
||||
"hashes": [
|
||||
"sha256:1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b"
|
||||
],
|
||||
"version": "==1.1.0"
|
||||
},
|
||||
"text-unidecode": {
|
||||
"hashes": [
|
||||
"sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8",
|
||||
"sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93"
|
||||
],
|
||||
"version": "==1.3"
|
||||
},
|
||||
"toml": {
|
||||
"hashes": [
|
||||
"sha256:229f81c57791a41d65e399fc06bf0848bab550a9dfd5ed66df18ce5f05e73d5c",
|
||||
"sha256:235682dd292d5899d361a811df37e04a8828a5b1da3115886b73cf81ebc9100e"
|
||||
],
|
||||
"version": "==0.10.0"
|
||||
},
|
||||
"traitlets": {
|
||||
"hashes": [
|
||||
"sha256:70b4c6a1d9019d7b4f6846832288f86998aa3b9207c6821f3578a6a6a467fe44",
|
||||
"sha256:d023ee369ddd2763310e4c3eae1ff649689440d4ae59d7485eb4cfbbe3e359f7"
|
||||
],
|
||||
"version": "==4.3.3"
|
||||
},
|
||||
"typed-ast": {
|
||||
"hashes": [
|
||||
"sha256:0666aa36131496aed8f7be0410ff974562ab7eeac11ef351def9ea6fa28f6355",
|
||||
"sha256:0c2c07682d61a629b68433afb159376e24e5b2fd4641d35424e462169c0a7919",
|
||||
"sha256:249862707802d40f7f29f6e1aad8d84b5aa9e44552d2cc17384b209f091276aa",
|
||||
"sha256:24995c843eb0ad11a4527b026b4dde3da70e1f2d8806c99b7b4a7cf491612652",
|
||||
"sha256:269151951236b0f9a6f04015a9004084a5ab0d5f19b57de779f908621e7d8b75",
|
||||
"sha256:4083861b0aa07990b619bd7ddc365eb7fa4b817e99cf5f8d9cf21a42780f6e01",
|
||||
"sha256:498b0f36cc7054c1fead3d7fc59d2150f4d5c6c56ba7fb150c013fbc683a8d2d",
|
||||
"sha256:4e3e5da80ccbebfff202a67bf900d081906c358ccc3d5e3c8aea42fdfdfd51c1",
|
||||
"sha256:6daac9731f172c2a22ade6ed0c00197ee7cc1221aa84cfdf9c31defeb059a907",
|
||||
"sha256:715ff2f2df46121071622063fc7543d9b1fd19ebfc4f5c8895af64a77a8c852c",
|
||||
"sha256:73d785a950fc82dd2a25897d525d003f6378d1cb23ab305578394694202a58c3",
|
||||
"sha256:8c8aaad94455178e3187ab22c8b01a3837f8ee50e09cf31f1ba129eb293ec30b",
|
||||
"sha256:8ce678dbaf790dbdb3eba24056d5364fb45944f33553dd5869b7580cdbb83614",
|
||||
"sha256:aaee9905aee35ba5905cfb3c62f3e83b3bec7b39413f0a7f19be4e547ea01ebb",
|
||||
"sha256:bcd3b13b56ea479b3650b82cabd6b5343a625b0ced5429e4ccad28a8973f301b",
|
||||
"sha256:c9e348e02e4d2b4a8b2eedb48210430658df6951fa484e59de33ff773fbd4b41",
|
||||
"sha256:d205b1b46085271b4e15f670058ce182bd1199e56b317bf2ec004b6a44f911f6",
|
||||
"sha256:d43943ef777f9a1c42bf4e552ba23ac77a6351de620aa9acf64ad54933ad4d34",
|
||||
"sha256:d5d33e9e7af3b34a40dc05f498939f0ebf187f07c385fd58d591c533ad8562fe",
|
||||
"sha256:fc0fea399acb12edbf8a628ba8d2312f583bdbdb3335635db062fa98cf71fca4",
|
||||
"sha256:fe460b922ec15dd205595c9b5b99e2f056fd98ae8f9f56b888e7a17dc2b757e7"
|
||||
],
|
||||
"version": "==1.4.1"
|
||||
},
|
||||
"wcwidth": {
|
||||
"hashes": [
|
||||
"sha256:8fd29383f539be45b20bd4df0dc29c20ba48654a41e661925e612311e9f3c603",
|
||||
"sha256:f28b3e8a6483e5d49e7f8949ac1a78314e740333ae305b4ba5defd3e74fb37a8"
|
||||
],
|
||||
"version": "==0.1.8"
|
||||
},
|
||||
"xlrd": {
|
||||
"hashes": [
|
||||
"sha256:546eb36cee8db40c3eaa46c351e67ffee6eeb5fa2650b71bc4c758a29a1b29b2",
|
||||
"sha256:e551fb498759fa3a5384a94ccd4c3c02eb7c00ea424426e212ac0c57be9dfbde"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.2.0"
|
||||
}
|
||||
}
|
||||
}
|
10
README.md
10
README.md
@ -18,16 +18,16 @@ Requires Python 3.8 or greater. CSV and Excel support comes from the [Pandas](ht
|
||||
- Perform [Unicode normalization](https://withblue.ink/2019/03/11/why-you-need-to-normalize-unicode-strings.html) on strings using `--unsafe-fixes`
|
||||
|
||||
## Installation
|
||||
The easiest way to install CSV Metadata Quality is with [pipenv](https://github.com/pypa/pipenv):
|
||||
The easiest way to install CSV Metadata Quality is with [poetry](https://python-poetry.org):
|
||||
|
||||
```
|
||||
$ git clone https://github.com/ilri/csv-metadata-quality.git
|
||||
$ cd csv-metadata-quality
|
||||
$ pipenv install
|
||||
$ pipenv shell
|
||||
$ poetry install
|
||||
$ poetry shell
|
||||
```
|
||||
|
||||
Otherwise, if you don't have pipenv, you can use a vanilla Python virtual environment:
|
||||
Otherwise, if you don't have poetry, you can use a vanilla Python virtual environment:
|
||||
|
||||
```
|
||||
$ git clone https://github.com/ilri/csv-metadata-quality.git
|
||||
@ -59,7 +59,7 @@ This is considered "unsafe" because it is *theoretically* possible for a single
|
||||
### Newlines
|
||||
This is considered "unsafe" because some systems give special importance to vertical space and render it properly. DSpace does not support rendering newlines in its XMLUI and has, at times, suffered from parsing errors that cause the import process to fail if an input file had newlines. The `--unsafe-fixes` option strips Unix line feeds (U+000A).
|
||||
|
||||
## Unicode Normalization
|
||||
### Unicode Normalization
|
||||
[Unicode](https://en.wikipedia.org/wiki/Unicode) is a standard for encoding text. As the standard aims to support most of the world's languages, characters can often be represented in different ways and still be valid Unicode. This leads to interesting problems that can be confusing unless you know what's going on behind the scenes. For example, the characters `é` and `é` *look* the same, but are not — technically they refer to different code points in the Unicode standard:
|
||||
|
||||
- `é` is the Unicode code point `U+00E9`
|
||||
|
@ -82,7 +82,7 @@ def run(argv):
|
||||
continue
|
||||
|
||||
# Fix: whitespace
|
||||
df[column] = df[column].apply(fix.whitespace)
|
||||
df[column] = df[column].apply(fix.whitespace, field_name=column)
|
||||
|
||||
# Fix: newlines
|
||||
if args.unsafe_fixes:
|
||||
@ -104,19 +104,19 @@ def run(argv):
|
||||
df[column] = df[column].apply(fix.unnecessary_unicode)
|
||||
|
||||
# Check: invalid multi-value separator
|
||||
df[column] = df[column].apply(check.separators)
|
||||
df[column] = df[column].apply(check.separators, field_name=column)
|
||||
|
||||
# Check: suspicious characters
|
||||
df[column] = df[column].apply(check.suspicious_characters, field_name=column)
|
||||
|
||||
# Fix: invalid multi-value separator
|
||||
if args.unsafe_fixes:
|
||||
df[column] = df[column].apply(fix.separators)
|
||||
df[column] = df[column].apply(fix.separators, field_name=column)
|
||||
# Run whitespace fix again after fixing invalid separators
|
||||
df[column] = df[column].apply(fix.whitespace)
|
||||
df[column] = df[column].apply(fix.whitespace, field_name=column)
|
||||
|
||||
# Fix: duplicate metadata values
|
||||
df[column] = df[column].apply(fix.duplicates)
|
||||
df[column] = df[column].apply(fix.duplicates, field_name=column)
|
||||
|
||||
# Check: invalid AGROVOC subject
|
||||
if args.agrovoc_fields:
|
||||
|
@ -51,7 +51,7 @@ def isbn(field):
|
||||
return field
|
||||
|
||||
|
||||
def separators(field):
|
||||
def separators(field, field_name):
|
||||
"""Check for invalid multi-value separators (ie "|" or "|||").
|
||||
|
||||
Prints the field with the invalid multi-value separator.
|
||||
@ -70,7 +70,7 @@ def separators(field):
|
||||
match = re.findall(r"^.*?\|.*$", value)
|
||||
|
||||
if match:
|
||||
print(f"Invalid multi-value separator: {field}")
|
||||
print(f"Invalid multi-value separator ({field_name}): {field}")
|
||||
|
||||
return field
|
||||
|
||||
@ -221,22 +221,21 @@ def agrovoc(field, field_name):
|
||||
if pd.isna(field):
|
||||
return
|
||||
|
||||
# enable transparent request cache with thirty days expiry
|
||||
expire_after = timedelta(days=30)
|
||||
requests_cache.install_cache(
|
||||
"agrovoc-response-cache", expire_after=expire_after
|
||||
)
|
||||
|
||||
# prune old cache entries
|
||||
requests_cache.core.remove_expired_responses()
|
||||
|
||||
# Try to split multi-value field on "||" separator
|
||||
for value in field.split("||"):
|
||||
request_url = (
|
||||
f"http://agrovoc.uniroma2.it/agrovoc/rest/v1/agrovoc/search?query={value}"
|
||||
)
|
||||
request_url = "http://agrovoc.uniroma2.it/agrovoc/rest/v1/agrovoc/search"
|
||||
request_params = {"query": value}
|
||||
|
||||
# enable transparent request cache with thirty days expiry
|
||||
expire_after = timedelta(days=30)
|
||||
requests_cache.install_cache(
|
||||
"agrovoc-response-cache", expire_after=expire_after
|
||||
)
|
||||
|
||||
request = requests.get(request_url)
|
||||
|
||||
# prune old cache entries
|
||||
requests_cache.core.remove_expired_responses()
|
||||
request = requests.get(request_url, params=request_params)
|
||||
|
||||
if request.status_code == requests.codes.ok:
|
||||
data = request.json()
|
||||
|
@ -3,7 +3,7 @@ import re
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def whitespace(field):
|
||||
def whitespace(field, field_name):
|
||||
"""Fix whitespace issues.
|
||||
|
||||
Return string with leading, trailing, and consecutive whitespace trimmed.
|
||||
@ -26,7 +26,7 @@ def whitespace(field):
|
||||
match = re.findall(pattern, value)
|
||||
|
||||
if match:
|
||||
print(f"Removing excessive whitespace: {value}")
|
||||
print(f"Removing excessive whitespace ({field_name}): {value}")
|
||||
value = re.sub(pattern, " ", value)
|
||||
|
||||
# Save cleaned value
|
||||
@ -38,7 +38,7 @@ def whitespace(field):
|
||||
return new_field
|
||||
|
||||
|
||||
def separators(field):
|
||||
def separators(field, field_name):
|
||||
"""Fix for invalid multi-value separators (ie "|")."""
|
||||
|
||||
# Skip fields with missing values
|
||||
@ -55,7 +55,7 @@ def separators(field):
|
||||
match = re.findall(pattern, value)
|
||||
|
||||
if match:
|
||||
print(f"Fixing invalid multi-value separator: {value}")
|
||||
print(f"Fixing invalid multi-value separator ({field_name}): {value}")
|
||||
|
||||
value = re.sub(pattern, "||", value)
|
||||
|
||||
@ -121,7 +121,7 @@ def unnecessary_unicode(field):
|
||||
return field
|
||||
|
||||
|
||||
def duplicates(field):
|
||||
def duplicates(field, field_name):
|
||||
"""Remove duplicate metadata values."""
|
||||
|
||||
# Skip fields with missing values
|
||||
@ -140,7 +140,7 @@ def duplicates(field):
|
||||
if value not in new_values:
|
||||
new_values.append(value)
|
||||
else:
|
||||
print(f"Removing duplicate value: {value}")
|
||||
print(f"Removing duplicate value ({field_name}): {value}")
|
||||
|
||||
# Create a new field consisting of all values joined with "||"
|
||||
new_field = "||".join(new_values)
|
||||
@ -212,7 +212,7 @@ def normalize_unicode(field, field_name):
|
||||
Return normalized string.
|
||||
"""
|
||||
|
||||
from unicodedata import is_normalized
|
||||
from csv_metadata_quality.util import is_nfc
|
||||
from unicodedata import normalize
|
||||
|
||||
# Skip fields with missing values
|
||||
@ -220,7 +220,7 @@ def normalize_unicode(field, field_name):
|
||||
return
|
||||
|
||||
# Check if the current string is using normalized Unicode (NFC)
|
||||
if not is_normalized("NFC", field):
|
||||
if not is_nfc(field):
|
||||
print(f"Normalizing Unicode ({field_name}): {field}")
|
||||
field = normalize("NFC", field)
|
||||
|
||||
|
14
csv_metadata_quality/util.py
Normal file
14
csv_metadata_quality/util.py
Normal file
@ -0,0 +1,14 @@
|
||||
def is_nfc(field):
|
||||
"""Utility function to check whether a string is using normalized Unicode.
|
||||
Python's built-in unicodedata library has the is_normalized() function, but
|
||||
it was only introduced in Python 3.8. By using a simple utility function we
|
||||
are able to run on Python >= 3.6 again.
|
||||
|
||||
See: https://docs.python.org/3/library/unicodedata.html
|
||||
|
||||
Return boolean.
|
||||
"""
|
||||
|
||||
from unicodedata import normalize
|
||||
|
||||
return field == normalize("NFC", field)
|
@ -1 +1 @@
|
||||
VERSION = "0.4.0"
|
||||
VERSION = "0.4.2"
|
||||
|
1130
poetry.lock
generated
Normal file
1130
poetry.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
31
pyproject.toml
Normal file
31
pyproject.toml
Normal file
@ -0,0 +1,31 @@
|
||||
[tool.poetry]
|
||||
name = "csv-metadata-quality"
|
||||
version = "0.4.2"
|
||||
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem."
|
||||
authors = ["Alan Orth <alan.orth@gmail.com>"]
|
||||
license="GPL-3.0-only"
|
||||
repository = "https://github.com/ilri/csv-metadata-quality"
|
||||
homepage = "https://github.com/ilri/csv-metadata-quality"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.8"
|
||||
pandas = "^1.0.4"
|
||||
python-stdnum = "^1.13"
|
||||
xlrd = "^1.2.0"
|
||||
requests = "^2.23.0"
|
||||
requests-cache = "^0.5.2"
|
||||
pycountry = "^19.8.18"
|
||||
langid = "^1.1.6"
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
pytest = "^5.4.2"
|
||||
ipython = "^7.15.0"
|
||||
flake8 = "^3.8.2"
|
||||
pytest-clarity = "^0.3.0-alpha.0"
|
||||
black = "^19.10b0"
|
||||
isort = "^4.3.21"
|
||||
csvkit = "^1.0.5"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry>=0.12"]
|
||||
build-backend = "poetry.masonry.api"
|
@ -1,56 +1,300 @@
|
||||
-i https://pypi.org/simple
|
||||
agate-dbf==0.2.1
|
||||
agate-excel==0.2.3
|
||||
agate-sql==0.5.4
|
||||
agate==1.6.1
|
||||
appdirs==1.4.3
|
||||
attrs==19.3.0
|
||||
babel==2.8.0
|
||||
backcall==0.1.0
|
||||
black==19.10b0
|
||||
click==7.0
|
||||
csvkit==1.0.4
|
||||
dbfread==2.0.7
|
||||
decorator==4.4.1
|
||||
entrypoints==0.3
|
||||
et-xmlfile==1.0.1
|
||||
flake8==3.7.9
|
||||
ipython-genutils==0.2.0
|
||||
ipython==7.11.1
|
||||
isodate==0.6.0
|
||||
isort==4.3.21
|
||||
jdcal==1.4.1
|
||||
jedi==0.15.2
|
||||
leather==0.3.3
|
||||
mccabe==0.6.1
|
||||
more-itertools==8.1.0
|
||||
openpyxl==3.0.3
|
||||
packaging==20.0
|
||||
parsedatetime==2.5
|
||||
parso==0.5.2
|
||||
pathspec==0.7.0
|
||||
pexpect==4.7.0 ; sys_platform != 'win32'
|
||||
pickleshare==0.7.5
|
||||
pluggy==0.13.1
|
||||
prompt-toolkit==3.0.2
|
||||
ptyprocess==0.6.0
|
||||
py==1.8.1
|
||||
pycodestyle==2.5.0
|
||||
pyflakes==2.1.1
|
||||
pygments==2.5.2
|
||||
pyparsing==2.4.6
|
||||
pytest-clarity==0.2.0a1
|
||||
pytest==5.3.2
|
||||
python-slugify==4.0.0
|
||||
pytimeparse==1.1.8
|
||||
pytz==2019.3
|
||||
regex==2020.1.8
|
||||
six==1.13.0
|
||||
sqlalchemy==1.3.12
|
||||
termcolor==1.1.0
|
||||
text-unidecode==1.3
|
||||
toml==0.10.0
|
||||
traitlets==4.3.3
|
||||
typed-ast==1.4.1
|
||||
wcwidth==0.1.8
|
||||
xlrd==1.2.0
|
||||
agate==1.6.1 \
|
||||
--hash=sha256:48d6f80b35611c1ba25a642cbc5b90fcbdeeb2a54711c4a8d062ee2809334d1c \
|
||||
--hash=sha256:c93aaa500b439d71e4a5cf088d0006d2ce2c76f1950960c8843114e5f361dfd3
|
||||
agate-dbf==0.2.1 \
|
||||
--hash=sha256:00c93c498ec9a04cc587bf63dd7340e67e2541f0df4c9a7259d7cb3dd4ce372f \
|
||||
--hash=sha256:f618fadb413d41468c90d72fca945681d82d9e4d1b3d89f9bda52e607b828c0b
|
||||
agate-excel==0.2.3 \
|
||||
--hash=sha256:8f255ef2c87c436b7132049e1dd86c8e08bf82d8c773aea86f3069b461a17d52
|
||||
agate-sql==0.5.4 \
|
||||
--hash=sha256:9277490ba8b8e7c747a9ae3671f52fe486784b48d4a14e78ca197fb0e36f281b
|
||||
appdirs==1.4.4 \
|
||||
--hash=sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128 \
|
||||
--hash=sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41
|
||||
appnope==0.1.0; sys_platform == "darwin" \
|
||||
--hash=sha256:5b26757dc6f79a3b7dc9fab95359328d5747fcb2409d331ea66d0272b90ab2a0 \
|
||||
--hash=sha256:8b995ffe925347a2138d7ac0fe77155e4311a0ea6d6da4f5128fe4b3cbe5ed71
|
||||
atomicwrites==1.4.0; sys_platform == "win32" \
|
||||
--hash=sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197 \
|
||||
--hash=sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a
|
||||
attrs==19.3.0 \
|
||||
--hash=sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c \
|
||||
--hash=sha256:f7b7ce16570fe9965acd6d30101a28f62fb4a7f9e926b3bbc9b61f8b04247e72
|
||||
babel==2.8.0 \
|
||||
--hash=sha256:d670ea0b10f8b723672d3a6abeb87b565b244da220d76b4dba1b66269ec152d4 \
|
||||
--hash=sha256:1aac2ae2d0d8ea368fa90906567f5c08463d98ade155c0c4bfedd6a0f7160e38
|
||||
backcall==0.2.0 \
|
||||
--hash=sha256:fbbce6a29f263178a1f7915c1940bde0ec2b2a967566fe1c65c1dfb7422bd255 \
|
||||
--hash=sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e
|
||||
black==19.10b0 \
|
||||
--hash=sha256:1b30e59be925fafc1ee4565e5e08abef6b03fe455102883820fe5ee2e4734e0b \
|
||||
--hash=sha256:c2edb73a08e9e0e6f65a0e6af18b059b8b1cdd5bef997d7a0b181df93dc81539
|
||||
certifi==2020.6.20 \
|
||||
--hash=sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41 \
|
||||
--hash=sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3
|
||||
chardet==3.0.4 \
|
||||
--hash=sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691 \
|
||||
--hash=sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae
|
||||
click==7.1.2 \
|
||||
--hash=sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc \
|
||||
--hash=sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a
|
||||
colorama==0.4.3; sys_platform == "win32" \
|
||||
--hash=sha256:7d73d2a99753107a36ac6b455ee49046802e59d9d076ef8e47b61499fa29afff \
|
||||
--hash=sha256:e96da0d330793e2cb9485e9ddfd918d456036c7149416295932478192f4436a1
|
||||
csvkit==1.0.5 \
|
||||
--hash=sha256:7bd390f4d300e45dc9ed67a32af762a916bae7d9a85087a10fd4f64ce65fd5b9
|
||||
dbfread==2.0.7 \
|
||||
--hash=sha256:f604def58c59694fa0160d7be5d0b8d594467278d2bb6a47d46daf7162c84cec \
|
||||
--hash=sha256:07c8a9af06ffad3f6f03e8fe91ad7d2733e31a26d2b72c4dd4cfbae07ee3b73d
|
||||
decorator==4.4.2 \
|
||||
--hash=sha256:41fa54c2a0cc4ba648be4fd43cff00aedf5b9465c9bf18d64325bc225f08f760 \
|
||||
--hash=sha256:e3a62f0520172440ca0dcc823749319382e377f37f140a0b99ef45fecb84bfe7
|
||||
et-xmlfile==1.0.1 \
|
||||
--hash=sha256:614d9722d572f6246302c4491846d2c393c199cfa4edc9af593437691683335b
|
||||
flake8==3.8.3 \
|
||||
--hash=sha256:15e351d19611c887e482fb960eae4d44845013cc142d42896e9862f775d8cf5c \
|
||||
--hash=sha256:f04b9fcbac03b0a3e58c0ab3a0ecc462e023a9faf046d57794184028123aa208
|
||||
idna==2.10 \
|
||||
--hash=sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0 \
|
||||
--hash=sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6
|
||||
ipython==7.16.1 \
|
||||
--hash=sha256:2dbcc8c27ca7d3cfe4fcdff7f45b27f9a8d3edfa70ff8024a71c7a8eb5f09d64 \
|
||||
--hash=sha256:9f4fcb31d3b2c533333893b9172264e4821c1ac91839500f31bd43f2c59b3ccf
|
||||
ipython-genutils==0.2.0 \
|
||||
--hash=sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8 \
|
||||
--hash=sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8
|
||||
isodate==0.6.0 \
|
||||
--hash=sha256:aa4d33c06640f5352aca96e4b81afd8ab3b47337cc12089822d6f322ac772c81 \
|
||||
--hash=sha256:2e364a3d5759479cdb2d37cce6b9376ea504db2ff90252a2e5b7cc89cc9ff2d8
|
||||
isort==4.3.21 \
|
||||
--hash=sha256:6e811fcb295968434526407adb8796944f1988c5b65e8139058f2014cbe100fd \
|
||||
--hash=sha256:54da7e92468955c4fceacd0c86bd0ec997b0e1ee80d97f67c35a78b719dccab1
|
||||
jdcal==1.4.1 \
|
||||
--hash=sha256:1abf1305fce18b4e8aa248cf8fe0c56ce2032392bc64bbd61b5dff2a19ec8bba \
|
||||
--hash=sha256:472872e096eb8df219c23f2689fc336668bdb43d194094b5cc1707e1640acfc8
|
||||
jedi==0.17.1 \
|
||||
--hash=sha256:1ddb0ec78059e8e27ec9eb5098360b4ea0a3dd840bedf21415ea820c21b40a22 \
|
||||
--hash=sha256:807d5d4f96711a2bcfdd5dfa3b1ae6d09aa53832b182090b222b5efb81f52f63
|
||||
langid==1.1.6 \
|
||||
--hash=sha256:044bcae1912dab85c33d8e98f2811b8f4ff1213e5e9a9e9510137b84da2cb293
|
||||
leather==0.3.3 \
|
||||
--hash=sha256:e0bb36a6d5f59fbf3c1a6e75e7c8bee29e67f06f5b48c0134407dde612eba5e2 \
|
||||
--hash=sha256:076d1603b5281488285718ce1a5ce78cf1027fe1e76adf9c548caf83c519b988
|
||||
mccabe==0.6.1 \
|
||||
--hash=sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42 \
|
||||
--hash=sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f
|
||||
more-itertools==8.4.0 \
|
||||
--hash=sha256:68c70cc7167bdf5c7c9d8f6954a7837089c6a36bf565383919bb595efb8a17e5 \
|
||||
--hash=sha256:b78134b2063dd214000685165d81c154522c3ee0a1c0d4d113c80361c234c5a2
|
||||
numpy==1.19.0 \
|
||||
--hash=sha256:63d971bb211ad3ca37b2adecdd5365f40f3b741a455beecba70fd0dde8b2a4cb \
|
||||
--hash=sha256:b6aaeadf1e4866ca0fdf7bb4eed25e521ae21a7947c59f78154b24fc7abbe1dd \
|
||||
--hash=sha256:13af0184177469192d80db9bd02619f6fa8b922f9f327e077d6f2a6acb1ce1c0 \
|
||||
--hash=sha256:356f96c9fbec59974a592452ab6a036cd6f180822a60b529a975c9467fcd5f23 \
|
||||
--hash=sha256:fa1fe75b4a9e18b66ae7f0b122543c42debcf800aaafa0212aaff3ad273c2596 \
|
||||
--hash=sha256:cbe326f6d364375a8e5a8ccb7e9cd73f4b2f6dc3b2ed205633a0db8243e2a96a \
|
||||
--hash=sha256:a2e3a39f43f0ce95204beb8fe0831199542ccab1e0c6e486a0b4947256215632 \
|
||||
--hash=sha256:7b852817800eb02e109ae4a9cef2beda8dd50d98b76b6cfb7b5c0099d27b52d4 \
|
||||
--hash=sha256:d97a86937cf9970453c3b62abb55a6475f173347b4cde7f8dcdb48c8e1b9952d \
|
||||
--hash=sha256:a86c962e211f37edd61d6e11bb4df7eddc4a519a38a856e20a6498c319efa6b0 \
|
||||
--hash=sha256:d34fbb98ad0d6b563b95de852a284074514331e6b9da0a9fc894fb1cdae7a79e \
|
||||
--hash=sha256:658624a11f6e1c252b2cd170d94bf28c8f9410acab9f2fd4369e11e1cd4e1aaf \
|
||||
--hash=sha256:4d054f013a1983551254e2379385e359884e5af105e3efe00418977d02f634a7 \
|
||||
--hash=sha256:26a45798ca2a4e168d00de75d4a524abf5907949231512f372b217ede3429e98 \
|
||||
--hash=sha256:3c40c827d36c6d1c3cf413694d7dc843d50997ebffbc7c87d888a203ed6403a7 \
|
||||
--hash=sha256:be62aeff8f2f054eff7725f502f6228298891fd648dc2630e03e44bf63e8cee0 \
|
||||
--hash=sha256:dd53d7c4a69e766e4900f29db5872f5824a06827d594427cf1a4aa542818b796 \
|
||||
--hash=sha256:30a59fb41bb6b8c465ab50d60a1b298d1cd7b85274e71f38af5a75d6c475d2d2 \
|
||||
--hash=sha256:df1889701e2dfd8ba4dc9b1a010f0a60950077fb5242bb92c8b5c7f1a6f2668a \
|
||||
--hash=sha256:33c623ef9ca5e19e05991f127c1be5aeb1ab5cdf30cb1c5cf3960752e58b599b \
|
||||
--hash=sha256:26f509450db547e4dfa3ec739419b31edad646d21fb8d0ed0734188b35ff6b27 \
|
||||
--hash=sha256:7b57f26e5e6ee2f14f960db46bd58ffdca25ca06dd997729b1b179fddd35f5a3 \
|
||||
--hash=sha256:a8705c5073fe3fcc297fb8e0b31aa794e05af6a329e81b7ca4ffecab7f2b95ef \
|
||||
--hash=sha256:c2edbb783c841e36ca0fa159f0ae97a88ce8137fb3a6cd82eae77349ba4b607b \
|
||||
--hash=sha256:8cde829f14bd38f6da7b2954be0f2837043e8b8d7a9110ec5e318ae6bf706610 \
|
||||
--hash=sha256:76766cc80d6128750075378d3bb7812cf146415bd29b588616f72c943c00d598
|
||||
openpyxl==3.0.4 \
|
||||
--hash=sha256:6e62f058d19b09b95d20ebfbfb04857ad08d0833190516c1660675f699c6186f \
|
||||
--hash=sha256:d88dd1480668019684c66cfff3e52a5de4ed41e9df5dd52e008cbf27af0dbf87
|
||||
packaging==20.4 \
|
||||
--hash=sha256:998416ba6962ae7fbd6596850b80e17859a5753ba17c32284f67bfff33784181 \
|
||||
--hash=sha256:4357f74f47b9c12db93624a82154e9b120fa8293699949152b22065d556079f8
|
||||
pandas==1.0.5 \
|
||||
--hash=sha256:faa42a78d1350b02a7d2f0dbe3c80791cf785663d6997891549d0f86dc49125e \
|
||||
--hash=sha256:9c31d52f1a7dd2bb4681d9f62646c7aa554f19e8e9addc17e8b1b20011d7522d \
|
||||
--hash=sha256:8778a5cc5a8437a561e3276b85367412e10ae9fff07db1eed986e427d9a674f8 \
|
||||
--hash=sha256:9871ef5ee17f388f1cb35f76dc6106d40cb8165c562d573470672f4cdefa59ef \
|
||||
--hash=sha256:35b670b0abcfed7cad76f2834041dcf7ae47fd9b22b63622d67cdc933d79f453 \
|
||||
--hash=sha256:c9410ce8a3dee77653bc0684cfa1535a7f9c291663bd7ad79e39f5ab58f67ab3 \
|
||||
--hash=sha256:02f1e8f71cd994ed7fcb9a35b6ddddeb4314822a0e09a9c5b2d278f8cb5d4096 \
|
||||
--hash=sha256:b3c4f93fcb6e97d993bf87cdd917883b7dab7d20c627699f360a8fb49e9e0b91 \
|
||||
--hash=sha256:5759edf0b686b6f25a5d4a447ea588983a33afc8a0081a0954184a4a87fd0dd7 \
|
||||
--hash=sha256:ab8173a8efe5418bbe50e43f321994ac6673afc5c7c4839014cf6401bbdd0705 \
|
||||
--hash=sha256:13f75fb18486759da3ff40f5345d9dd20e7d78f2a39c5884d013456cec9876f0 \
|
||||
--hash=sha256:5a7cf6044467c1356b2b49ef69e50bf4d231e773c3ca0558807cdba56b76820b \
|
||||
--hash=sha256:ae961f1f0e270f1e4e2273f6a539b2ea33248e0e3a11ffb479d757918a5e03a9 \
|
||||
--hash=sha256:f69e0f7b7c09f1f612b1f8f59e2df72faa8a6b41c5a436dde5b615aaf948f107 \
|
||||
--hash=sha256:4c73f373b0800eb3062ffd13d4a7a2a6d522792fa6eb204d67a4fad0a40f03dc \
|
||||
--hash=sha256:69c5d920a0b2a9838e677f78f4dde506b95ea8e4d30da25859db6469ded84fa8
|
||||
parsedatetime==2.6 \
|
||||
--hash=sha256:cb96edd7016872f58479e35879294258c71437195760746faffedb692aef000b \
|
||||
--hash=sha256:4cb368fbb18a0b7231f4d76119165451c8d2e35951455dfee97c62a87b04d455
|
||||
parso==0.7.0 \
|
||||
--hash=sha256:158c140fc04112dc45bca311633ae5033c2c2a7b732fa33d0955bad8152a8dd0 \
|
||||
--hash=sha256:908e9fae2144a076d72ae4e25539143d40b8e3eafbaeae03c1bfe226f4cdf12c
|
||||
pathspec==0.8.0 \
|
||||
--hash=sha256:7d91249d21749788d07a2d0f94147accd8f845507400749ea19c1ec9054a12b0 \
|
||||
--hash=sha256:da45173eb3a6f2a5a487efba21f050af2b41948be6ab52b6a1e3ff22bb8b7061
|
||||
pexpect==4.8.0; sys_platform != "win32" \
|
||||
--hash=sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937 \
|
||||
--hash=sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c
|
||||
pickleshare==0.7.5 \
|
||||
--hash=sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56 \
|
||||
--hash=sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca
|
||||
pluggy==0.13.1 \
|
||||
--hash=sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d \
|
||||
--hash=sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0
|
||||
prompt-toolkit==3.0.5 \
|
||||
--hash=sha256:df7e9e63aea609b1da3a65641ceaf5bc7d05e0a04de5bd45d05dbeffbabf9e04 \
|
||||
--hash=sha256:563d1a4140b63ff9dd587bda9557cffb2fe73650205ab6f4383092fb882e7dc8
|
||||
ptyprocess==0.6.0; sys_platform != "win32" \
|
||||
--hash=sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f \
|
||||
--hash=sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0
|
||||
py==1.9.0 \
|
||||
--hash=sha256:366389d1db726cd2fcfc79732e75410e5fe4d31db13692115529d34069a043c2 \
|
||||
--hash=sha256:9ca6883ce56b4e8da7e79ac18787889fa5206c79dcc67fb065376cd2fe03f342
|
||||
pycodestyle==2.6.0 \
|
||||
--hash=sha256:2295e7b2f6b5bd100585ebcb1f616591b652db8a741695b3d8f5d28bdc934367 \
|
||||
--hash=sha256:c58a7d2815e0e8d7972bf1803331fb0152f867bd89adf8a01dfd55085434192e
|
||||
pycountry==19.8.18 \
|
||||
--hash=sha256:3c57aa40adcf293d59bebaffbe60d8c39976fba78d846a018dc0c2ec9c6cb3cb
|
||||
pyflakes==2.2.0 \
|
||||
--hash=sha256:0d94e0e05a19e57a99444b6ddcf9a6eb2e5c68d3ca1e98e90707af8152c90a92 \
|
||||
--hash=sha256:35b2d75ee967ea93b55750aa9edbbf72813e06a66ba54438df2cfac9e3c27fc8
|
||||
pygments==2.6.1 \
|
||||
--hash=sha256:ff7a40b4860b727ab48fad6360eb351cc1b33cbf9b15a0f689ca5353e9463324 \
|
||||
--hash=sha256:647344a061c249a3b74e230c739f434d7ea4d8b1d5f3721bc0f3558049b38f44
|
||||
pyparsing==2.4.7 \
|
||||
--hash=sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b \
|
||||
--hash=sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1
|
||||
pytest==5.4.3 \
|
||||
--hash=sha256:5c0db86b698e8f170ba4582a492248919255fcd4c79b1ee64ace34301fb589a1 \
|
||||
--hash=sha256:7979331bfcba207414f5e1263b5a0f8f521d0f457318836a7355531ed1a4c7d8
|
||||
pytest-clarity==0.3.0a0 \
|
||||
--hash=sha256:5cc99e3d9b7969dfe17e5f6072d45a917c59d363b679686d3c958a1ded2e4dcf
|
||||
python-dateutil==2.8.1 \
|
||||
--hash=sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c \
|
||||
--hash=sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a
|
||||
python-slugify==4.0.1 \
|
||||
--hash=sha256:69a517766e00c1268e5bbfc0d010a0a8508de0b18d30ad5a1ff357f8ae724270
|
||||
python-stdnum==1.13 \
|
||||
--hash=sha256:120f83d33fb8b8be1b282f20dd755a892d5facf84f54fa21f75bbd2633128160 \
|
||||
--hash=sha256:3d5d4430579cba88211d3ba4855a16faff235352a25a01d6ab70024686a75823
|
||||
pytimeparse==1.1.8 \
|
||||
--hash=sha256:04b7be6cc8bd9f5647a6325444926c3ac34ee6bc7e69da4367ba282f076036bd \
|
||||
--hash=sha256:e86136477be924d7e670646a98561957e8ca7308d44841e21f5ddea757556a0a
|
||||
pytz==2020.1 \
|
||||
--hash=sha256:a494d53b6d39c3c6e44c3bec237336e14305e4f29bbf800b599253057fbb79ed \
|
||||
--hash=sha256:c35965d010ce31b23eeb663ed3cc8c906275d6be1a34393a1d73a41febf4a048
|
||||
regex==2020.6.8 \
|
||||
--hash=sha256:fbff901c54c22425a5b809b914a3bfaf4b9570eee0e5ce8186ac71eb2025191c \
|
||||
--hash=sha256:112e34adf95e45158c597feea65d06a8124898bdeac975c9087fe71b572bd938 \
|
||||
--hash=sha256:92d8a043a4241a710c1cf7593f5577fbb832cf6c3a00ff3fc1ff2052aff5dd89 \
|
||||
--hash=sha256:bae83f2a56ab30d5353b47f9b2a33e4aac4de9401fb582b55c42b132a8ac3868 \
|
||||
--hash=sha256:b2ba0f78b3ef375114856cbdaa30559914d081c416b431f2437f83ce4f8b7f2f \
|
||||
--hash=sha256:95fa7726d073c87141f7bbfb04c284901f8328e2d430eeb71b8ffdd5742a5ded \
|
||||
--hash=sha256:e3cdc9423808f7e1bb9c2e0bdb1c9dc37b0607b30d646ff6faf0d4e41ee8fee3 \
|
||||
--hash=sha256:c78e66a922de1c95a208e4ec02e2e5cf0bb83a36ceececc10a72841e53fbf2bd \
|
||||
--hash=sha256:08997a37b221a3e27d68ffb601e45abfb0093d39ee770e4257bd2f5115e8cb0a \
|
||||
--hash=sha256:2f6f211633ee8d3f7706953e9d3edc7ce63a1d6aad0be5dcee1ece127eea13ae \
|
||||
--hash=sha256:55b4c25cbb3b29f8d5e63aeed27b49fa0f8476b0d4e1b3171d85db891938cc3a \
|
||||
--hash=sha256:89cda1a5d3e33ec9e231ece7307afc101b5217523d55ef4dc7fb2abd6de71ba3 \
|
||||
--hash=sha256:690f858d9a94d903cf5cada62ce069b5d93b313d7d05456dbcd99420856562d9 \
|
||||
--hash=sha256:1700419d8a18c26ff396b3b06ace315b5f2a6e780dad387e4c48717a12a22c29 \
|
||||
--hash=sha256:654cb773b2792e50151f0e22be0f2b6e1c3a04c5328ff1d9d59c0398d37ef610 \
|
||||
--hash=sha256:52e1b4bef02f4040b2fd547357a170fc1146e60ab310cdbdd098db86e929b387 \
|
||||
--hash=sha256:cf59bbf282b627130f5ba68b7fa3abdb96372b24b66bdf72a4920e8153fc7910 \
|
||||
--hash=sha256:5aaa5928b039ae440d775acea11d01e42ff26e1561c0ffcd3d805750973c6baf \
|
||||
--hash=sha256:97712e0d0af05febd8ab63d2ef0ab2d0cd9deddf4476f7aa153f76feef4b2754 \
|
||||
--hash=sha256:6ad8663c17db4c5ef438141f99e291c4d4edfeaacc0ce28b5bba2b0bf273d9b5 \
|
||||
--hash=sha256:e9b64e609d37438f7d6e68c2546d2cb8062f3adb27e6336bc129b51be20773ac
|
||||
requests==2.24.0 \
|
||||
--hash=sha256:fe75cc94a9443b9246fc7049224f75604b113c36acb93f87b80ed42c44cbb898 \
|
||||
--hash=sha256:b3559a131db72c33ee969480840fff4bb6dd111de7dd27c8ee1f820f4f00231b
|
||||
requests-cache==0.5.2 \
|
||||
--hash=sha256:813023269686045f8e01e2289cc1e7e9ae5ab22ddd1e2849a9093ab3ab7270eb \
|
||||
--hash=sha256:81e13559baee64677a7d73b85498a5a8f0639e204517b5d05ff378e44a57831a
|
||||
six==1.15.0 \
|
||||
--hash=sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced \
|
||||
--hash=sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259
|
||||
sqlalchemy==1.3.18 \
|
||||
--hash=sha256:f11c2437fb5f812d020932119ba02d9e2bc29a6eca01a055233a8b449e3e1e7d \
|
||||
--hash=sha256:0ec575db1b54909750332c2e335c2bb11257883914a03bc5a3306a4488ecc772 \
|
||||
--hash=sha256:f57be5673e12763dd400fea568608700a63ce1c6bd5bdbc3cc3a2c5fdb045274 \
|
||||
--hash=sha256:8cac7bb373a5f1423e28de3fd5fc8063b9c8ffe8957dc1b1a59cb90453db6da1 \
|
||||
--hash=sha256:adad60eea2c4c2a1875eb6305a0b6e61a83163f8e233586a4d6a55221ef984fe \
|
||||
--hash=sha256:57aa843b783179ab72e863512e14bdcba186641daf69e4e3a5761d705dcc35b1 \
|
||||
--hash=sha256:621f58cd921cd71ba6215c42954ffaa8a918eecd8c535d97befa1a8acad986dd \
|
||||
--hash=sha256:fc728ece3d5c772c196fd338a99798e7efac7a04f9cb6416299a3638ee9a94cd \
|
||||
--hash=sha256:736d41cfebedecc6f159fc4ac0769dc89528a989471dc1d378ba07d29a60ba1c \
|
||||
--hash=sha256:427273b08efc16a85aa2b39892817e78e3ed074fcb89b2a51c4979bae7e7ba98 \
|
||||
--hash=sha256:cbe1324ef52ff26ccde2cb84b8593c8bf930069dfc06c1e616f1bfd4e47f48a3 \
|
||||
--hash=sha256:8fd452dc3d49b3cc54483e033de6c006c304432e6f84b74d7b2c68afa2569ae5 \
|
||||
--hash=sha256:e89e0d9e106f8a9180a4ca92a6adde60c58b1b0299e1b43bd5e0312f535fbf33 \
|
||||
--hash=sha256:6ac2558631a81b85e7fb7a44e5035347938b0a73f5fdc27a8566777d0792a6a4 \
|
||||
--hash=sha256:87fad64529cde4f1914a5b9c383628e1a8f9e3930304c09cf22c2ae118a1280e \
|
||||
--hash=sha256:e4624d7edb2576cd72bb83636cd71c8ce544d8e272f308bd80885056972ca299 \
|
||||
--hash=sha256:89494df7f93b1836cae210c42864b292f9b31eeabca4810193761990dc689cce \
|
||||
--hash=sha256:716754d0b5490bdcf68e1e4925edc02ac07209883314ad01a137642ddb2056f1 \
|
||||
--hash=sha256:50c4ee32f0e1581828843267d8de35c3298e86ceecd5e9017dc45788be70a864 \
|
||||
--hash=sha256:d98bc827a1293ae767c8f2f18be3bb5151fd37ddcd7da2a5f9581baeeb7a3fa1 \
|
||||
--hash=sha256:0942a3a0df3f6131580eddd26d99071b48cfe5aaf3eab2783076fbc5a1c1882e \
|
||||
--hash=sha256:16593fd748944726540cd20f7e83afec816c2ac96b082e26ae226e8f7e9688cf \
|
||||
--hash=sha256:c26f95e7609b821b5f08a72dab929baa0d685406b953efd7c89423a511d5c413 \
|
||||
--hash=sha256:512a85c3c8c3995cc91af3e90f38f460da5d3cade8dc3a229c8e0879037547c9 \
|
||||
--hash=sha256:d05c4adae06bd0c7f696ae3ec8d993ed8ffcc4e11a76b1b35a5af8a099bd2284 \
|
||||
--hash=sha256:109581ccc8915001e8037b73c29590e78ce74be49ca0a3630a23831f9e3ed6c7 \
|
||||
--hash=sha256:8619b86cb68b185a778635be5b3e6018623c0761dde4df2f112896424aa27bd8 \
|
||||
--hash=sha256:da2fb75f64792c1fc64c82313a00c728a7c301efe6a60b7a9fe35b16b4368ce7
|
||||
termcolor==1.1.0 \
|
||||
--hash=sha256:1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b
|
||||
text-unidecode==1.3 \
|
||||
--hash=sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93 \
|
||||
--hash=sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8
|
||||
toml==0.10.1 \
|
||||
--hash=sha256:bda89d5935c2eac546d648028b9901107a595863cb36bae0c73ac804a9b4ce88 \
|
||||
--hash=sha256:926b612be1e5ce0634a2ca03470f95169cf16f939018233a670519cb4ac58b0f
|
||||
traitlets==4.3.3 \
|
||||
--hash=sha256:70b4c6a1d9019d7b4f6846832288f86998aa3b9207c6821f3578a6a6a467fe44 \
|
||||
--hash=sha256:d023ee369ddd2763310e4c3eae1ff649689440d4ae59d7485eb4cfbbe3e359f7
|
||||
typed-ast==1.4.1 \
|
||||
--hash=sha256:73d785a950fc82dd2a25897d525d003f6378d1cb23ab305578394694202a58c3 \
|
||||
--hash=sha256:aaee9905aee35ba5905cfb3c62f3e83b3bec7b39413f0a7f19be4e547ea01ebb \
|
||||
--hash=sha256:0c2c07682d61a629b68433afb159376e24e5b2fd4641d35424e462169c0a7919 \
|
||||
--hash=sha256:4083861b0aa07990b619bd7ddc365eb7fa4b817e99cf5f8d9cf21a42780f6e01 \
|
||||
--hash=sha256:269151951236b0f9a6f04015a9004084a5ab0d5f19b57de779f908621e7d8b75 \
|
||||
--hash=sha256:24995c843eb0ad11a4527b026b4dde3da70e1f2d8806c99b7b4a7cf491612652 \
|
||||
--hash=sha256:fe460b922ec15dd205595c9b5b99e2f056fd98ae8f9f56b888e7a17dc2b757e7 \
|
||||
--hash=sha256:4e3e5da80ccbebfff202a67bf900d081906c358ccc3d5e3c8aea42fdfdfd51c1 \
|
||||
--hash=sha256:249862707802d40f7f29f6e1aad8d84b5aa9e44552d2cc17384b209f091276aa \
|
||||
--hash=sha256:8ce678dbaf790dbdb3eba24056d5364fb45944f33553dd5869b7580cdbb83614 \
|
||||
--hash=sha256:c9e348e02e4d2b4a8b2eedb48210430658df6951fa484e59de33ff773fbd4b41 \
|
||||
--hash=sha256:bcd3b13b56ea479b3650b82cabd6b5343a625b0ced5429e4ccad28a8973f301b \
|
||||
--hash=sha256:d5d33e9e7af3b34a40dc05f498939f0ebf187f07c385fd58d591c533ad8562fe \
|
||||
--hash=sha256:0666aa36131496aed8f7be0410ff974562ab7eeac11ef351def9ea6fa28f6355 \
|
||||
--hash=sha256:d205b1b46085271b4e15f670058ce182bd1199e56b317bf2ec004b6a44f911f6 \
|
||||
--hash=sha256:6daac9731f172c2a22ade6ed0c00197ee7cc1221aa84cfdf9c31defeb059a907 \
|
||||
--hash=sha256:498b0f36cc7054c1fead3d7fc59d2150f4d5c6c56ba7fb150c013fbc683a8d2d \
|
||||
--hash=sha256:715ff2f2df46121071622063fc7543d9b1fd19ebfc4f5c8895af64a77a8c852c \
|
||||
--hash=sha256:fc0fea399acb12edbf8a628ba8d2312f583bdbdb3335635db062fa98cf71fca4 \
|
||||
--hash=sha256:d43943ef777f9a1c42bf4e552ba23ac77a6351de620aa9acf64ad54933ad4d34 \
|
||||
--hash=sha256:8c8aaad94455178e3187ab22c8b01a3837f8ee50e09cf31f1ba129eb293ec30b
|
||||
urllib3==1.25.9 \
|
||||
--hash=sha256:88206b0eb87e6d677d424843ac5209e3fb9d0190d0ee169599165ec25e9d9115 \
|
||||
--hash=sha256:3018294ebefce6572a474f0604c2021e33b3fd8006ecd11d62107a5d2a963527
|
||||
wcwidth==0.2.5 \
|
||||
--hash=sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784 \
|
||||
--hash=sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83
|
||||
xlrd==1.2.0 \
|
||||
--hash=sha256:e551fb498759fa3a5384a94ccd4c3c02eb7c00ea424426e212ac0c57be9dfbde \
|
||||
--hash=sha256:546eb36cee8db40c3eaa46c351e67ffee6eeb5fa2650b71bc4c758a29a1b29b2
|
||||
|
@ -1,17 +1,81 @@
|
||||
-i https://pypi.org/simple
|
||||
-e .
|
||||
certifi==2019.11.28
|
||||
chardet==3.0.4
|
||||
idna==2.8
|
||||
langid==1.1.6
|
||||
numpy==1.18.1
|
||||
pandas==1.0.0rc0
|
||||
pycountry==19.8.18
|
||||
python-dateutil==2.8.1
|
||||
python-stdnum==1.12
|
||||
pytz==2019.3
|
||||
requests-cache==0.5.2
|
||||
requests==2.22.0
|
||||
six==1.13.0
|
||||
urllib3==1.25.7
|
||||
xlrd==1.2.0
|
||||
certifi==2020.6.20 \
|
||||
--hash=sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41 \
|
||||
--hash=sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3
|
||||
chardet==3.0.4 \
|
||||
--hash=sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691 \
|
||||
--hash=sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae
|
||||
idna==2.10 \
|
||||
--hash=sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0 \
|
||||
--hash=sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6
|
||||
langid==1.1.6 \
|
||||
--hash=sha256:044bcae1912dab85c33d8e98f2811b8f4ff1213e5e9a9e9510137b84da2cb293
|
||||
numpy==1.19.0 \
|
||||
--hash=sha256:63d971bb211ad3ca37b2adecdd5365f40f3b741a455beecba70fd0dde8b2a4cb \
|
||||
--hash=sha256:b6aaeadf1e4866ca0fdf7bb4eed25e521ae21a7947c59f78154b24fc7abbe1dd \
|
||||
--hash=sha256:13af0184177469192d80db9bd02619f6fa8b922f9f327e077d6f2a6acb1ce1c0 \
|
||||
--hash=sha256:356f96c9fbec59974a592452ab6a036cd6f180822a60b529a975c9467fcd5f23 \
|
||||
--hash=sha256:fa1fe75b4a9e18b66ae7f0b122543c42debcf800aaafa0212aaff3ad273c2596 \
|
||||
--hash=sha256:cbe326f6d364375a8e5a8ccb7e9cd73f4b2f6dc3b2ed205633a0db8243e2a96a \
|
||||
--hash=sha256:a2e3a39f43f0ce95204beb8fe0831199542ccab1e0c6e486a0b4947256215632 \
|
||||
--hash=sha256:7b852817800eb02e109ae4a9cef2beda8dd50d98b76b6cfb7b5c0099d27b52d4 \
|
||||
--hash=sha256:d97a86937cf9970453c3b62abb55a6475f173347b4cde7f8dcdb48c8e1b9952d \
|
||||
--hash=sha256:a86c962e211f37edd61d6e11bb4df7eddc4a519a38a856e20a6498c319efa6b0 \
|
||||
--hash=sha256:d34fbb98ad0d6b563b95de852a284074514331e6b9da0a9fc894fb1cdae7a79e \
|
||||
--hash=sha256:658624a11f6e1c252b2cd170d94bf28c8f9410acab9f2fd4369e11e1cd4e1aaf \
|
||||
--hash=sha256:4d054f013a1983551254e2379385e359884e5af105e3efe00418977d02f634a7 \
|
||||
--hash=sha256:26a45798ca2a4e168d00de75d4a524abf5907949231512f372b217ede3429e98 \
|
||||
--hash=sha256:3c40c827d36c6d1c3cf413694d7dc843d50997ebffbc7c87d888a203ed6403a7 \
|
||||
--hash=sha256:be62aeff8f2f054eff7725f502f6228298891fd648dc2630e03e44bf63e8cee0 \
|
||||
--hash=sha256:dd53d7c4a69e766e4900f29db5872f5824a06827d594427cf1a4aa542818b796 \
|
||||
--hash=sha256:30a59fb41bb6b8c465ab50d60a1b298d1cd7b85274e71f38af5a75d6c475d2d2 \
|
||||
--hash=sha256:df1889701e2dfd8ba4dc9b1a010f0a60950077fb5242bb92c8b5c7f1a6f2668a \
|
||||
--hash=sha256:33c623ef9ca5e19e05991f127c1be5aeb1ab5cdf30cb1c5cf3960752e58b599b \
|
||||
--hash=sha256:26f509450db547e4dfa3ec739419b31edad646d21fb8d0ed0734188b35ff6b27 \
|
||||
--hash=sha256:7b57f26e5e6ee2f14f960db46bd58ffdca25ca06dd997729b1b179fddd35f5a3 \
|
||||
--hash=sha256:a8705c5073fe3fcc297fb8e0b31aa794e05af6a329e81b7ca4ffecab7f2b95ef \
|
||||
--hash=sha256:c2edbb783c841e36ca0fa159f0ae97a88ce8137fb3a6cd82eae77349ba4b607b \
|
||||
--hash=sha256:8cde829f14bd38f6da7b2954be0f2837043e8b8d7a9110ec5e318ae6bf706610 \
|
||||
--hash=sha256:76766cc80d6128750075378d3bb7812cf146415bd29b588616f72c943c00d598
|
||||
pandas==1.0.5 \
|
||||
--hash=sha256:faa42a78d1350b02a7d2f0dbe3c80791cf785663d6997891549d0f86dc49125e \
|
||||
--hash=sha256:9c31d52f1a7dd2bb4681d9f62646c7aa554f19e8e9addc17e8b1b20011d7522d \
|
||||
--hash=sha256:8778a5cc5a8437a561e3276b85367412e10ae9fff07db1eed986e427d9a674f8 \
|
||||
--hash=sha256:9871ef5ee17f388f1cb35f76dc6106d40cb8165c562d573470672f4cdefa59ef \
|
||||
--hash=sha256:35b670b0abcfed7cad76f2834041dcf7ae47fd9b22b63622d67cdc933d79f453 \
|
||||
--hash=sha256:c9410ce8a3dee77653bc0684cfa1535a7f9c291663bd7ad79e39f5ab58f67ab3 \
|
||||
--hash=sha256:02f1e8f71cd994ed7fcb9a35b6ddddeb4314822a0e09a9c5b2d278f8cb5d4096 \
|
||||
--hash=sha256:b3c4f93fcb6e97d993bf87cdd917883b7dab7d20c627699f360a8fb49e9e0b91 \
|
||||
--hash=sha256:5759edf0b686b6f25a5d4a447ea588983a33afc8a0081a0954184a4a87fd0dd7 \
|
||||
--hash=sha256:ab8173a8efe5418bbe50e43f321994ac6673afc5c7c4839014cf6401bbdd0705 \
|
||||
--hash=sha256:13f75fb18486759da3ff40f5345d9dd20e7d78f2a39c5884d013456cec9876f0 \
|
||||
--hash=sha256:5a7cf6044467c1356b2b49ef69e50bf4d231e773c3ca0558807cdba56b76820b \
|
||||
--hash=sha256:ae961f1f0e270f1e4e2273f6a539b2ea33248e0e3a11ffb479d757918a5e03a9 \
|
||||
--hash=sha256:f69e0f7b7c09f1f612b1f8f59e2df72faa8a6b41c5a436dde5b615aaf948f107 \
|
||||
--hash=sha256:4c73f373b0800eb3062ffd13d4a7a2a6d522792fa6eb204d67a4fad0a40f03dc \
|
||||
--hash=sha256:69c5d920a0b2a9838e677f78f4dde506b95ea8e4d30da25859db6469ded84fa8
|
||||
pycountry==19.8.18 \
|
||||
--hash=sha256:3c57aa40adcf293d59bebaffbe60d8c39976fba78d846a018dc0c2ec9c6cb3cb
|
||||
python-dateutil==2.8.1 \
|
||||
--hash=sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c \
|
||||
--hash=sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a
|
||||
python-stdnum==1.13 \
|
||||
--hash=sha256:120f83d33fb8b8be1b282f20dd755a892d5facf84f54fa21f75bbd2633128160 \
|
||||
--hash=sha256:3d5d4430579cba88211d3ba4855a16faff235352a25a01d6ab70024686a75823
|
||||
pytz==2020.1 \
|
||||
--hash=sha256:a494d53b6d39c3c6e44c3bec237336e14305e4f29bbf800b599253057fbb79ed \
|
||||
--hash=sha256:c35965d010ce31b23eeb663ed3cc8c906275d6be1a34393a1d73a41febf4a048
|
||||
requests==2.24.0 \
|
||||
--hash=sha256:fe75cc94a9443b9246fc7049224f75604b113c36acb93f87b80ed42c44cbb898 \
|
||||
--hash=sha256:b3559a131db72c33ee969480840fff4bb6dd111de7dd27c8ee1f820f4f00231b
|
||||
requests-cache==0.5.2 \
|
||||
--hash=sha256:813023269686045f8e01e2289cc1e7e9ae5ab22ddd1e2849a9093ab3ab7270eb \
|
||||
--hash=sha256:81e13559baee64677a7d73b85498a5a8f0639e204517b5d05ff378e44a57831a
|
||||
six==1.15.0 \
|
||||
--hash=sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced \
|
||||
--hash=sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259
|
||||
urllib3==1.25.9 \
|
||||
--hash=sha256:88206b0eb87e6d677d424843ac5209e3fb9d0190d0ee169599165ec25e9d9115 \
|
||||
--hash=sha256:3018294ebefce6572a474f0604c2021e33b3fd8006ecd11d62107a5d2a963527
|
||||
xlrd==1.2.0 \
|
||||
--hash=sha256:e551fb498759fa3a5384a94ccd4c3c02eb7c00ea424426e212ac0c57be9dfbde \
|
||||
--hash=sha256:546eb36cee8db40c3eaa46c351e67ffee6eeb5fa2650b71bc4c758a29a1b29b2
|
||||
|
4
setup.py
4
setup.py
@ -14,7 +14,7 @@ install_requires = [
|
||||
|
||||
setuptools.setup(
|
||||
name="csv-metadata-quality",
|
||||
version="0.4.0",
|
||||
version="0.4.2",
|
||||
author="Alan Orth",
|
||||
author_email="aorth@mjanja.ch",
|
||||
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem.",
|
||||
@ -23,6 +23,8 @@ setuptools.setup(
|
||||
long_description_content_type="text/markdown",
|
||||
url="https://github.com/alanorth/csv-metadata-quality",
|
||||
classifiers=[
|
||||
"Programming Language :: Python :: 3.6",
|
||||
"Programming Language :: Python :: 3.7",
|
||||
"Programming Language :: Python :: 3.8",
|
||||
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
|
||||
"Operating System :: OS Independent",
|
||||
|
@ -51,10 +51,12 @@ def test_check_invalid_separators(capsys):
|
||||
|
||||
value = "Alan|Orth"
|
||||
|
||||
check.separators(value)
|
||||
field_name = "dc.contributor.author"
|
||||
|
||||
check.separators(value, field_name)
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert captured.out == f"Invalid multi-value separator: {value}\n"
|
||||
assert captured.out == f"Invalid multi-value separator ({field_name}): {value}\n"
|
||||
|
||||
|
||||
def test_check_valid_separators():
|
||||
@ -62,7 +64,9 @@ def test_check_valid_separators():
|
||||
|
||||
value = "Alan||Orth"
|
||||
|
||||
result = check.separators(value)
|
||||
field_name = "dc.contributor.author"
|
||||
|
||||
result = check.separators(value, field_name)
|
||||
|
||||
assert result == value
|
||||
|
||||
|
@ -6,7 +6,9 @@ def test_fix_leading_whitespace():
|
||||
|
||||
value = " Alan"
|
||||
|
||||
assert fix.whitespace(value) == "Alan"
|
||||
field_name = "dc.contributor.author"
|
||||
|
||||
assert fix.whitespace(value, field_name) == "Alan"
|
||||
|
||||
|
||||
def test_fix_trailing_whitespace():
|
||||
@ -14,7 +16,9 @@ def test_fix_trailing_whitespace():
|
||||
|
||||
value = "Alan "
|
||||
|
||||
assert fix.whitespace(value) == "Alan"
|
||||
field_name = "dc.contributor.author"
|
||||
|
||||
assert fix.whitespace(value, field_name) == "Alan"
|
||||
|
||||
|
||||
def test_fix_excessive_whitespace():
|
||||
@ -22,7 +26,9 @@ def test_fix_excessive_whitespace():
|
||||
|
||||
value = "Alan Orth"
|
||||
|
||||
assert fix.whitespace(value) == "Alan Orth"
|
||||
field_name = "dc.contributor.author"
|
||||
|
||||
assert fix.whitespace(value, field_name) == "Alan Orth"
|
||||
|
||||
|
||||
def test_fix_invalid_separators():
|
||||
@ -30,7 +36,9 @@ def test_fix_invalid_separators():
|
||||
|
||||
value = "Alan|Orth"
|
||||
|
||||
assert fix.separators(value) == "Alan||Orth"
|
||||
field_name = "dc.contributor.author"
|
||||
|
||||
assert fix.separators(value, field_name) == "Alan||Orth"
|
||||
|
||||
|
||||
def test_fix_unnecessary_unicode():
|
||||
@ -46,7 +54,9 @@ def test_fix_duplicates():
|
||||
|
||||
value = "Kenya||Kenya"
|
||||
|
||||
assert fix.duplicates(value) == "Kenya"
|
||||
field_name = "dc.contributor.author"
|
||||
|
||||
assert fix.duplicates(value, field_name) == "Kenya"
|
||||
|
||||
|
||||
def test_fix_newlines():
|
||||
|
Reference in New Issue
Block a user