mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-11-27 16:18:19 +01:00
Compare commits
No commits in common. "18780023910f12af323b9b765eba84aad27c2fe6" and "1fa81f755850b21c2abb5d6a24724067b34c4d01" have entirely different histories.
1878002391
...
1fa81f7558
@ -10,7 +10,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
`dcterms.bibliographicCitation` fields
|
||||
- Fixed regex so we run the comma space fix on `dcterms.bibliographicCitation`
|
||||
fields
|
||||
- Don't crash the country/region checker/fixer when a title field is missing
|
||||
|
||||
### Changed
|
||||
- Don't run newline fix on description fields
|
||||
|
@ -73,8 +73,7 @@ def run(argv):
|
||||
# set the signal handler for SIGINT (^C)
|
||||
signal.signal(signal.SIGINT, signal_handler)
|
||||
|
||||
# Read all fields as strings so dates don't get converted from 1998 to 1998.0
|
||||
df = pd.read_csv(args.input_file, dtype_backend="pyarrow", dtype="str")
|
||||
df = pd.read_csv(args.input_file, dtype_backend="pyarrow")
|
||||
|
||||
# Check if the user requested to skip any fields
|
||||
if args.exclude_fields:
|
||||
|
@ -563,13 +563,8 @@ def countries_match_regions(row, exclude):
|
||||
un_region = cc.convert(names=country, to="UNRegion")
|
||||
|
||||
if un_region != "not found" and un_region not in regions:
|
||||
try:
|
||||
print(
|
||||
f"{Fore.YELLOW}Missing region ({country} → {un_region}): {Fore.RESET}{row[title_column_name]}"
|
||||
)
|
||||
except KeyError:
|
||||
print(
|
||||
f"{Fore.YELLOW}Missing region ({country} → {un_region}): {Fore.RESET}<title field not present>"
|
||||
)
|
||||
print(
|
||||
f"{Fore.YELLOW}Missing region ({country} → {un_region}): {Fore.RESET}{row[title_column_name]}"
|
||||
)
|
||||
|
||||
return
|
||||
|
@ -370,17 +370,9 @@ def countries_match_regions(row, exclude):
|
||||
# it doesn't already exist in regions.
|
||||
if un_region != "not found" and un_region not in regions:
|
||||
if un_region not in missing_regions:
|
||||
try:
|
||||
print(
|
||||
f"{Fore.YELLOW}Adding missing region ({un_region}): {Fore.RESET}{row[title_column_name]}"
|
||||
)
|
||||
except KeyError:
|
||||
# If there is no title column in the CSV we will print
|
||||
# the fix without the title instead of crashing.
|
||||
print(
|
||||
f"{Fore.YELLOW}Adding missing region ({un_region}): {Fore.RESET}<title field not present>"
|
||||
)
|
||||
|
||||
print(
|
||||
f"{Fore.YELLOW}Adding missing region ({un_region}): {Fore.RESET}{row[title_column_name]}"
|
||||
)
|
||||
missing_regions.append(un_region)
|
||||
|
||||
if len(missing_regions) > 0:
|
||||
|
54
poetry.lock
generated
54
poetry.lock
generated
@ -1,4 +1,4 @@
|
||||
# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
|
||||
# This file is automatically @generated by Poetry 1.5.0 and should not be changed by hand.
|
||||
|
||||
[[package]]
|
||||
name = "agate"
|
||||
@ -256,28 +256,18 @@ doc = ["gitpython", "numpydoc", "sphinx"]
|
||||
|
||||
[[package]]
|
||||
name = "cattrs"
|
||||
version = "23.1.2"
|
||||
version = "22.2.0"
|
||||
description = "Composable complex class support for attrs and dataclasses."
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "cattrs-23.1.2-py3-none-any.whl", hash = "sha256:b2bb14311ac17bed0d58785e5a60f022e5431aca3932e3fc5cc8ed8639de50a4"},
|
||||
{file = "cattrs-23.1.2.tar.gz", hash = "sha256:db1c821b8c537382b2c7c66678c3790091ca0275ac486c76f3c8f3920e83c657"},
|
||||
{file = "cattrs-22.2.0-py3-none-any.whl", hash = "sha256:bc12b1f0d000b9f9bee83335887d532a1d3e99a833d1bf0882151c97d3e68c21"},
|
||||
{file = "cattrs-22.2.0.tar.gz", hash = "sha256:f0eed5642399423cf656e7b66ce92cdc5b963ecafd041d1b24d136fdde7acf6d"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
attrs = ">=20"
|
||||
exceptiongroup = {version = "*", markers = "python_version < \"3.11\""}
|
||||
typing_extensions = {version = ">=4.1.0", markers = "python_version < \"3.11\""}
|
||||
|
||||
[package.extras]
|
||||
bson = ["pymongo (>=4.2.0,<5.0.0)"]
|
||||
cbor2 = ["cbor2 (>=5.4.6,<6.0.0)"]
|
||||
msgpack = ["msgpack (>=1.0.2,<2.0.0)"]
|
||||
orjson = ["orjson (>=3.5.2,<4.0.0)"]
|
||||
pyyaml = ["PyYAML (>=6.0,<7.0)"]
|
||||
tomlkit = ["tomlkit (>=0.11.4,<0.12.0)"]
|
||||
ujson = ["ujson (>=5.4.0,<6.0.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "certifi"
|
||||
@ -1131,18 +1121,18 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "platformdirs"
|
||||
version = "3.5.3"
|
||||
version = "3.5.1"
|
||||
description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "platformdirs-3.5.3-py3-none-any.whl", hash = "sha256:0ade98a4895e87dc51d47151f7d2ec290365a585151d97b4d8d6312ed6132fed"},
|
||||
{file = "platformdirs-3.5.3.tar.gz", hash = "sha256:e48fabd87db8f3a7df7150a4a5ea22c546ee8bc39bc2473244730d4b56d2cc4e"},
|
||||
{file = "platformdirs-3.5.1-py3-none-any.whl", hash = "sha256:e2378146f1964972c03c085bb5662ae80b2b8c06226c54b2ff4aa9483e8a13a5"},
|
||||
{file = "platformdirs-3.5.1.tar.gz", hash = "sha256:412dae91f52a6f84830f39a8078cecd0e866cb72294a5c66808e74d5e88d251f"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
docs = ["furo (>=2023.5.20)", "proselint (>=0.13)", "sphinx (>=7.0.1)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"]
|
||||
test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest-cov (>=4.1)", "pytest-mock (>=3.10)"]
|
||||
docs = ["furo (>=2023.3.27)", "proselint (>=0.13)", "sphinx (>=6.2.1)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"]
|
||||
test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest-cov (>=4)", "pytest-mock (>=3.10)"]
|
||||
|
||||
[[package]]
|
||||
name = "pluggy"
|
||||
@ -1299,13 +1289,13 @@ plugins = ["importlib-metadata"]
|
||||
|
||||
[[package]]
|
||||
name = "pytest"
|
||||
version = "7.3.2"
|
||||
version = "7.3.1"
|
||||
description = "pytest: simple powerful testing with Python"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "pytest-7.3.2-py3-none-any.whl", hash = "sha256:cdcbd012c9312258922f8cd3f1b62a6580fdced17db6014896053d47cddf9295"},
|
||||
{file = "pytest-7.3.2.tar.gz", hash = "sha256:ee990a3cc55ba808b80795a79944756f315c67c12b56abd3ac993a7b8c17030b"},
|
||||
{file = "pytest-7.3.1-py3-none-any.whl", hash = "sha256:3799fa815351fea3a5e96ac7e503a96fa51cc9942c3753cda7651b93c1cfa362"},
|
||||
{file = "pytest-7.3.1.tar.gz", hash = "sha256:434afafd78b1d78ed0addf160ad2b77a30d35d4bdf8af234fe621919d9ed15e3"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@ -1317,7 +1307,7 @@ pluggy = ">=0.12,<2.0"
|
||||
tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}
|
||||
|
||||
[package.extras]
|
||||
testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
|
||||
testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"]
|
||||
|
||||
[[package]]
|
||||
name = "pytest-clarity"
|
||||
@ -1456,13 +1446,13 @@ yaml = ["pyyaml (>=5.4)"]
|
||||
|
||||
[[package]]
|
||||
name = "rich"
|
||||
version = "13.4.1"
|
||||
version = "13.3.5"
|
||||
description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
|
||||
optional = false
|
||||
python-versions = ">=3.7.0"
|
||||
files = [
|
||||
{file = "rich-13.4.1-py3-none-any.whl", hash = "sha256:d204aadb50b936bf6b1a695385429d192bc1fdaf3e8b907e8e26f4c4e4b5bf75"},
|
||||
{file = "rich-13.4.1.tar.gz", hash = "sha256:76f6b65ea7e5c5d924ba80e322231d7cb5b5981aa60bfc1e694f1bc097fe6fe1"},
|
||||
{file = "rich-13.3.5-py3-none-any.whl", hash = "sha256:69cdf53799e63f38b95b9bf9c875f8c90e78dd62b2f00c13a911c7a3b9fa4704"},
|
||||
{file = "rich-13.3.5.tar.gz", hash = "sha256:2d11b9b8dd03868f09b4fffadc84a6a8cda574e40dc90821bd845720ebb8e89c"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@ -1615,13 +1605,13 @@ test = ["argcomplete (>=2.0)", "pre-commit", "pytest", "pytest-mock"]
|
||||
|
||||
[[package]]
|
||||
name = "typing-extensions"
|
||||
version = "4.6.3"
|
||||
version = "4.6.2"
|
||||
description = "Backported and Experimental Type Hints for Python 3.7+"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "typing_extensions-4.6.3-py3-none-any.whl", hash = "sha256:88a4153d8505aabbb4e13aacb7c486c2b4a33ca3b3f807914a9b4c844c471c26"},
|
||||
{file = "typing_extensions-4.6.3.tar.gz", hash = "sha256:d91d5919357fe7f681a9f2b5b4cb2a5f1ef0a1e9f59c4d8ff0d3491e05c0ffd5"},
|
||||
{file = "typing_extensions-4.6.2-py3-none-any.whl", hash = "sha256:3a8b36f13dd5fdc5d1b16fe317f5668545de77fa0b8e02006381fd49d731ab98"},
|
||||
{file = "typing_extensions-4.6.2.tar.gz", hash = "sha256:06006244c70ac8ee83fa8282cb188f697b8db25bc8b4df07be1873c43897060c"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -1651,13 +1641,13 @@ six = "*"
|
||||
|
||||
[[package]]
|
||||
name = "urllib3"
|
||||
version = "2.0.3"
|
||||
version = "2.0.2"
|
||||
description = "HTTP library with thread-safe connection pooling, file post, and more."
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "urllib3-2.0.3-py3-none-any.whl", hash = "sha256:48e7fafa40319d358848e1bc6809b208340fafe2096f1725d05d67443d0483d1"},
|
||||
{file = "urllib3-2.0.3.tar.gz", hash = "sha256:bee28b5e56addb8226c96f7f13ac28cb4c301dd5ea8a6ca179c0b9835e032825"},
|
||||
{file = "urllib3-2.0.2-py3-none-any.whl", hash = "sha256:d055c2f9d38dc53c808f6fdc8eab7360b6fdbbde02340ed25cfbcd817c62469e"},
|
||||
{file = "urllib3-2.0.2.tar.gz", hash = "sha256:61717a1095d7e155cdb737ac7bb2f4324a858a1e2e6466f6d03ff630ca68d3cc"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
|
Loading…
Reference in New Issue
Block a user