1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2025-11-09 17:09:09 +01:00

3 Commits

Author SHA1 Message Date
renovate[bot]
b457ad9c8c chore(deps): update dependency pytest to v9 2025-11-08 17:39:39 +00:00
d34e8aa6f1 Add black back
Rye had ruff built in for formatting, but now with uv we need to add
black ourselves.
2025-10-23 10:40:27 +03:00
74edd8db22 src: use urllib from Python stdlib for unquoting
Instead of manually replacing urlencoded characters.
2025-10-23 10:40:05 +03:00
4 changed files with 405 additions and 501 deletions

View File

@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## Unreleased
### Changed
- New AGROVOC REST API URL
+ - Use urllib from Python stdlib instead of manual replacement for unquoting URLs
## [0.7.0] - 2025-01-31
### Added

View File

@@ -45,10 +45,11 @@ build-backend = "uv_build"
[dependency-groups]
dev = [
- "pytest~=8.3",
+ "pytest~=9.0",
"isort~=6.0",
"csvkit~=2.0",
"ipython~=8.31",
+ "black~=25.9",
]
[tool.isort]

View File

@@ -3,6 +3,7 @@
import logging
import re
from unicodedata import normalize
+ from urllib.parse import unquote
import country_converter as coco
import pandas as pd
@@ -451,12 +452,8 @@ def normalize_dois(field):
if match:
new_value = re.sub(pattern, "doi.org", new_value)
- # Convert erroneous %2f to /
- pattern = re.compile("%2f")
- match = re.findall(pattern, new_value)
- if match:
- new_value = re.sub(pattern, "/", new_value)
+ # Replace %xx escapes with their single-character equivalent.
+ new_value = unquote(new_value)
# Replace values like doi: 10.11648/j.jps.20140201.14
pattern = re.compile(r"^doi: 10\.")

893
uv.lock generated

File diff suppressed because it is too large Load Diff