mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2025-11-09 17:09:09 +01:00
Compare commits
3 Commits
renovate/l
...
renovate/p
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b457ad9c8c | ||
|
d34e8aa6f1
|
|||
|
74edd8db22
|
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||||||
## Unreleased
|
## Unreleased
|
||||||
### Changed
|
### Changed
|
||||||
- New AGROVOC REST API URL
|
- New AGROVOC REST API URL
|
||||||
|
- Use urllib from Python stdlib instead of manual replacement for unquoting URLs
|
||||||
|
|
||||||
## [0.7.0] - 2025-01-31
|
## [0.7.0] - 2025-01-31
|
||||||
### Added
|
### Added
|
||||||
|
|||||||
@@ -45,10 +45,11 @@ build-backend = "uv_build"
|
|||||||
|
|
||||||
[dependency-groups]
|
[dependency-groups]
|
||||||
dev = [
|
dev = [
|
||||||
"pytest~=8.3",
|
"pytest~=9.0",
|
||||||
"isort~=6.0",
|
"isort~=6.0",
|
||||||
"csvkit~=2.0",
|
"csvkit~=2.0",
|
||||||
"ipython~=8.31",
|
"ipython~=8.31",
|
||||||
|
"black~=25.9",
|
||||||
]
|
]
|
||||||
|
|
||||||
[tool.isort]
|
[tool.isort]
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
from unicodedata import normalize
|
from unicodedata import normalize
|
||||||
|
from urllib.parse import unquote
|
||||||
|
|
||||||
import country_converter as coco
|
import country_converter as coco
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
@@ -451,12 +452,8 @@ def normalize_dois(field):
|
|||||||
if match:
|
if match:
|
||||||
new_value = re.sub(pattern, "doi.org", new_value)
|
new_value = re.sub(pattern, "doi.org", new_value)
|
||||||
|
|
||||||
# Convert erroneous %2f to /
|
# Replace %xx escapes with their single-character equivalent.
|
||||||
pattern = re.compile("%2f")
|
new_value = unquote(new_value)
|
||||||
match = re.findall(pattern, new_value)
|
|
||||||
|
|
||||||
if match:
|
|
||||||
new_value = re.sub(pattern, "/", new_value)
|
|
||||||
|
|
||||||
# Replace values like doi: 10.11648/j.jps.20140201.14
|
# Replace values like doi: 10.11648/j.jps.20140201.14
|
||||||
pattern = re.compile(r"^doi: 10\.")
|
pattern = re.compile(r"^doi: 10\.")
|
||||||
|
|||||||
Reference in New Issue
Block a user