1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2025-05-09 22:56:01 +02:00

Normalize DOIs with %2f

These seem to be incorrectly URL-encoded.
This commit is contained in:
2024-06-25 11:54:09 +03:00
parent ae38a826ec
commit 92ff0ee51b
2 changed files with 8 additions and 0 deletions

View File

@@ -451,6 +451,13 @@ def normalize_dois(field):
if match:
new_value = re.sub(pattern, "doi.org", new_value)
# Convert erroneous %2f to /
pattern = re.compile("%2f")
match = re.findall(pattern, new_value)
if match:
new_value = re.sub(pattern, "/", new_value)
# Replace values like doi: 10.11648/j.jps.20140201.14
pattern = re.compile(r"^doi: 10\.")
match = re.findall(pattern, new_value)