1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2025-05-10 07:06:00 +02:00

Add fix for normalizing DOIs

This commit is contained in:
2024-04-25 12:49:19 +03:00
parent 736948ed2c
commit 5be2195325
6 changed files with 91 additions and 1 deletion

View File

@@ -141,6 +141,11 @@ def run(argv):
# Fix: unnecessary Unicode
df[column] = df[column].apply(fix.unnecessary_unicode)
# Fix: normalize DOIs
match = re.match(r"^.*?identifier\.doi.*$", column)
if match is not None:
df[column] = df[column].apply(fix.normalize_dois)
# Fix: invalid and unnecessary multi-value separators. Skip the title
# and abstract fields because "|" is used to indicate something like
# a subtitle.