1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2025-05-09 14:46:00 +02:00

Add fix for duplicate metadata values

This commit is contained in:
2019-07-29 18:05:03 +03:00
parent d7888d59a8
commit 1e444cf040
4 changed files with 38 additions and 1 deletions

View File

@@ -40,6 +40,9 @@ def main(argv):
# Run whitespace fix again after fixing invalid separators
df[column] = df[column].apply(fix.whitespace)
# Fix: duplicate metadata values
df[column] = df[column].apply(fix.duplicates)
# Check: invalid ISSN
match = re.match(r'^.*?issn.*$', column)
if match is not None: