1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2025-05-18 18:22:17 +02:00

Expand check/fix for multi-value separators

I just came across some metadata that had unnecessary multi-value
separators at the end of a field, which produces a blank trailing value
when the field is split on "||".

For example: "Kenya||Tanzania||"
This commit is contained in:
2021-01-03 15:30:03 +02:00
parent c26ad83534
commit 0dc66c5c4e
4 changed files with 30 additions and 5 deletions
README.md
csv_metadata_quality

@ -57,7 +57,11 @@ def isbn(field):
def separators(field, field_name):
"""Check for invalid multi-value separators (ie "|" or "|||").
"""Check for invalid and unnecessary multi-value separators, for example:
value|value
value|||value
value||value||
Prints the field with the invalid multi-value separator.
"""
@ -70,10 +74,16 @@ def separators(field, field_name):
# Try to split multi-value field on "||" separator
for value in field.split("||"):
# Check if the current value is blank
if value == "":
print(f"Unnecessary multi-value separator ({field_name}): {field}")
continue
# After splitting, see if there are any remaining "|" characters
match = re.findall(r"^.*?\|.*$", value)
# Check if there was a match
if match:
print(f"Invalid multi-value separator ({field_name}): {field}")