mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2025-05-18 18:22:17 +02:00
Expand check/fix for multi-value separators
I just came across some metadata that had unnecessary multi-value separators at the end of a field, which produces a blank value when the field is split on "||". For example, "Kenya||Tanzania||" splits into "Kenya", "Tanzania", and an empty string.
This commit is contained in:
@ -57,7 +57,11 @@ def isbn(field):
|
||||
|
||||
|
||||
def separators(field, field_name):
|
||||
"""Check for invalid multi-value separators (ie "|" or "|||").
|
||||
"""Check for invalid and unnecessary multi-value separators, for example:
|
||||
|
||||
value|value
|
||||
value|||value
|
||||
value||value||
|
||||
|
||||
Prints the field with the invalid multi-value separator.
|
||||
"""
|
||||
@ -70,10 +74,16 @@ def separators(field, field_name):
|
||||
|
||||
# Try to split multi-value field on "||" separator
|
||||
for value in field.split("||"):
|
||||
# Check if the current value is blank
|
||||
if value == "":
|
||||
print(f"Unnecessary multi-value separator ({field_name}): {field}")
|
||||
|
||||
continue
|
||||
|
||||
# After splitting, see if there are any remaining "|" characters
|
||||
match = re.findall(r"^.*?\|.*$", value)
|
||||
|
||||
# Check if there was a match
|
||||
if match:
|
||||
print(f"Invalid multi-value separator ({field_name}): {field}")
|
||||
|
||||
|
Reference in New Issue
Block a user