1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2025-05-08 06:06:00 +02:00

Remove checks for invalid multi-value separators

Now that I no longer treat the fix for these as "unsafe" I don't
actually need to check for them—I can just fix them when I see them.
This commit is contained in:
2021-03-14 21:01:21 +02:00
parent 3656e9f976
commit 10612cf891
3 changed files with 0 additions and 83 deletions

View File

@ -104,9 +104,6 @@ def run(argv):
# Fix: unnecessary Unicode
df[column] = df[column].apply(fix.unnecessary_unicode)
# Check: invalid and unnecessary multi-value separators
df[column] = df[column].apply(check.separators, field_name=column)
# Check: suspicious characters
df[column] = df[column].apply(check.suspicious_characters, field_name=column)

View File

@ -58,42 +58,6 @@ def isbn(field):
return field
def separators(field, field_name):
    """Report invalid or unnecessary multi-value separators in a field.

    Multiple values are expected to be delimited by "||". Examples of
    fields that get reported:

        value|value
        value|||value
        value||value||

    A message containing the complete field is printed for each offending
    piece found after splitting on "||".

    Returns the field unchanged, or None when the field is missing.
    """

    # Nothing to inspect when the value is missing
    if pd.isna(field):
        return

    for piece in field.split("||"):
        # A blank piece is reported as an unnecessary separator
        if piece == "":
            print(
                f"{Fore.RED}Unnecessary multi-value separator ({field_name}): {Fore.RESET}{field}"
            )
            continue

        # Any "|" still present after splitting on "||" is not a valid separator
        if re.findall(r"^.*?\|.*$", piece):
            print(
                f"{Fore.RED}Invalid multi-value separator ({field_name}): {Fore.RESET}{field}"
            )

    return field
def date(field, field_name):
"""Check if a date is valid.