mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-11-22 05:45:02 +01:00
csv_metadata_quality/fix.py: Harmonize language in fix output
The fix output messages should always say whether we're removing or replacing something.
This commit is contained in:
parent
85ae16d9b7
commit
e55380b4d5
@@ -26,7 +26,7 @@ def whitespace(field):
|
|||||||
match = re.findall(pattern, value)
|
match = re.findall(pattern, value)
|
||||||
|
|
||||||
if match:
|
if match:
|
||||||
print(f"Excessive whitespace: {value}")
|
print(f"Removing excessive whitespace: {value}")
|
||||||
value = re.sub(pattern, " ", value)
|
value = re.sub(pattern, " ", value)
|
||||||
|
|
||||||
# Save cleaned value
|
# Save cleaned value
|
||||||
@@ -140,7 +140,7 @@ def duplicates(field):
|
|||||||
if value not in new_values:
|
if value not in new_values:
|
||||||
new_values.append(value)
|
new_values.append(value)
|
||||||
else:
|
else:
|
||||||
print(f"Dropping duplicate value: {value}")
|
print(f"Removing duplicate value: {value}")
|
||||||
|
|
||||||
# Create a new field consisting of all values joined with "||"
|
# Create a new field consisting of all values joined with "||"
|
||||||
new_field = "||".join(new_values)
|
new_field = "||".join(new_values)
|
||||||
|
Loading…
Reference in New Issue
Block a user