1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2025-05-09 14:46:00 +02:00

csv_metadata_quality/fix.py: fix bug in regions

We need to make sure we only modify the row's regions when some are
actually missing. The previous code always rewrote the existing row,
even when there were no missing regions, which produced new values
with a trailing separator, like "Eastern Africa||".
This commit is contained in:
2022-09-01 16:15:32 +03:00
parent 7ce20726d0
commit f49214fa2e

View File

@ -369,14 +369,15 @@ def countries_match_regions(row):
)
missing_regions.append(un_region)
# Add the missing regions back to the row, paying attention to whether
# or not the row's region column is None (aka null) or just an empty
# string (length would be 0).
if row[region_column_name] is not None and len(row[region_column_name]) > 0:
row[region_column_name] = (
row[region_column_name] + "||" + "||".join(missing_regions)
)
else:
row[region_column_name] = "||".join(missing_regions)
if len(missing_regions) > 0:
# Add the missing regions back to the row, paying attention to whether
# or not the row's region column is None (aka null) or just an empty
# string (length would be 0).
if row[region_column_name] is not None and len(row[region_column_name]) > 0:
row[region_column_name] = (
row[region_column_name] + "||" + "||".join(missing_regions)
)
else:
row[region_column_name] = "||".join(missing_regions)
return row