1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-11-21 21:35:02 +01:00

csv_metadata_quality/fix.py: Reuse a single CountryConverter instance

The country_converter documentation says we should instantiate the
CountryConverter() class once instead of calling coco.convert() in
each iteration of the loop so we don't end up loading the data file
more than once.
This commit is contained in:
Alan Orth 2022-09-01 15:40:45 +03:00
parent fdb7900cd0
commit ae16289637
Signed by: alanorth
GPG Key ID: 0FB860CC9C45B1B9

View File

@ -308,6 +308,11 @@ def countries_match_regions(row):
region_column_name = "" region_column_name = ""
title_column_name = "" title_column_name = ""
# Instantiate a CountryConverter() object once here. According to the docs,
# this is faster than calling coco.convert() directly because the country
# data does not have to be re-loaded on every call.
cc = coco.CountryConverter()
# Iterate over the labels of the current row's values to get the names of # Iterate over the labels of the current row's values to get the names of
# the title and citation columns. Then we check if the title is present in # the title and citation columns. Then we check if the title is present in
# the citation. # the citation.
@ -348,7 +353,7 @@ def countries_match_regions(row):
# Look up the UN M.49 regions for this country code. CoCo seems to # Look up the UN M.49 regions for this country code. CoCo seems to
# only list the direct region, ie Western Africa, rather than all # only list the direct region, ie Western Africa, rather than all
# the parent regions ("Sub-Saharan Africa", "Africa", "World") # the parent regions ("Sub-Saharan Africa", "Africa", "World")
un_region = coco.convert(names=country, to="UNRegion") un_region = cc.convert(names=country, to="UNRegion")
if un_region not in regions: if un_region not in regions:
if un_region not in missing_regions: if un_region not in missing_regions: