From ae1628963786c87dba17f700959d8128ffe89f9e Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Thu, 1 Sep 2022 15:40:45 +0300 Subject: [PATCH] csv_metadata_quality/fix.py: Reuse a CountryConverter instance The country_converter documentation says we should instantiate the CountryConverter() class once instead of calling coco.convert() in each iteration of the loop so we don't end up loading the data file more than once. --- csv_metadata_quality/fix.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/csv_metadata_quality/fix.py b/csv_metadata_quality/fix.py index cebf47b..00aa6e7 100755 --- a/csv_metadata_quality/fix.py +++ b/csv_metadata_quality/fix.py @@ -308,6 +308,11 @@ def countries_match_regions(row): region_column_name = "" title_column_name = "" + # Instantiate a CountryConverter() object here. According to the docs it is + # more performant to do that as opposed to calling coco.convert() directly + # because we don't need to re-load the country data with each iteration. + cc = coco.CountryConverter() + # Iterate over the labels of the current row's values to get the names of # the title and citation columns. Then we check if the title is present in # the citation. @@ -348,7 +353,7 @@ def countries_match_regions(row): # Look up the UN M.49 regions for this country code. CoCo seems to # only list the direct region, ie Western Africa, rather than all # the parent regions ("Sub-Saharan Africa", "Africa", "World") - un_region = coco.convert(names=country, to="UNRegion") + un_region = cc.convert(names=country, to="UNRegion") if un_region not in regions: if un_region not in missing_regions: