1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-11-25 07:10:17 +01:00

Apply fixes from fixit

Apply recommended fix from fixit:

    RewriteToLiteral: It's slower to call list() than to use the empty literal [], because the name `list` must
    be looked up in the global scope in case it has been rebound.
This commit is contained in:
Alan Orth 2023-11-22 21:54:50 +03:00
parent 80c3f5b45a
commit f6018c51b6
Signed by: alanorth
GPG Key ID: 0FB860CC9C45B1B9
4 changed files with 10 additions and 10 deletions

View File

@ -85,7 +85,7 @@ def run(argv):
# user should be careful not to include spaces here. # user should be careful not to include spaces here.
exclude = args.exclude_fields.split(",") exclude = args.exclude_fields.split(",")
else: else:
exclude = list() exclude = []
# enable transparent request cache with thirty days expiry # enable transparent request cache with thirty days expiry
expire_after = timedelta(days=30) expire_after = timedelta(days=30)

View File

@ -202,7 +202,7 @@ def agrovoc(field, field_name, drop):
return return
# Initialize an empty list to hold the validated AGROVOC values # Initialize an empty list to hold the validated AGROVOC values
values = list() values = []
# Try to split multi-value field on "||" separator # Try to split multi-value field on "||" separator
for value in field.split("||"): for value in field.split("||"):
@ -358,7 +358,7 @@ def duplicate_items(df):
if items_count_unique < items_count_total: if items_count_unique < items_count_total:
# Create a list to hold our items while we check for duplicates # Create a list to hold our items while we check for duplicates
items = list() items = []
for index, row in df.iterrows(): for index, row in df.iterrows():
item_title_type_date = f"{row[title_column_name]}{row[type_column_name]}{row[date_column_name]}" item_title_type_date = f"{row[title_column_name]}{row[type_column_name]}{row[date_column_name]}"
@ -539,7 +539,7 @@ def countries_match_regions(row, exclude):
if row[region_column_name] is not None: if row[region_column_name] is not None:
regions = row[region_column_name].split("||") regions = row[region_column_name].split("||")
else: else:
regions = list() regions = []
for country in countries: for country in countries:
# Look up the UN M.49 regions for this country code. CoCo seems to # Look up the UN M.49 regions for this country code. CoCo seems to

View File

@ -20,7 +20,7 @@ def correct_language(row, exclude):
# Initialize some variables at global scope so that we can set them in the # Initialize some variables at global scope so that we can set them in the
# loop scope below and still be able to access them afterwards. # loop scope below and still be able to access them afterwards.
language = "" language = ""
sample_strings = list() sample_strings = []
title = None title = None
# Iterate over the labels of the current row's values. Before we transposed # Iterate over the labels of the current row's values. Before we transposed

View File

@ -23,7 +23,7 @@ def whitespace(field, field_name):
return return
# Initialize an empty list to hold the cleaned values # Initialize an empty list to hold the cleaned values
values = list() values = []
# Try to split multi-value field on "||" separator # Try to split multi-value field on "||" separator
for value in field.split("||"): for value in field.split("||"):
@ -64,7 +64,7 @@ def separators(field, field_name):
return return
# Initialize an empty list to hold the cleaned values # Initialize an empty list to hold the cleaned values
values = list() values = []
# Try to split multi-value field on "||" separator # Try to split multi-value field on "||" separator
for value in field.split("||"): for value in field.split("||"):
@ -175,7 +175,7 @@ def duplicates(field, field_name):
values = field.split("||") values = field.split("||")
# Initialize an empty list to hold the de-duplicated values # Initialize an empty list to hold the de-duplicated values
new_values = list() new_values = []
# Iterate over all values # Iterate over all values
for value in values: for value in values:
@ -355,10 +355,10 @@ def countries_match_regions(row, exclude):
if row[region_column_name] is not None: if row[region_column_name] is not None:
regions = row[region_column_name].split("||") regions = row[region_column_name].split("||")
else: else:
regions = list() regions = []
# An empty list for our regions so we can keep track for all countries # An empty list for our regions so we can keep track for all countries
missing_regions = list() missing_regions = []
for country in countries: for country in countries:
# Look up the UN M.49 regions for this country code. CoCo seems to # Look up the UN M.49 regions for this country code. CoCo seems to