1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2025-02-19 00:06:21 +01:00

Compare commits

...

3 Commits

Author SHA1 Message Date
530cd5863b
poetry.lock: run poetry update
All checks were successful
continuous-integration/drone/push Build is passing
2023-11-22 22:07:30 +03:00
f6018c51b6
Apply fixes from fixit
Apply recommended fix from fixit:

    RewriteToLiteral: It's slower to call list() than using the empty literal, because the name list must
    be looked up in the global scope in case it has been rebound.
2023-11-22 21:54:50 +03:00
80c3f5b45a
Add fixit to dev dependencies 2023-11-22 21:54:09 +03:00
6 changed files with 544 additions and 345 deletions

View File

@ -85,7 +85,7 @@ def run(argv):
# user should be careful to not include spaces here. # user should be careful to not include spaces here.
exclude = args.exclude_fields.split(",") exclude = args.exclude_fields.split(",")
else: else:
exclude = list() exclude = []
# enable transparent request cache with thirty days expiry # enable transparent request cache with thirty days expiry
expire_after = timedelta(days=30) expire_after = timedelta(days=30)

View File

@ -202,7 +202,7 @@ def agrovoc(field, field_name, drop):
return return
# Initialize an empty list to hold the validated AGROVOC values # Initialize an empty list to hold the validated AGROVOC values
values = list() values = []
# Try to split multi-value field on "||" separator # Try to split multi-value field on "||" separator
for value in field.split("||"): for value in field.split("||"):
@ -358,7 +358,7 @@ def duplicate_items(df):
if items_count_unique < items_count_total: if items_count_unique < items_count_total:
# Create a list to hold our items while we check for duplicates # Create a list to hold our items while we check for duplicates
items = list() items = []
for index, row in df.iterrows(): for index, row in df.iterrows():
item_title_type_date = f"{row[title_column_name]}{row[type_column_name]}{row[date_column_name]}" item_title_type_date = f"{row[title_column_name]}{row[type_column_name]}{row[date_column_name]}"
@ -539,7 +539,7 @@ def countries_match_regions(row, exclude):
if row[region_column_name] is not None: if row[region_column_name] is not None:
regions = row[region_column_name].split("||") regions = row[region_column_name].split("||")
else: else:
regions = list() regions = []
for country in countries: for country in countries:
# Look up the UN M.49 regions for this country code. CoCo seems to # Look up the UN M.49 regions for this country code. CoCo seems to

View File

@ -20,7 +20,7 @@ def correct_language(row, exclude):
# Initialize some variables at function scope so that we can set them in the # Initialize some variables at function scope so that we can set them in the
# loop scope below and still be able to access them afterwards. # loop scope below and still be able to access them afterwards.
language = "" language = ""
sample_strings = list() sample_strings = []
title = None title = None
# Iterate over the labels of the current row's values. Before we transposed # Iterate over the labels of the current row's values. Before we transposed

View File

@ -23,7 +23,7 @@ def whitespace(field, field_name):
return return
# Initialize an empty list to hold the cleaned values # Initialize an empty list to hold the cleaned values
values = list() values = []
# Try to split multi-value field on "||" separator # Try to split multi-value field on "||" separator
for value in field.split("||"): for value in field.split("||"):
@ -64,7 +64,7 @@ def separators(field, field_name):
return return
# Initialize an empty list to hold the cleaned values # Initialize an empty list to hold the cleaned values
values = list() values = []
# Try to split multi-value field on "||" separator # Try to split multi-value field on "||" separator
for value in field.split("||"): for value in field.split("||"):
@ -175,7 +175,7 @@ def duplicates(field, field_name):
values = field.split("||") values = field.split("||")
# Initialize an empty list to hold the de-duplicated values # Initialize an empty list to hold the de-duplicated values
new_values = list() new_values = []
# Iterate over all values # Iterate over all values
for value in values: for value in values:
@ -355,10 +355,10 @@ def countries_match_regions(row, exclude):
if row[region_column_name] is not None: if row[region_column_name] is not None:
regions = row[region_column_name].split("||") regions = row[region_column_name].split("||")
else: else:
regions = list() regions = []
# An empty list for our regions so we can keep track for all countries # An empty list for our regions so we can keep track for all countries
missing_regions = list() missing_regions = []
for country in countries: for country in countries:
# Look up the UN M.49 regions for this country code. CoCo seems to # Look up the UN M.49 regions for this country code. CoCo seems to

868
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -30,6 +30,7 @@ black = "^23.1.0"
isort = "^5.12.0" isort = "^5.12.0"
csvkit = "^1.1.0" csvkit = "^1.1.0"
ipython = "^8.10.0" ipython = "^8.10.0"
fixit = "^2.1.0"
[build-system] [build-system]
requires = ["poetry>=0.12"] requires = ["poetry>=0.12"]