1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2025-02-19 00:06:21 +01:00

Compare commits

..

No commits in common. "530cd5863b415d9e543308702cb9a1a1f954c871" and "ba4637ea34be7eb5e2fb313dd31556362b738118" have entirely different histories.

6 changed files with 345 additions and 544 deletions

View File

@ -85,7 +85,7 @@ def run(argv):
# user should be careful to not include spaces here.
exclude = args.exclude_fields.split(",")
else:
exclude = []
exclude = list()
# enable transparent request cache with thirty days expiry
expire_after = timedelta(days=30)

View File

@ -202,7 +202,7 @@ def agrovoc(field, field_name, drop):
return
# Initialize an empty list to hold the validated AGROVOC values
values = []
values = list()
# Try to split multi-value field on "||" separator
for value in field.split("||"):
@ -358,7 +358,7 @@ def duplicate_items(df):
if items_count_unique < items_count_total:
# Create a list to hold our items while we check for duplicates
items = []
items = list()
for index, row in df.iterrows():
item_title_type_date = f"{row[title_column_name]}{row[type_column_name]}{row[date_column_name]}"
@ -539,7 +539,7 @@ def countries_match_regions(row, exclude):
if row[region_column_name] is not None:
regions = row[region_column_name].split("||")
else:
regions = []
regions = list()
for country in countries:
# Look up the UN M.49 regions for this country code. CoCo seems to

View File

@ -20,7 +20,7 @@ def correct_language(row, exclude):
# Initialize some variables at global scope so that we can set them in the
# loop scope below and still be able to access them afterwards.
language = ""
sample_strings = []
sample_strings = list()
title = None
# Iterate over the labels of the current row's values. Before we transposed

View File

@ -23,7 +23,7 @@ def whitespace(field, field_name):
return
# Initialize an empty list to hold the cleaned values
values = []
values = list()
# Try to split multi-value field on "||" separator
for value in field.split("||"):
@ -64,7 +64,7 @@ def separators(field, field_name):
return
# Initialize an empty list to hold the cleaned values
values = []
values = list()
# Try to split multi-value field on "||" separator
for value in field.split("||"):
@ -175,7 +175,7 @@ def duplicates(field, field_name):
values = field.split("||")
# Initialize an empty list to hold the de-duplicated values
new_values = []
new_values = list()
# Iterate over all values
for value in values:
@ -355,10 +355,10 @@ def countries_match_regions(row, exclude):
if row[region_column_name] is not None:
regions = row[region_column_name].split("||")
else:
regions = []
regions = list()
# An empty list for our regions so we can keep track for all countries
missing_regions = []
missing_regions = list()
for country in countries:
# Look up the UN M.49 regions for this country code. CoCo seems to

868
poetry.lock generated

File diff suppressed because it is too large. Load Diff

View File

@ -30,7 +30,6 @@ black = "^23.1.0"
isort = "^5.12.0"
csvkit = "^1.1.0"
ipython = "^8.10.0"
fixit = "^2.1.0"
[build-system]
requires = ["poetry>=0.12"]