mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2025-02-19 00:06:21 +01:00
Compare commits
3 Commits
ba4637ea34
...
530cd5863b
Author | SHA1 | Date | |
---|---|---|---|
530cd5863b | |||
f6018c51b6 | |||
80c3f5b45a |
@ -85,7 +85,7 @@ def run(argv):
|
||||
# user should be careful not to include spaces here.
|
||||
exclude = args.exclude_fields.split(",")
|
||||
else:
|
||||
exclude = list()
|
||||
exclude = []
|
||||
|
||||
# enable transparent request cache with thirty days expiry
|
||||
expire_after = timedelta(days=30)
|
||||
|
@ -202,7 +202,7 @@ def agrovoc(field, field_name, drop):
|
||||
return
|
||||
|
||||
# Initialize an empty list to hold the validated AGROVOC values
|
||||
values = list()
|
||||
values = []
|
||||
|
||||
# Try to split multi-value field on "||" separator
|
||||
for value in field.split("||"):
|
||||
@ -358,7 +358,7 @@ def duplicate_items(df):
|
||||
|
||||
if items_count_unique < items_count_total:
|
||||
# Create a list to hold our items while we check for duplicates
|
||||
items = list()
|
||||
items = []
|
||||
|
||||
for index, row in df.iterrows():
|
||||
item_title_type_date = f"{row[title_column_name]}{row[type_column_name]}{row[date_column_name]}"
|
||||
@ -539,7 +539,7 @@ def countries_match_regions(row, exclude):
|
||||
if row[region_column_name] is not None:
|
||||
regions = row[region_column_name].split("||")
|
||||
else:
|
||||
regions = list()
|
||||
regions = []
|
||||
|
||||
for country in countries:
|
||||
# Look up the UN M.49 regions for this country code. CoCo seems to
|
||||
|
@ -20,7 +20,7 @@ def correct_language(row, exclude):
|
||||
# Initialize some variables at global scope so that we can set them in the
|
||||
# loop scope below and still be able to access them afterwards.
|
||||
language = ""
|
||||
sample_strings = list()
|
||||
sample_strings = []
|
||||
title = None
|
||||
|
||||
# Iterate over the labels of the current row's values. Before we transposed
|
||||
|
@ -23,7 +23,7 @@ def whitespace(field, field_name):
|
||||
return
|
||||
|
||||
# Initialize an empty list to hold the cleaned values
|
||||
values = list()
|
||||
values = []
|
||||
|
||||
# Try to split multi-value field on "||" separator
|
||||
for value in field.split("||"):
|
||||
@ -64,7 +64,7 @@ def separators(field, field_name):
|
||||
return
|
||||
|
||||
# Initialize an empty list to hold the cleaned values
|
||||
values = list()
|
||||
values = []
|
||||
|
||||
# Try to split multi-value field on "||" separator
|
||||
for value in field.split("||"):
|
||||
@ -175,7 +175,7 @@ def duplicates(field, field_name):
|
||||
values = field.split("||")
|
||||
|
||||
# Initialize an empty list to hold the de-duplicated values
|
||||
new_values = list()
|
||||
new_values = []
|
||||
|
||||
# Iterate over all values
|
||||
for value in values:
|
||||
@ -355,10 +355,10 @@ def countries_match_regions(row, exclude):
|
||||
if row[region_column_name] is not None:
|
||||
regions = row[region_column_name].split("||")
|
||||
else:
|
||||
regions = list()
|
||||
regions = []
|
||||
|
||||
# An empty list for our regions so we can keep track for all countries
|
||||
missing_regions = list()
|
||||
missing_regions = []
|
||||
|
||||
for country in countries:
|
||||
# Look up the UN M.49 regions for this country code. CoCo seems to
|
||||
|
868
poetry.lock
generated
868
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@ -30,6 +30,7 @@ black = "^23.1.0"
|
||||
isort = "^5.12.0"
|
||||
csvkit = "^1.1.0"
|
||||
ipython = "^8.10.0"
|
||||
fixit = "^2.1.0"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry>=0.12"]
|
||||
|
Loading…
x
Reference in New Issue
Block a user