mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2025-02-18 15:56:22 +01:00
Compare commits
3 Commits
ba4637ea34
...
530cd5863b
Author | SHA1 | Date | |
---|---|---|---|
530cd5863b | |||
f6018c51b6 | |||
80c3f5b45a |
@ -85,7 +85,7 @@ def run(argv):
|
|||||||
# user should be careful not to include spaces here.
|
# user should be careful not to include spaces here.
|
||||||
exclude = args.exclude_fields.split(",")
|
exclude = args.exclude_fields.split(",")
|
||||||
else:
|
else:
|
||||||
exclude = list()
|
exclude = []
|
||||||
|
|
||||||
# enable transparent request cache with thirty days expiry
|
# enable transparent request cache with thirty days expiry
|
||||||
expire_after = timedelta(days=30)
|
expire_after = timedelta(days=30)
|
||||||
|
@ -202,7 +202,7 @@ def agrovoc(field, field_name, drop):
|
|||||||
return
|
return
|
||||||
|
|
||||||
# Initialize an empty list to hold the validated AGROVOC values
|
# Initialize an empty list to hold the validated AGROVOC values
|
||||||
values = list()
|
values = []
|
||||||
|
|
||||||
# Try to split multi-value field on "||" separator
|
# Try to split multi-value field on "||" separator
|
||||||
for value in field.split("||"):
|
for value in field.split("||"):
|
||||||
@ -358,7 +358,7 @@ def duplicate_items(df):
|
|||||||
|
|
||||||
if items_count_unique < items_count_total:
|
if items_count_unique < items_count_total:
|
||||||
# Create a list to hold our items while we check for duplicates
|
# Create a list to hold our items while we check for duplicates
|
||||||
items = list()
|
items = []
|
||||||
|
|
||||||
for index, row in df.iterrows():
|
for index, row in df.iterrows():
|
||||||
item_title_type_date = f"{row[title_column_name]}{row[type_column_name]}{row[date_column_name]}"
|
item_title_type_date = f"{row[title_column_name]}{row[type_column_name]}{row[date_column_name]}"
|
||||||
@ -539,7 +539,7 @@ def countries_match_regions(row, exclude):
|
|||||||
if row[region_column_name] is not None:
|
if row[region_column_name] is not None:
|
||||||
regions = row[region_column_name].split("||")
|
regions = row[region_column_name].split("||")
|
||||||
else:
|
else:
|
||||||
regions = list()
|
regions = []
|
||||||
|
|
||||||
for country in countries:
|
for country in countries:
|
||||||
# Look up the UN M.49 regions for this country code. CoCo seems to
|
# Look up the UN M.49 regions for this country code. CoCo seems to
|
||||||
|
@ -20,7 +20,7 @@ def correct_language(row, exclude):
|
|||||||
# Initialize some variables at global scope so that we can set them in the
|
# Initialize some variables at global scope so that we can set them in the
|
||||||
# loop scope below and still be able to access them afterwards.
|
# loop scope below and still be able to access them afterwards.
|
||||||
language = ""
|
language = ""
|
||||||
sample_strings = list()
|
sample_strings = []
|
||||||
title = None
|
title = None
|
||||||
|
|
||||||
# Iterate over the labels of the current row's values. Before we transposed
|
# Iterate over the labels of the current row's values. Before we transposed
|
||||||
|
@ -23,7 +23,7 @@ def whitespace(field, field_name):
|
|||||||
return
|
return
|
||||||
|
|
||||||
# Initialize an empty list to hold the cleaned values
|
# Initialize an empty list to hold the cleaned values
|
||||||
values = list()
|
values = []
|
||||||
|
|
||||||
# Try to split multi-value field on "||" separator
|
# Try to split multi-value field on "||" separator
|
||||||
for value in field.split("||"):
|
for value in field.split("||"):
|
||||||
@ -64,7 +64,7 @@ def separators(field, field_name):
|
|||||||
return
|
return
|
||||||
|
|
||||||
# Initialize an empty list to hold the cleaned values
|
# Initialize an empty list to hold the cleaned values
|
||||||
values = list()
|
values = []
|
||||||
|
|
||||||
# Try to split multi-value field on "||" separator
|
# Try to split multi-value field on "||" separator
|
||||||
for value in field.split("||"):
|
for value in field.split("||"):
|
||||||
@ -175,7 +175,7 @@ def duplicates(field, field_name):
|
|||||||
values = field.split("||")
|
values = field.split("||")
|
||||||
|
|
||||||
# Initialize an empty list to hold the de-duplicated values
|
# Initialize an empty list to hold the de-duplicated values
|
||||||
new_values = list()
|
new_values = []
|
||||||
|
|
||||||
# Iterate over all values
|
# Iterate over all values
|
||||||
for value in values:
|
for value in values:
|
||||||
@ -355,10 +355,10 @@ def countries_match_regions(row, exclude):
|
|||||||
if row[region_column_name] is not None:
|
if row[region_column_name] is not None:
|
||||||
regions = row[region_column_name].split("||")
|
regions = row[region_column_name].split("||")
|
||||||
else:
|
else:
|
||||||
regions = list()
|
regions = []
|
||||||
|
|
||||||
# An empty list for our regions so we can keep track for all countries
|
# An empty list for our regions so we can keep track for all countries
|
||||||
missing_regions = list()
|
missing_regions = []
|
||||||
|
|
||||||
for country in countries:
|
for country in countries:
|
||||||
# Look up the UN M.49 regions for this country code. CoCo seems to
|
# Look up the UN M.49 regions for this country code. CoCo seems to
|
||||||
|
868
poetry.lock
generated
868
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@ -30,6 +30,7 @@ black = "^23.1.0"
|
|||||||
isort = "^5.12.0"
|
isort = "^5.12.0"
|
||||||
csvkit = "^1.1.0"
|
csvkit = "^1.1.0"
|
||||||
ipython = "^8.10.0"
|
ipython = "^8.10.0"
|
||||||
|
fixit = "^2.1.0"
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = ["poetry>=0.12"]
|
requires = ["poetry>=0.12"]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user