mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-12-22 04:02:19 +01:00
csv_metadata_quality/app.py: Improve comments
This commit is contained in:
parent
42920e9c7c
commit
d73f7b54b1
@ -22,29 +22,29 @@ def main(argv):
|
||||
df = pd.read_csv(args.input_file, dtype=str)
|
||||
|
||||
for column in df.columns.values.tolist():
|
||||
# Run whitespace fix on all columns
|
||||
# Fix: whitespace
|
||||
df[column] = df[column].apply(fix.whitespace)
|
||||
|
||||
# Run invalid multi-value separator check on all columns
|
||||
# Check: invalid multi-value separator
|
||||
df[column] = df[column].apply(check.separators)
|
||||
|
||||
# Run invalid multi-value separator fix on all columns
|
||||
# Fix: invalid multi-value separator
|
||||
if args.unsafe_fixes:
|
||||
df[column] = df[column].apply(fix.separators)
|
||||
# Run whitespace fix again after fixing invalid separators
|
||||
df[column] = df[column].apply(fix.whitespace)
|
||||
|
||||
# check if column is an issn column like dc.identifier.issn
|
||||
# Check: invalid ISSN
|
||||
match = re.match(r'^.*?issn.*$', column)
|
||||
if match is not None:
|
||||
df[column] = df[column].apply(check.issn)
|
||||
|
||||
# check if column is an isbn column like dc.identifier.isbn
|
||||
# Check: invalid ISBN
|
||||
match = re.match(r'^.*?isbn.*$', column)
|
||||
if match is not None:
|
||||
df[column] = df[column].apply(check.isbn)
|
||||
|
||||
# check if column is a date column like dc.date.issued
|
||||
# Check: invalid date
|
||||
match = re.match(r'^.*?date.*$', column)
|
||||
if match is not None:
|
||||
df[column] = df[column].apply(check.date)
|
||||
|
Loading…
Reference in New Issue
Block a user