1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-11-16 19:17:03 +01:00
csv-metadata-quality/tests/test_check.py
Alan Orth fa4fa3491b
Add check for "suspicious" characters
These standalone characters often indicate issues with encoding or
copy/paste in languages with accents like French and Spanish. For
example: foreˆt should be forêt.

It is not possible to fix these issues automatically, but this will
print a warning so you can notify the owner of the data.
2019-07-29 17:08:49 +03:00

119 lines
2.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import csv_metadata_quality.check as check
def test_check_invalid_issn(capsys):
    '''Ensure check.issn() prints an "Invalid ISSN" message for 2321-2302.'''

    bad_issn = '2321-2302'

    check.issn(bad_issn)

    # Only stdout is inspected; the return value is not checked here.
    printed = capsys.readouterr().out
    assert printed == f'Invalid ISSN: {bad_issn}\n'
def test_check_valid_issn():
    '''Ensure check.issn() returns the ISSN 0024-9319 as given.'''

    good_issn = '0024-9319'

    assert check.issn(good_issn) == good_issn
def test_check_invalid_isbn(capsys):
    '''Ensure check.isbn() prints an "Invalid ISBN" message for 99921-58-10-6.'''

    bad_isbn = '99921-58-10-6'

    check.isbn(bad_isbn)

    # Only stdout is inspected; the return value is not checked here.
    printed = capsys.readouterr().out
    assert printed == f'Invalid ISBN: {bad_isbn}\n'
def test_check_valid_isbn():
    '''Ensure check.isbn() returns the ISBN 99921-58-10-7 as given.'''

    good_isbn = '99921-58-10-7'

    assert check.isbn(good_isbn) == good_isbn
def test_check_invalid_separators(capsys):
    '''Ensure check.separators() reports a value split by a single "|".'''

    single_pipe = 'Alan|Orth'

    check.separators(single_pipe)

    # Only stdout is inspected; the return value is not checked here.
    printed = capsys.readouterr().out
    assert printed == f'Invalid multi-value separator: {single_pipe}\n'
def test_check_valid_separators():
    '''Ensure check.separators() returns a value split by "||" as given.'''

    double_pipe = 'Alan||Orth'

    assert check.separators(double_pipe) == double_pipe
def test_check_missing_date(capsys):
    '''Ensure check.date() prints "Missing date." when given None.'''

    check.date(None)

    # The expected message has no placeholders, so it is a plain string
    # literal rather than an f-string.
    printed = capsys.readouterr().out
    assert printed == 'Missing date.\n'
def test_check_multiple_dates(capsys):
    '''Ensure check.date() reports a field holding two "||"-joined dates.'''

    two_dates = '1990||1991'

    check.date(two_dates)

    # Only stdout is inspected; the return value is not checked here.
    printed = capsys.readouterr().out
    assert printed == f'Multiple dates not allowed: {two_dates}\n'
def test_check_invalid_date(capsys):
    '''Ensure check.date() prints an "Invalid date" message for 1990-0.'''

    bad_date = '1990-0'

    check.date(bad_date)

    # Only stdout is inspected; the return value is not checked here.
    printed = capsys.readouterr().out
    assert printed == f'Invalid date: {bad_date}\n'
def test_check_valid_date():
    '''Ensure check.date() returns the year-only date 1990 as given.'''

    good_date = '1990'

    assert check.date(good_date) == good_date
def test_check_suspicious_characters(capsys):
    '''Ensure check.suspicious_characters() reports a value containing "ˆ".'''

    garbled = 'foreˆt'

    check.suspicious_characters(garbled)

    # Only stdout is inspected; the return value is not checked here.
    printed = capsys.readouterr().out
    assert printed == f'Suspicious character: {garbled}\n'