1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-06-29 01:23:45 +02:00
csv-metadata-quality/tests/test_fix.py
Alan Orth 8047a57cc5
Add support for fixing "unnecessary" Unicode
These are things like non-breaking spaces, "replacement" characters,
etc. that add nothing to the metadata, and often cause errors when
parsing or displaying it in a UI.
2019-07-29 16:38:10 +03:00

42 lines
820 B
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import csv_metadata_quality.fix as fix
def test_fix_leading_whitespace():
    '''Check that whitespace at the start of a value is stripped.'''

    padded = ' Alan'
    cleaned = fix.whitespace(padded)

    assert cleaned == 'Alan'
def test_fix_trailing_whitespace():
    '''Check that whitespace at the end of a value is stripped.'''

    padded = 'Alan '
    cleaned = fix.whitespace(padded)

    assert cleaned == 'Alan'
def test_fix_excessive_whitespace():
    '''Test fixing excessive whitespace.

    The input must contain a run of more than one space between the two
    words. If it holds only a single space it already equals the expected
    output, and the assertion passes without exercising the fix at all.
    '''

    # Two consecutive spaces between the words -- this is deliberate.
    value = 'Alan  Orth'

    # The run of spaces is expected to be collapsed to a single space.
    assert fix.whitespace(value) == 'Alan Orth'
def test_fix_invalid_separators():
    '''Check that a single "|" between values is rewritten as "||".'''

    single_pipe = 'Alan|Orth'
    expected = 'Alan||Orth'

    assert fix.separators(single_pipe) == expected
def test_fix_unnecessary_unicode():
    '''Test fixing unnecessary Unicode.

    The offending character is written as an explicit escape sequence
    rather than as a raw invisible character, so that it survives editors,
    copy/paste and web rendering, and so that a reader can see what is
    actually being tested.
    '''

    # NOTE(review): the original literal relied on an invisible character
    # whose exact code point is not recoverable from the rendered source.
    # A zero-width space (U+200B) after "Alan" is assumed here -- confirm
    # against the characters fix.unnecessary_unicode() actually handles.
    value = 'Alan\u200b Orth'

    assert fix.unnecessary_unicode(value) == 'Alan Orth'