mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-11-18 03:57:03 +01:00
Alan Orth
8047a57cc5
These are things like non-breaking spaces, "replacement" characters, etc., that add nothing to the metadata and often cause errors during parsing or when displayed in a UI.
42 lines
820 B
Python
42 lines
820 B
Python
import csv_metadata_quality.fix as fix
|
||
|
||
|
||
def test_fix_leading_whitespace():
    '''Check that whitespace in front of a value is stripped.'''

    padded = ' Alan'
    cleaned = fix.whitespace(padded)

    # The name must come back with nothing before it.
    assert cleaned == 'Alan'
|
||
|
||
|
||
def test_fix_trailing_whitespace():
    '''Check that whitespace after a value is stripped.'''

    padded = 'Alan '
    cleaned = fix.whitespace(padded)

    # The name must come back with nothing after it.
    assert cleaned == 'Alan'
|
||
|
||
|
||
def test_fix_excessive_whitespace():
    '''Test fixing excessive whitespace.

    The input has a run of two spaces between the words and the expected
    result has exactly one, so the assertion only holds if fix.whitespace()
    really collapses the run.
    '''

    # Two consecutive spaces on purpose: with a single space the input would
    # already equal the expected output and the test could never fail.
    value = 'Alan  Orth'

    assert fix.whitespace(value) == 'Alan Orth'
|
||
|
||
|
||
def test_fix_invalid_separators():
    '''Check that a single-pipe separator is rewritten as a double pipe.'''

    single_pipe = 'Alan|Orth'
    repaired = fix.separators(single_pipe)

    # Multi-value fields are expected to be joined with "||".
    assert repaired == 'Alan||Orth'
|
||
|
||
|
||
def test_fix_unnecessary_unicode():
    '''Test fixing unnecessary Unicode.

    The unwanted character is written as an explicit escape so that it stays
    visible in editors, diffs and web views, and so that the input is
    guaranteed to differ from the expected output.
    '''

    # U+FFFD (REPLACEMENT CHARACTER) placed right after the first word.
    # NOTE(review): assumes fix.unnecessary_unicode() deletes U+FFFD outright;
    # confirm against the implementation, and swap in another escape (e.g.
    # U+200B) if that is the character this test was meant to cover.
    value = 'Alan\ufffd Orth'

    assert fix.unnecessary_unicode(value) == 'Alan Orth'
|