mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-12-23 04:32:21 +01:00
Alan Orth
40d5f7d81b
This was tricky because of the nature of newlines. In actuality we are removing Unix line feeds here (U+000A) because Windows carriage returns are actually already removed by the string stripping in the whitespace fix. Creating the test case in Vim was difficult because I couldn't figure out how to manually enter a line feed character. In the end I used a search and replace on a known pattern like "ALAN", replacing it with \r. Neither entering the Unicode code point (U+000A) directly nor typing an "Enter" character after ^V worked. Grrr.
59 lines
1.1 KiB
Python
59 lines
1.1 KiB
Python
import csv_metadata_quality.fix as fix
|
||
|
||
|
||
def test_fix_leading_whitespace():
    '''Check that a value with a leading space comes back without it.'''

    expected = 'Alan'

    assert fix.whitespace(' Alan') == expected
|
||
|
||
|
||
def test_fix_trailing_whitespace():
    '''Check that a value with a trailing space comes back without it.'''

    cleaned = fix.whitespace('Alan ')

    assert cleaned == 'Alan'
|
||
|
||
|
||
def test_fix_excessive_whitespace():
    '''Test fixing excessive whitespace.

    The input deliberately contains two consecutive spaces between the
    words. With only a single space the input would already equal the
    expected output, so the assertion could never fail.
    '''

    # Two spaces between the words: this is the "excessive" whitespace.
    value = 'Alan  Orth'

    assert fix.whitespace(value) == 'Alan Orth'
|
||
|
||
|
||
def test_fix_invalid_separators():
    '''Check that a single pipe between values is turned into a double pipe.'''

    repaired = fix.separators('Alan|Orth')

    assert repaired == 'Alan||Orth'
|
||
|
||
|
||
def test_fix_unnecessary_unicode():
    '''Test fixing unnecessary Unicode.

    The input embeds a zero-width space (U+200B) directly after "Alan".
    It is written as an escape sequence so the character is visible in
    the source and cannot be lost silently by editors or copy/paste.
    '''

    # 'Alan' + ZERO WIDTH SPACE + ' Orth'
    value = 'Alan\u200b Orth'

    assert fix.unnecessary_unicode(value) == 'Alan Orth'
|
||
|
||
|
||
def test_fix_duplicates():
    '''Check that a repeated value in a multi-value field is reduced to one.'''

    deduplicated = fix.duplicates('Kenya||Kenya')

    assert deduplicated == 'Kenya'
|
||
|
||
|
||
def test_fix_newlines():
    '''Check that a line feed inside a value is removed.'''

    # Line feed (U+000A) written as an escape rather than as a literal
    # line break inside a triple-quoted string; the value is the same.
    with_line_feed = 'Ken\nya'

    assert fix.newlines(with_line_feed) == 'Kenya'
|