mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-12-22 20:22:18 +01:00
Alan Orth
40e77db713
In this case it fixes occurrences of invalid multi-value separators. DSpace uses "||" to separate multiple values in one field, but our editors sometimes give us files with mistakes like "|". We can fix these to be correct multi-value separators if we are sure that the metadata is not actually using "|" for some legitimate purpose.
34 lines
653 B
Python
34 lines
653 B
Python
import csv_metadata_quality.fix as fix
|
|
|
|
|
|
def test_fix_leading_whitespace():
    '''Check that a space at the start of a value is removed.'''
    padded = ' Alan'
    expected = 'Alan'

    assert fix.whitespace(padded) == expected
|
|
|
|
|
|
def test_fix_trailing_whitespace():
    '''Check that a space at the end of a value is removed.'''
    padded = 'Alan '
    expected = 'Alan'

    assert fix.whitespace(padded) == expected
|
|
|
|
|
|
def test_fix_excessive_whitespace():
    '''Test fixing excessive whitespace.

    The input carries two consecutive spaces between the words; the test
    expects fix.whitespace to return the value with a single space there.
    '''
    # The input must differ from the expected output: with a single space on
    # both sides this assertion would pass even if nothing was collapsed.
    value = 'Alan  Orth'

    assert fix.whitespace(value) == 'Alan Orth'
|
|
|
|
|
|
def test_fix_invalid_separators():
    '''Check that a lone "|" between values is rewritten as "||".'''
    single_pipe = 'Alan|Orth'
    expected = 'Alan||Orth'

    assert fix.separators(single_pipe) == expected
|