2019-07-26 23:36:40 +02:00
|
|
|
|
import csv_metadata_quality.fix as fix
|
|
|
|
|
|
2019-07-28 16:47:28 +02:00
|
|
|
|
|
2019-07-26 23:36:40 +02:00
|
|
|
|
def test_fix_leading_whitespace():
    '''Check that whitespace at the start of a value is removed.'''

    padded = ' Alan'
    cleaned = fix.whitespace(padded)

    assert cleaned == 'Alan'
|
|
|
|
|
|
2019-07-28 16:47:28 +02:00
|
|
|
|
|
2019-07-26 23:36:40 +02:00
|
|
|
|
def test_fix_trailing_whitespace():
    '''Check that whitespace at the end of a value is removed.'''

    expected = 'Alan'
    actual = fix.whitespace('Alan ')

    assert actual == expected
|
|
|
|
|
|
2019-07-28 16:47:28 +02:00
|
|
|
|
|
2019-07-26 23:36:40 +02:00
|
|
|
|
def test_fix_excessive_whitespace():
    '''Test fixing excessive whitespace.

    The input carries two consecutive spaces between the words. With only a
    single space the input would already equal the expected output, and the
    assertion could not fail even if fix.whitespace() did nothing.
    '''

    # Two spaces on purpose: this run is the "excessive" whitespace under test.
    value = 'Alan  Orth'

    assert fix.whitespace(value) == 'Alan Orth'
|
2019-07-28 21:53:39 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_fix_invalid_separators():
    '''Check that a single-pipe separator is rewritten as a double pipe.'''

    single_pipe = 'Alan|Orth'
    repaired = fix.separators(single_pipe)

    assert repaired == 'Alan||Orth'
|
2019-07-29 15:38:10 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_fix_unnecessary_unicode():
    '''Test fixing unnecessary Unicode.

    The input embeds a zero-width space (U+200B) directly after "Alan". It is
    written as an escape sequence so the otherwise invisible character can be
    seen in review and cannot be silently dropped by editors or copy/paste.
    A plain ASCII input would equal the expected output and make the
    assertion vacuous.
    '''

    # NOTE(review): assumes U+200B is one of the characters that
    # fix.unnecessary_unicode() strips -- confirm against the fix module.
    value = 'Alan\u200b Orth'

    assert fix.unnecessary_unicode(value) == 'Alan Orth'
|
2019-07-29 17:05:03 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_fix_duplicates():
    '''Check that a value repeated across "||" is reduced to one copy.'''

    repeated = 'Kenya||Kenya'
    deduplicated = fix.duplicates(repeated)

    assert deduplicated == 'Kenya'
|
2019-07-30 19:05:12 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_fix_newlines():
    '''Check that a line break embedded in a value is removed.'''

    # Same string as the two-line triple-quoted literal, with the line feed
    # spelled out so it is visible on a single line.
    broken = 'Ken\nya'
    joined = fix.newlines(broken)

    assert joined == 'Kenya'
|
2019-08-27 23:08:56 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_fix_comma_space():
    '''Check that a space is inserted after a comma in an author name.'''

    author_field = 'dc.contributor.author'
    unspaced = 'Orth,Alan S.'

    spaced = fix.comma_space(unspaced, author_field)

    assert spaced == 'Orth, Alan S.'
|