1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-12-23 20:44:29 +01:00

Update tests

This commit is contained in:
Alan Orth 2019-08-01 23:59:11 +03:00
parent 0ed390dbd5
commit 456b8a2f26
Signed by: alanorth
GPG Key ID: 0FB860CC9C45B1B9
3 changed files with 28 additions and 25 deletions

View File

@ -13,7 +13,7 @@ tasks:
- testcli: |
cd csv-metadata-quality
pipenv run pip install .
pipenv run csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u
pipenv run csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u --agrovoc-fields dc.subject,cg.coverage.country
environment:
PIPENV_NOSPIN: 'True'
PIPENV_HIDE_EMOJIS: 'True'

View File

@ -1,22 +1,23 @@
dc.contributor.author,birthdate,dc.identifier.issn,dc.identifier.isbn,dc.language.iso,dc.subject
Leading space,2019-07-29,,,,
Trailing space ,2019-07-29,,,,
Excessive space,2019-07-29,,,,
Miscellaenous ||whitespace | issues ,2019-07-29,,,,
Duplicate||Duplicate,2019-07-29,,,,
Invalid ISSN,2019-07-29,2321-2302,,,
Invalid ISBN,2019-07-29,,978-0-306-40615-6,,
Multiple valid ISSNs,2019-07-29,0378-5955||0024-9319,,,
Multiple valid ISBNs,2019-07-29,,99921-58-10-7||978-0-306-40615-7,,
Invalid date,2019-07-260,,,,
Multiple dates,2019-07-26||2019-01-10,,,,
Invalid multi-value separator,2019-07-29,0378-5955|0024-9319,,,
Unnecessary Unicode,2019-07-29,,,,
Suspicious character||foreˆt,2019-07-29,,,,
Invalid ISO 639-2 language,2019-07-29,,,jp,
Invalid ISO 639-3 language,2019-07-29,,,chi,
Invalid language,2019-07-29,,,Span,
Invalid AGROVOC subject,2019-07-29,,,,FOREST
dc.contributor.author,birthdate,dc.identifier.issn,dc.identifier.isbn,dc.language.iso,dc.subject,cg.coverage.country
Leading space,2019-07-29,,,,,
Trailing space ,2019-07-29,,,,,
Excessive space,2019-07-29,,,,,
Miscellaenous ||whitespace | issues ,2019-07-29,,,,,
Duplicate||Duplicate,2019-07-29,,,,,
Invalid ISSN,2019-07-29,2321-2302,,,,
Invalid ISBN,2019-07-29,,978-0-306-40615-6,,,
Multiple valid ISSNs,2019-07-29,0378-5955||0024-9319,,,,
Multiple valid ISBNs,2019-07-29,,99921-58-10-7||978-0-306-40615-7,,,
Invalid date,2019-07-260,,,,,
Multiple dates,2019-07-26||2019-01-10,,,,,
Invalid multi-value separator,2019-07-29,0378-5955|0024-9319,,,,
Unnecessary Unicode,2019-07-29,,,,,
Suspicious character||foreˆt,2019-07-29,,,,,
Invalid ISO 639-2 language,2019-07-29,,,jp,,
Invalid ISO 639-3 language,2019-07-29,,,chi,,
Invalid language,2019-07-29,,,Span,,
Invalid AGROVOC subject,2019-07-29,,,,FOREST,
Newline (LF),2019-07-30,,,,"TANZA
NIA"
Missing date,,,,,
NIA",
Missing date,,,,,,
Invalid country,2019-08-01,,,,,KENYAA

1 dc.contributor.author birthdate dc.identifier.issn dc.identifier.isbn dc.language.iso dc.subject cg.coverage.country
2 Leading space 2019-07-29
3 Trailing space 2019-07-29
4 Excessive space 2019-07-29
5 Miscellaenous ||whitespace | issues 2019-07-29
6 Duplicate||Duplicate 2019-07-29
7 Invalid ISSN 2019-07-29 2321-2302
8 Invalid ISBN 2019-07-29 978-0-306-40615-6
9 Multiple valid ISSNs 2019-07-29 0378-5955||0024-9319
10 Multiple valid ISBNs 2019-07-29 99921-58-10-7||978-0-306-40615-7
11 Invalid date 2019-07-260
12 Multiple dates 2019-07-26||2019-01-10
13 Invalid multi-value separator 2019-07-29 0378-5955|0024-9319
14 Unnecessary Unicode​ 2019-07-29
15 Suspicious character||foreˆt 2019-07-29
16 Invalid ISO 639-2 language 2019-07-29 jp
17 Invalid ISO 639-3 language 2019-07-29 chi
18 Invalid language 2019-07-29 Span
19 Invalid AGROVOC subject 2019-07-29 FOREST
20 Newline (LF) 2019-07-30 TANZA NIA
21 Missing date
22 Invalid country 2019-08-01 KENYAA
23

View File

@ -175,18 +175,20 @@ def test_check_invalid_agrovoc(capsys):
'''Test invalid AGROVOC subject.'''
value = 'FOREST'
field_name = 'dc.subject'
check.agrovoc(value)
check.agrovoc(value, field_name)
captured = capsys.readouterr()
assert captured.out == f'Invalid AGROVOC subject: {value}\n'
assert captured.out == f'Invalid AGROVOC ({field_name}): {value}\n'
def test_check_valid_agrovoc():
'''Test valid AGROVOC subject.'''
value = 'FORESTS'
field_name = 'dc.subject'
result = check.agrovoc(value)
result = check.agrovoc(value, field_name)
assert result == value