1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-11-25 15:18:19 +01:00

data/test.csv: Add invalid SPDX license

Now we are checking dcterms.license against the list of SPDX license
identifiers using https://pypi.org/project/spdx-license-list/.
This commit is contained in:
Alan Orth 2021-03-11 10:34:58 +02:00
parent 6e4b0e5c1b
commit 3b17914002
Signed by: alanorth
GPG Key ID: 0FB860CC9C45B1B9

View File

@ -1,31 +1,32 @@
dc.title,dc.date.issued,dc.identifier.issn,dc.identifier.isbn,dc.language.iso,dc.subject,cg.coverage.country,filename
Leading space,2019-07-29,,,,,,
Trailing space ,2019-07-29,,,,,,
Excessive space,2019-07-29,,,,,,
Miscellaenous ||whitespace | issues ,2019-07-29,,,,,,
Duplicate||Duplicate,2019-07-29,,,,,,
Invalid ISSN,2019-07-29,2321-2302,,,,,
Invalid ISBN,2019-07-29,,978-0-306-40615-6,,,,
Multiple valid ISSNs,2019-07-29,0378-5955||0024-9319,,,,,
Multiple valid ISBNs,2019-07-29,,99921-58-10-7||978-0-306-40615-7,,,,
Invalid date,2019-07-260,,,,,,
Multiple dates,2019-07-26||2019-01-10,,,,,,
Invalid multi-value separator,2019-07-29,0378-5955|0024-9319,,,,,
Unnecessary Unicode,2019-07-29,,,,,,
Suspicious character||foreˆt,2019-07-29,,,,,,
Invalid ISO 639-1 (alpha 2) language,2019-07-29,,,jp,,,
Invalid ISO 639-3 (alpha 3) language,2019-07-29,,,chi,,,
Invalid language,2019-07-29,,,Span,,,
Invalid AGROVOC subject,2019-07-29,,,,FOREST,,
dc.title,dc.date.issued,dc.identifier.issn,dc.identifier.isbn,dc.language.iso,dc.subject,cg.coverage.country,filename,dcterms.license
Leading space,2019-07-29,,,,,,,
Trailing space ,2019-07-29,,,,,,,
Excessive space,2019-07-29,,,,,,,
Miscellaenous ||whitespace | issues ,2019-07-29,,,,,,,
Duplicate||Duplicate,2019-07-29,,,,,,,
Invalid ISSN,2019-07-29,2321-2302,,,,,,
Invalid ISBN,2019-07-29,,978-0-306-40615-6,,,,,
Multiple valid ISSNs,2019-07-29,0378-5955||0024-9319,,,,,,
Multiple valid ISBNs,2019-07-29,,99921-58-10-7||978-0-306-40615-7,,,,,
Invalid date,2019-07-260,,,,,,,
Multiple dates,2019-07-26||2019-01-10,,,,,,,
Invalid multi-value separator,2019-07-29,0378-5955|0024-9319,,,,,,
Unnecessary Unicode,2019-07-29,,,,,,,
Suspicious character||foreˆt,2019-07-29,,,,,,,
Invalid ISO 639-1 (alpha 2) language,2019-07-29,,,jp,,,,
Invalid ISO 639-3 (alpha 3) language,2019-07-29,,,chi,,,,
Invalid language,2019-07-29,,,Span,,,,
Invalid AGROVOC subject,2019-07-29,,,,FOREST,,,
Newline (LF),2019-07-30,,,,"TANZA
NIA",,
Missing date,,,,,,,
Invalid country,2019-08-01,,,,,KENYAA,
Uncommon filename extension,2019-08-10,,,,,,file.pdf.lck
Unneccesary unicode (U+002D + U+00AD),2019-08-10,,978-­92-­9043-­823-­6,,,,
"Missing space,after comma",2019-08-27,,,,,,
Incorrect ISO 639-1 language,2019-09-26,,,es,,,
Incorrect ISO 639-3 language,2019-09-26,,,spa,,,
Composéd Unicode,2020-01-14,,,,,,
Decomposéd Unicode,2020-01-14,,,,,,
Unnecessary multi-value separator,2021-01-03,0378-5955||,,,,,
NIA",,,
Missing date,,,,,,,,
Invalid country,2019-08-01,,,,,KENYAA,,
Uncommon filename extension,2019-08-10,,,,,,file.pdf.lck,
Unneccesary unicode (U+002D + U+00AD),2019-08-10,,978-­92-­9043-­823-­6,,,,,
"Missing space,after comma",2019-08-27,,,,,,,
Incorrect ISO 639-1 language,2019-09-26,,,es,,,,
Incorrect ISO 639-3 language,2019-09-26,,,spa,,,,
Composéd Unicode,2020-01-14,,,,,,,
Decomposéd Unicode,2020-01-14,,,,,,,
Unnecessary multi-value separator,2021-01-03,0378-5955||,,,,,,
Invalid SPDX license identifier,2021-03-11,,,,,,,CC-BY

1 dc.title dc.date.issued dc.identifier.issn dc.identifier.isbn dc.language.iso dc.subject cg.coverage.country filename dcterms.license
2 Leading space 2019-07-29
3 Trailing space 2019-07-29
4 Excessive space 2019-07-29
5 Miscellaenous ||whitespace | issues 2019-07-29
6 Duplicate||Duplicate 2019-07-29
7 Invalid ISSN 2019-07-29 2321-2302
8 Invalid ISBN 2019-07-29 978-0-306-40615-6
9 Multiple valid ISSNs 2019-07-29 0378-5955||0024-9319
10 Multiple valid ISBNs 2019-07-29 99921-58-10-7||978-0-306-40615-7
11 Invalid date 2019-07-260
12 Multiple dates 2019-07-26||2019-01-10
13 Invalid multi-value separator 2019-07-29 0378-5955|0024-9319
14 Unnecessary Unicode​ 2019-07-29
15 Suspicious character||foreˆt 2019-07-29
16 Invalid ISO 639-1 (alpha 2) language 2019-07-29 jp
17 Invalid ISO 639-3 (alpha 3) language 2019-07-29 chi
18 Invalid language 2019-07-29 Span
19 Invalid AGROVOC subject 2019-07-29 FOREST
20 Newline (LF) 2019-07-30 TANZA NIA
21 Missing date
22 Invalid country 2019-08-01 KENYAA
23 Uncommon filename extension 2019-08-10 file.pdf.lck
24 Unneccesary unicode (U+002D + U+00AD) 2019-08-10 978-­92-­9043-­823-­6
25 Missing space,after comma 2019-08-27
26 Incorrect ISO 639-1 language 2019-09-26 es
27 Incorrect ISO 639-3 language 2019-09-26 spa
28 Composéd Unicode 2020-01-14
29 Decomposéd Unicode 2020-01-14
30 Unnecessary multi-value separator 2021-01-03 0378-5955||
31 Invalid SPDX license identifier 2021-03-11 CC-BY
32