1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-12-22 12:12:18 +01:00

Bring test.csv into project

This commit is contained in:
Alan Orth 2019-07-26 23:14:37 +03:00
parent e160b17fb0
commit dfd961d720
Signed by: alanorth
GPG Key ID: 0FB860CC9C45B1B9
2 changed files with 5 additions and 2 deletions

View File

@ -6,7 +6,7 @@ def run():
# Read all fields as strings so dates don't get converted from 1998 to 1998.0
#df = pd.read_csv('/home/aorth/Downloads/2019-07-26-Bioversity-Migration.csv', dtype=str)
#df = pd.read_csv('/tmp/quality.csv', dtype=str)
-df = pd.read_csv('/tmp/omg.csv', dtype=str)
+df = pd.read_csv('tests/test.csv', dtype=str)
# Fix whitespace in all columns
for column in df.columns.values.tolist():
@ -21,4 +21,4 @@ def run():
df[column] = df[column].apply(check.isbn)
# Write
-df.to_csv('/tmp/omg.fixed.csv', index=False)
+df.to_csv('/tmp/test.fixed.csv', index=False)

3
tests/test.csv Normal file
View File

@ -0,0 +1,3 @@
dc.contributor.author,birthdate,dc.identifier.issn,dc.identifier.isbn
Alan|| Alan||Alan Orth||Alan ||Alan Orth ||Alan ,1984,0378-5955,978-0-306-40615-6||99921-58-10-7
Stella|| Stella ||Stella Orth||Stella ,1984-11-27,2321-2302,99921-58-10-7
1 dc.contributor.author birthdate dc.identifier.issn dc.identifier.isbn
2 Alan|| Alan||Alan Orth||Alan ||Alan Orth ||Alan 1984 0378-5955 978-0-306-40615-6||99921-58-10-7
3 Stella|| Stella ||Stella Orth||Stella 1984-11-27 2321-2302 99921-58-10-7