mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-12-22 04:02:19 +01:00
Ignore subregion field for missing region checks
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
Due to a sloppy regex, I was sometimes matching the subregion field instead of the region field when checking for missing UN M.49 regions.
This commit is contained in:
parent
58e956360a
commit
051777bcec
@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## Unreleased
|
||||
### Fixed
|
||||
- Missing region check should ignore the subregion field, if it exists
|
||||
|
||||
## [0.6.0] - 2022-09-02
|
||||
### Changed
|
||||
- Perform fix for "unnecessary" Unicode characters after we try to fix encoding
|
||||
|
@ -512,9 +512,9 @@ def countries_match_regions(row, exclude):
|
||||
if match is not None:
|
||||
country_column_name = label
|
||||
|
||||
# Find the name of the region column
|
||||
# Find the name of the region column, but make sure it's not subregion!
|
||||
match = re.match(r"^.*?region.*$", label)
|
||||
if match is not None:
|
||||
if match is not None and "sub" not in label:
|
||||
region_column_name = label
|
||||
|
||||
# Find the name of the title column
|
||||
|
@ -327,9 +327,9 @@ def countries_match_regions(row, exclude):
|
||||
if match is not None:
|
||||
country_column_name = label
|
||||
|
||||
# Find the name of the region column
|
||||
# Find the name of the region column, but make sure it's not subregion!
|
||||
match = re.match(r"^.*?region.*$", label)
|
||||
if match is not None:
|
||||
if match is not None and "sub" not in label:
|
||||
region_column_name = label
|
||||
|
||||
# Find the name of the title column
|
||||
|
@ -1,38 +1,39 @@
|
||||
dc.title,dcterms.issued,dc.identifier.issn,dc.identifier.isbn,dcterms.language,dcterms.subject,cg.coverage.country,filename,dcterms.license,dcterms.type,dcterms.bibliographicCitation,cg.identifier.doi,cg.coverage.region
|
||||
Leading space,2019-07-29,,,,,,,,,,,
|
||||
Trailing space ,2019-07-29,,,,,,,,,,,
|
||||
Excessive space,2019-07-29,,,,,,,,,,,
|
||||
Miscellaenous ||whitespace | issues ,2019-07-29,,,,,,,,,,,
|
||||
Duplicate||Duplicate,2019-07-29,,,,,,,,,,,
|
||||
Invalid ISSN,2019-07-29,2321-2302,,,,,,,,,,
|
||||
Invalid ISBN,2019-07-29,,978-0-306-40615-6,,,,,,,,,
|
||||
Multiple valid ISSNs,2019-07-29,0378-5955||0024-9319,,,,,,,,,,
|
||||
Multiple valid ISBNs,2019-07-29,,99921-58-10-7||978-0-306-40615-7,,,,,,,,,
|
||||
Invalid date,2019-07-260,,,,,,,,,,,
|
||||
Multiple dates,2019-07-26||2019-01-10,,,,,,,,,,,
|
||||
Invalid multi-value separator,2019-07-29,0378-5955|0024-9319,,,,,,,,,,
|
||||
Unnecessary Unicode,2019-07-29,,,,,,,,,,,
|
||||
Suspicious character||foreˆt,2019-07-29,,,,,,,,,,,
|
||||
Invalid ISO 639-1 (alpha 2) language,2019-07-29,,,jp,,,,,,,,
|
||||
Invalid ISO 639-3 (alpha 3) language,2019-07-29,,,chi,,,,,,,,
|
||||
Invalid language,2019-07-29,,,Span,,,,,,,,
|
||||
Invalid AGROVOC subject,2019-07-29,,,,LIVESTOCK||FOREST,,,,,,,
|
||||
dc.title,dcterms.issued,dc.identifier.issn,dc.identifier.isbn,dcterms.language,dcterms.subject,cg.coverage.country,filename,dcterms.license,dcterms.type,dcterms.bibliographicCitation,cg.identifier.doi,cg.coverage.region,cg.coverage.subregion
|
||||
Leading space,2019-07-29,,,,,,,,,,,,
|
||||
Trailing space ,2019-07-29,,,,,,,,,,,,
|
||||
Excessive space,2019-07-29,,,,,,,,,,,,
|
||||
Miscellaenous ||whitespace | issues ,2019-07-29,,,,,,,,,,,,
|
||||
Duplicate||Duplicate,2019-07-29,,,,,,,,,,,,
|
||||
Invalid ISSN,2019-07-29,2321-2302,,,,,,,,,,,
|
||||
Invalid ISBN,2019-07-29,,978-0-306-40615-6,,,,,,,,,,
|
||||
Multiple valid ISSNs,2019-07-29,0378-5955||0024-9319,,,,,,,,,,,
|
||||
Multiple valid ISBNs,2019-07-29,,99921-58-10-7||978-0-306-40615-7,,,,,,,,,,
|
||||
Invalid date,2019-07-260,,,,,,,,,,,,
|
||||
Multiple dates,2019-07-26||2019-01-10,,,,,,,,,,,,
|
||||
Invalid multi-value separator,2019-07-29,0378-5955|0024-9319,,,,,,,,,,,
|
||||
Unnecessary Unicode,2019-07-29,,,,,,,,,,,,
|
||||
Suspicious character||foreˆt,2019-07-29,,,,,,,,,,,,
|
||||
Invalid ISO 639-1 (alpha 2) language,2019-07-29,,,jp,,,,,,,,,
|
||||
Invalid ISO 639-3 (alpha 3) language,2019-07-29,,,chi,,,,,,,,,
|
||||
Invalid language,2019-07-29,,,Span,,,,,,,,,
|
||||
Invalid AGROVOC subject,2019-07-29,,,,LIVESTOCK||FOREST,,,,,,,,
|
||||
Newline (LF),2019-07-30,,,,"TANZA
|
||||
NIA",,,,,,,
|
||||
Missing date,,,,,,,,,,,,
|
||||
Invalid country,2019-08-01,,,,,KENYAA,,,,,,
|
||||
Uncommon filename extension,2019-08-10,,,,,,file.pdf.lck,,,,,
|
||||
Unneccesary unicode (U+002D + U+00AD),2019-08-10,,978-92-9043-823-6,,,,,,,,,
|
||||
"Missing space,after comma",2019-08-27,,,,,,,,,,,
|
||||
Incorrect ISO 639-1 language,2019-09-26,,,es,,,,,,,,
|
||||
Incorrect ISO 639-3 language,2019-09-26,,,spa,,,,,,,,
|
||||
Composéd Unicode,2020-01-14,,,,,,,,,,,
|
||||
Decomposéd Unicode,2020-01-14,,,,,,,,,,,
|
||||
Unnecessary multi-value separator,2021-01-03,0378-5955||,,,,,,,,,,
|
||||
Invalid SPDX license identifier,2021-03-11,,,,,,,CC-BY,,,,
|
||||
Duplicate Title,2021-03-17,,,,,,,,Report,,,
|
||||
Duplicate Title,2021-03-17,,,,,,,,Report,,,
|
||||
Mojibake,2021-03-18,,,,Publicaçao CIAT,,,,Report,,,
|
||||
"DOI in citation, but missing cg.identifier.doi",2021-10-06,,,,,,,,,"Orth, A. 2021. DOI in citation, but missing cg.identifier.doi. doi: 10.1186/1743-422X-9-218",,
|
||||
Title missing from citation,2021-12-05,,,,,,,,,"Orth, A. 2021. Title missing f rom citation.",,
|
||||
Country missing region,2021-12-08,,,,,Kenya,,,,,,
|
||||
NIA",,,,,,,,
|
||||
Missing date,,,,,,,,,,,,,
|
||||
Invalid country,2019-08-01,,,,,KENYAA,,,,,,,
|
||||
Uncommon filename extension,2019-08-10,,,,,,file.pdf.lck,,,,,,
|
||||
Unneccesary unicode (U+002D + U+00AD),2019-08-10,,978-92-9043-823-6,,,,,,,,,,
|
||||
"Missing space,after comma",2019-08-27,,,,,,,,,,,,
|
||||
Incorrect ISO 639-1 language,2019-09-26,,,es,,,,,,,,,
|
||||
Incorrect ISO 639-3 language,2019-09-26,,,spa,,,,,,,,,
|
||||
Composéd Unicode,2020-01-14,,,,,,,,,,,,
|
||||
Decomposéd Unicode,2020-01-14,,,,,,,,,,,,
|
||||
Unnecessary multi-value separator,2021-01-03,0378-5955||,,,,,,,,,,,
|
||||
Invalid SPDX license identifier,2021-03-11,,,,,,,CC-BY,,,,,
|
||||
Duplicate Title,2021-03-17,,,,,,,,Report,,,,
|
||||
Duplicate Title,2021-03-17,,,,,,,,Report,,,,
|
||||
Mojibake,2021-03-18,,,,Publicaçao CIAT,,,,Report,,,,
|
||||
"DOI in citation, but missing cg.identifier.doi",2021-10-06,,,,,,,,,"Orth, A. 2021. DOI in citation, but missing cg.identifier.doi. doi: 10.1186/1743-422X-9-218",,,
|
||||
Title missing from citation,2021-12-05,,,,,,,,,"Orth, A. 2021. Title missing f rom citation.",,,
|
||||
Country missing region,2021-12-08,,,,,Kenya,,,,,,,
|
||||
Subregion field shouldn’t trigger region checks,2022-12-07,,,,,Kenya,,,,,,Eastern Africa,Baringo
|
||||
|
|
Loading…
Reference in New Issue
Block a user