mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-11-22 05:45:02 +01:00
Ignore subregion field for missing region checks
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
Due to a sloppy regex, I was sometimes matching the subregion field when checking for missing UN M.49 regions in the region field.
This commit is contained in:
parent
58e956360a
commit
051777bcec
@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file.
|
|||||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
|
## Unreleased
|
||||||
|
### Fixed
|
||||||
|
- The missing region check now ignores the subregion field, if it exists
|
||||||
|
|
||||||
## [0.6.0] - 2022-09-02
|
## [0.6.0] - 2022-09-02
|
||||||
### Changed
|
### Changed
|
||||||
- Perform fix for "unnecessary" Unicode characters after we try to fix encoding
|
- Perform fix for "unnecessary" Unicode characters after we try to fix encoding
|
||||||
|
@ -512,9 +512,9 @@ def countries_match_regions(row, exclude):
|
|||||||
if match is not None:
|
if match is not None:
|
||||||
country_column_name = label
|
country_column_name = label
|
||||||
|
|
||||||
# Find the name of the region column
|
# Find the name of the region column, but make sure it's not subregion!
|
||||||
match = re.match(r"^.*?region.*$", label)
|
match = re.match(r"^.*?region.*$", label)
|
||||||
if match is not None:
|
if match is not None and "sub" not in label:
|
||||||
region_column_name = label
|
region_column_name = label
|
||||||
|
|
||||||
# Find the name of the title column
|
# Find the name of the title column
|
||||||
|
@ -327,9 +327,9 @@ def countries_match_regions(row, exclude):
|
|||||||
if match is not None:
|
if match is not None:
|
||||||
country_column_name = label
|
country_column_name = label
|
||||||
|
|
||||||
# Find the name of the region column
|
# Find the name of the region column, but make sure it's not subregion!
|
||||||
match = re.match(r"^.*?region.*$", label)
|
match = re.match(r"^.*?region.*$", label)
|
||||||
if match is not None:
|
if match is not None and "sub" not in label:
|
||||||
region_column_name = label
|
region_column_name = label
|
||||||
|
|
||||||
# Find the name of the title column
|
# Find the name of the title column
|
||||||
|
@ -1,38 +1,39 @@
|
|||||||
dc.title,dcterms.issued,dc.identifier.issn,dc.identifier.isbn,dcterms.language,dcterms.subject,cg.coverage.country,filename,dcterms.license,dcterms.type,dcterms.bibliographicCitation,cg.identifier.doi,cg.coverage.region
|
dc.title,dcterms.issued,dc.identifier.issn,dc.identifier.isbn,dcterms.language,dcterms.subject,cg.coverage.country,filename,dcterms.license,dcterms.type,dcterms.bibliographicCitation,cg.identifier.doi,cg.coverage.region,cg.coverage.subregion
|
||||||
Leading space,2019-07-29,,,,,,,,,,,
|
Leading space,2019-07-29,,,,,,,,,,,,
|
||||||
Trailing space ,2019-07-29,,,,,,,,,,,
|
Trailing space ,2019-07-29,,,,,,,,,,,,
|
||||||
Excessive space,2019-07-29,,,,,,,,,,,
|
Excessive space,2019-07-29,,,,,,,,,,,,
|
||||||
Miscellaenous ||whitespace | issues ,2019-07-29,,,,,,,,,,,
|
Miscellaenous ||whitespace | issues ,2019-07-29,,,,,,,,,,,,
|
||||||
Duplicate||Duplicate,2019-07-29,,,,,,,,,,,
|
Duplicate||Duplicate,2019-07-29,,,,,,,,,,,,
|
||||||
Invalid ISSN,2019-07-29,2321-2302,,,,,,,,,,
|
Invalid ISSN,2019-07-29,2321-2302,,,,,,,,,,,
|
||||||
Invalid ISBN,2019-07-29,,978-0-306-40615-6,,,,,,,,,
|
Invalid ISBN,2019-07-29,,978-0-306-40615-6,,,,,,,,,,
|
||||||
Multiple valid ISSNs,2019-07-29,0378-5955||0024-9319,,,,,,,,,,
|
Multiple valid ISSNs,2019-07-29,0378-5955||0024-9319,,,,,,,,,,,
|
||||||
Multiple valid ISBNs,2019-07-29,,99921-58-10-7||978-0-306-40615-7,,,,,,,,,
|
Multiple valid ISBNs,2019-07-29,,99921-58-10-7||978-0-306-40615-7,,,,,,,,,,
|
||||||
Invalid date,2019-07-260,,,,,,,,,,,
|
Invalid date,2019-07-260,,,,,,,,,,,,
|
||||||
Multiple dates,2019-07-26||2019-01-10,,,,,,,,,,,
|
Multiple dates,2019-07-26||2019-01-10,,,,,,,,,,,,
|
||||||
Invalid multi-value separator,2019-07-29,0378-5955|0024-9319,,,,,,,,,,
|
Invalid multi-value separator,2019-07-29,0378-5955|0024-9319,,,,,,,,,,,
|
||||||
Unnecessary Unicode,2019-07-29,,,,,,,,,,,
|
Unnecessary Unicode,2019-07-29,,,,,,,,,,,,
|
||||||
Suspicious character||foreˆt,2019-07-29,,,,,,,,,,,
|
Suspicious character||foreˆt,2019-07-29,,,,,,,,,,,,
|
||||||
Invalid ISO 639-1 (alpha 2) language,2019-07-29,,,jp,,,,,,,,
|
Invalid ISO 639-1 (alpha 2) language,2019-07-29,,,jp,,,,,,,,,
|
||||||
Invalid ISO 639-3 (alpha 3) language,2019-07-29,,,chi,,,,,,,,
|
Invalid ISO 639-3 (alpha 3) language,2019-07-29,,,chi,,,,,,,,,
|
||||||
Invalid language,2019-07-29,,,Span,,,,,,,,
|
Invalid language,2019-07-29,,,Span,,,,,,,,,
|
||||||
Invalid AGROVOC subject,2019-07-29,,,,LIVESTOCK||FOREST,,,,,,,
|
Invalid AGROVOC subject,2019-07-29,,,,LIVESTOCK||FOREST,,,,,,,,
|
||||||
Newline (LF),2019-07-30,,,,"TANZA
|
Newline (LF),2019-07-30,,,,"TANZA
|
||||||
NIA",,,,,,,
|
NIA",,,,,,,,
|
||||||
Missing date,,,,,,,,,,,,
|
Missing date,,,,,,,,,,,,,
|
||||||
Invalid country,2019-08-01,,,,,KENYAA,,,,,,
|
Invalid country,2019-08-01,,,,,KENYAA,,,,,,,
|
||||||
Uncommon filename extension,2019-08-10,,,,,,file.pdf.lck,,,,,
|
Uncommon filename extension,2019-08-10,,,,,,file.pdf.lck,,,,,,
|
||||||
Unneccesary unicode (U+002D + U+00AD),2019-08-10,,978-92-9043-823-6,,,,,,,,,
|
Unneccesary unicode (U+002D + U+00AD),2019-08-10,,978-92-9043-823-6,,,,,,,,,,
|
||||||
"Missing space,after comma",2019-08-27,,,,,,,,,,,
|
"Missing space,after comma",2019-08-27,,,,,,,,,,,,
|
||||||
Incorrect ISO 639-1 language,2019-09-26,,,es,,,,,,,,
|
Incorrect ISO 639-1 language,2019-09-26,,,es,,,,,,,,,
|
||||||
Incorrect ISO 639-3 language,2019-09-26,,,spa,,,,,,,,
|
Incorrect ISO 639-3 language,2019-09-26,,,spa,,,,,,,,,
|
||||||
Composéd Unicode,2020-01-14,,,,,,,,,,,
|
Composéd Unicode,2020-01-14,,,,,,,,,,,,
|
||||||
Decomposéd Unicode,2020-01-14,,,,,,,,,,,
|
Decomposéd Unicode,2020-01-14,,,,,,,,,,,,
|
||||||
Unnecessary multi-value separator,2021-01-03,0378-5955||,,,,,,,,,,
|
Unnecessary multi-value separator,2021-01-03,0378-5955||,,,,,,,,,,,
|
||||||
Invalid SPDX license identifier,2021-03-11,,,,,,,CC-BY,,,,
|
Invalid SPDX license identifier,2021-03-11,,,,,,,CC-BY,,,,,
|
||||||
Duplicate Title,2021-03-17,,,,,,,,Report,,,
|
Duplicate Title,2021-03-17,,,,,,,,Report,,,,
|
||||||
Duplicate Title,2021-03-17,,,,,,,,Report,,,
|
Duplicate Title,2021-03-17,,,,,,,,Report,,,,
|
||||||
Mojibake,2021-03-18,,,,Publicaçao CIAT,,,,Report,,,
|
Mojibake,2021-03-18,,,,Publicaçao CIAT,,,,Report,,,,
|
||||||
"DOI in citation, but missing cg.identifier.doi",2021-10-06,,,,,,,,,"Orth, A. 2021. DOI in citation, but missing cg.identifier.doi. doi: 10.1186/1743-422X-9-218",,
|
"DOI in citation, but missing cg.identifier.doi",2021-10-06,,,,,,,,,"Orth, A. 2021. DOI in citation, but missing cg.identifier.doi. doi: 10.1186/1743-422X-9-218",,,
|
||||||
Title missing from citation,2021-12-05,,,,,,,,,"Orth, A. 2021. Title missing f rom citation.",,
|
Title missing from citation,2021-12-05,,,,,,,,,"Orth, A. 2021. Title missing f rom citation.",,,
|
||||||
Country missing region,2021-12-08,,,,,Kenya,,,,,,
|
Country missing region,2021-12-08,,,,,Kenya,,,,,,,
|
||||||
|
Subregion field shouldn’t trigger region checks,2022-12-07,,,,,Kenya,,,,,,Eastern Africa,Baringo
|
||||||
|
|
Loading…
Reference in New Issue
Block a user