1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-11-25 23:28:18 +01:00

Compare commits

...

3 Commits

Author SHA1 Message Date
c9c277f8df
csv_metadata_quality/app.py: Update help text
All checks were successful
continuous-integration/drone/push Build is passing
Use DCTERMS fields where possible.
2021-03-14 10:52:58 +02:00
fb35afd937
CHANGELOG.md: Add note about requests cache 2021-03-14 09:13:51 +02:00
0e9176f0a6
csv_metadata_quality/check.py: requests cache
Allow overriding the directory for the requests cache. In the case
of csv-metadata-quality-web, which currently runs on Google's App
Engine, we can only write to /tmp.
2021-03-14 09:07:35 +02:00
3 changed files with 12 additions and 3 deletions

View File

@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Fixing invalid multi-value separators like `|` and `|||` is no longer class-
ified as "unsafe" as I have yet to see a case where this was intentional
### Added
- Configurable directory for AGROVOC requests cache (to allow running the web
version from Google App Engine where we can only write to /tmp)
## [0.4.6] - 2021-03-11
### Added
- Validation of dcterms.license field against SPDX license identifiers

View File

@ -17,7 +17,7 @@ def parse_args(argv):
parser.add_argument(
"--agrovoc-fields",
"-a",
help="Comma-separated list of fields to validate against AGROVOC, for example: dc.subject,cg.coverage.country",
help="Comma-separated list of fields to validate against AGROVOC, for example: dcterms.subject,cg.coverage.country",
)
parser.add_argument(
"--experimental-checks",
@ -48,7 +48,7 @@ def parse_args(argv):
parser.add_argument(
"--exclude-fields",
"-x",
help="Comma-separated list of fields to skip, for example: dc.contributor.author,dc.identifier.citation",
help="Comma-separated list of fields to skip, for example: dc.contributor.author,dcterms.bibliographicCitation",
)
args = parser.parse_args()

View File

@ -1,3 +1,4 @@
import os
import re
from datetime import datetime, timedelta
@ -242,7 +243,11 @@ def agrovoc(field, field_name):
# enable transparent request cache with thirty days expiry
expire_after = timedelta(days=30)
requests_cache.install_cache("agrovoc-response-cache", expire_after=expire_after)
# Allow overriding the location of the requests cache, just in case we are
# running in an environment where we can't write to the current working di-
# rectory (for example from csv-metadata-quality-web).
REQUESTS_CACHE_DIR = os.environ.get("REQUESTS_CACHE_DIR", ".")
requests_cache.install_cache(f"{REQUESTS_CACHE_DIR}/agrovoc-response-cache", expire_after=expire_after)
# prune old cache entries
requests_cache.core.remove_expired_responses()