mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2025-05-10 07:06:00 +02:00
Rework requests-cache
We should only set up the requests cache once per invocation, not for every row we check. This should be more efficient, but it means that we don't cache responses when running via pytest, which is probably a good thing.
This commit is contained in:
@ -1,11 +1,14 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-only
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import re
|
||||
import signal
|
||||
import sys
|
||||
from datetime import timedelta
|
||||
|
||||
import pandas as pd
|
||||
import requests_cache
|
||||
from colorama import Fore
|
||||
|
||||
import csv_metadata_quality.check as check
|
||||
@ -84,6 +87,19 @@ def run(argv):
|
||||
else:
|
||||
exclude = list()
|
||||
|
||||
# enable transparent request cache with thirty days expiry
|
||||
expire_after = timedelta(days=30)
|
||||
# Allow overriding the location of the requests cache, just in case we are
|
||||
# running in an environment where we can't write to the current working
|
||||
# directory (for example from csv-metadata-quality-web).
|
||||
REQUESTS_CACHE_DIR = os.environ.get("REQUESTS_CACHE_DIR", ".")
|
||||
requests_cache.install_cache(
|
||||
f"{REQUESTS_CACHE_DIR}/agrovoc-response-cache", expire_after=expire_after
|
||||
)
|
||||
|
||||
# prune old cache entries
|
||||
requests_cache.delete()
|
||||
|
||||
for column in df.columns:
|
||||
if column in exclude:
|
||||
print(f"{Fore.YELLOW}Skipping {Fore.RESET}{column}")
|
||||
|
Reference in New Issue
Block a user