1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2025-05-10 07:06:00 +02:00

Rework requests-cache

We should only be installing the requests cache once per invocation,
not for every row we check. This should be more efficient, but it
means that we don't cache responses when running via pytest, which is
probably a good thing.
This commit is contained in:
2023-10-15 23:37:38 +03:00
parent b8241e919d
commit 1f637f32cd
3 changed files with 17 additions and 15 deletions

View File

@ -1,11 +1,14 @@
# SPDX-License-Identifier: GPL-3.0-only
import argparse
import os
import re
import signal
import sys
from datetime import timedelta
import pandas as pd
import requests_cache
from colorama import Fore
import csv_metadata_quality.check as check
@ -84,6 +87,19 @@ def run(argv):
else:
exclude = list()
# Enable a transparent request cache whose entries expire after thirty days.
expire_after = timedelta(days=30)
# Allow overriding the location of the requests cache, just in case we are
# running in an environment where we can't write to the current working
# directory (for example from csv-metadata-quality-web).
REQUESTS_CACHE_DIR = os.environ.get("REQUESTS_CACHE_DIR", ".")
requests_cache.install_cache(
    f"{REQUESTS_CACHE_DIR}/agrovoc-response-cache", expire_after=expire_after
)
# Prune expired cache entries. Called without arguments, delete() matches no
# responses and removes nothing, so expired=True must be passed explicitly.
requests_cache.delete(expired=True)
for column in df.columns:
if column in exclude:
print(f"{Fore.YELLOW}Skipping {Fore.RESET}{column}")