1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2025-06-06 17:11:45 +02:00

8 Commits

Author SHA1 Message Date
47c9bbc1d7 fix(deps): update dependency pandas to ~=2.3.0 2025-06-05 04:25:39 +00:00
753f3340a3 renovate.json: adjust managers
We can apparently use the pep621 manager to manage uv dependencies
now. And we need to disable the pip_requirements manager so the bot
doesn't try to update our requirements.txt file (which is exported
from uv).
2025-04-01 12:55:12 +03:00
188097abe4 Revert "csv_metadata_quality/fix.py: minor logic fix"
This reverts commit b7a81b8ec73a97076a473b0cfd21bddb18af9706.
2025-03-19 16:04:12 +03:00
b7a81b8ec7 csv_metadata_quality/fix.py: minor logic fix
Minor logic fix in missing regions.
2025-02-19 16:01:36 +03:00
8a2c567d1f Version 0.7.0 2025-01-31 10:12:43 +03:00
42eb9437e3 pyproject.toml: bump a few deps 2025-01-31 10:11:38 +03:00
5400bcb19b Remove pytest-clarity dependency
I think pytest itself has gotten much more readable over the years.
2025-01-31 10:10:33 +03:00
febea54f1b Remove poetry.lock
We switched to rye and then uv, so this is not needed.
2025-01-31 10:03:03 +03:00
6 changed files with 515 additions and 2483 deletions

View File

@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## Unreleased
## [0.7.0] - 2025-01-31
### Added
- Ability to normalize DOIs to https://doi.org URI format
@ -20,6 +20,7 @@ fields
- Install requests-cache in main run() function instead of check.agrovoc() function so we only incur the overhead once
- Use py3langid instead of langid, see: [How to make language detection with langid.py faster](https://adrien.barbaresi.eu/blog/language-detection-langid-py-faster.html)
- Use uv instead of rye
- Remove pytest-clarity — I think pytest itself has gotten much better in the past few years
### Updated
- Python dependencies, including Pandas 2.0.0 and [Arrow-backed dtypes](https://datapythonista.me/blog/pandas-20-and-the-arrow-revolution-part-i)

View File

@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-3.0-only
VERSION = "0.6.1"
VERSION = "0.7.0"

1903
poetry.lock generated

File diff suppressed because it is too large. Load Diff

View File

@ -1,19 +1,19 @@
[project]
name = "csv-metadata-quality"
version = "0.6.1"
version = "0.7.0"
description = "A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem."
authors = [
{ name = "Alan Orth", email = "alan.orth@gmail.com" }
]
license = { file = "LICENSE.txt" }
dependencies = [
"pandas[feather,performance]~=2.2",
"pandas[feather,performance]~=2.3.0",
"python-stdnum~=1.20",
"requests~=2.32",
"requests~=2.32.3",
"requests-cache~=1.2.1",
"colorama~=0.4",
"ftfy~=6.3.0",
"country-converter~=1.2",
"country-converter~=1.3",
"pycountry~=24.6.1",
"py3langid~=0.3",
]
@ -47,10 +47,9 @@ build-backend = "hatchling.build"
[dependency-groups]
dev = [
"pytest~=8.3",
"pytest-clarity~=1.0",
"isort~=6.0",
"csvkit~=2.0",
"ipython~=8.26",
"ipython~=8.31",
]
# So hatch doesn't try to build other top-level directories like "data"

View File

@ -3,7 +3,7 @@
"extends": [
"config:recommended"
],
"pep621": {
"enabled": false
"pip_requirements": {
"enabled": false
}
}

1075
uv.lock generated

File diff suppressed because it is too large. Load Diff