mirror of
				https://github.com/ilri/csv-metadata-quality.git
				synced 2025-10-25 02:41:14 +02:00 
			
		
		
		
	Compare commits
	
		
			140 Commits
		
	
	
		
			34142c3e6b
			...
			renovate/a
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 33aa3ed586 | ||
| cdd4e60d6b | |||
| 9264a9a8fa | |||
|  | d7c6323a3f | ||
|  | 42079cb37d | ||
| 584c456176 | |||
|  | 1042e46c6b | ||
| 69653a13a6 | |||
| c6d196dee1 | |||
| 69b37c612d | |||
|  | 87978b8bc9 | ||
| 78a5c00636 | |||
|  | 4a6f142800 | ||
| 9ed493aa9d | |||
| bb459c963a | |||
|  | 23c16c57cb | ||
|  | 11f61342fb | ||
| 071c2ac03a | |||
| d051841de0 | |||
| 22b2e3b8ae | |||
| be550e21f1 | |||
| 753f3340a3 | |||
| 188097abe4 | |||
| b7a81b8ec7 | |||
| 8a2c567d1f | |||
| 42eb9437e3 | |||
| 5400bcb19b | |||
| febea54f1b | |||
| b5565124de | |||
| 2869919507 | |||
| f7d66947f7 | |||
| 1d701f4056 | |||
|  | 1e339609a6 | ||
| 2b0568de30 | |||
| 9903ada97a | |||
| d4b20e282c | |||
| 9785c18301 | |||
|  | de5e292f1a | ||
| 2675cd288e | |||
| 78dc1336d0 | |||
| 28bbb919ce | |||
| b1de9552a4 | |||
| 81e3ca3d9c | |||
| c470f8b375 | |||
| 0f45448517 | |||
| 7dd52ca491 | |||
| 92ff0ee51b | |||
| ae38a826ec | |||
| c1f630c298 | |||
| 82b056f0ea | |||
| 7fca981b95 | |||
| 1a9424197b | |||
| f6c6c94a1e | |||
| f500fac64b | |||
| 8143a7d978 | |||
| 94cec080d6 | |||
| 9402af1e30 | |||
| d71ff9082b | |||
| f309b694c4 | |||
| 4d879f6d13 | |||
| a30fefcd52 | |||
| 2341c56c40 | |||
| 5be2195325 | |||
| 736948ed2c | |||
| ee0b448355 | |||
| 4f3174a543 | |||
| d5c25f82fa | |||
| 7b3e2b4e68 | |||
| f92b2fe206 | |||
|  | df040b70c7 | ||
|  | 10bc8f3e14 | ||
| 7e6e92ecaa | |||
| a21ffb0fa8 | |||
| fb341dd9fa | |||
| 2e943ee4db | |||
| 6d3a9870d6 | |||
| 82ecf7119a | |||
|  | 1db21cf275 | ||
|  | bcd1408798 | ||
|  | ee8d255811 | ||
| 2cc2dbe952 | |||
| 940a325d61 | |||
| 59b3b307c9 | |||
| b305da3f0b | |||
|  | 96a486471c | ||
| 530cd5863b | |||
| f6018c51b6 | |||
| 80c3f5b45a | |||
| ba4637ea34 | |||
| 355428a691 | |||
|  | 58d4de973e | ||
| e1216dae3c | |||
|  | 6b650ff1b3 | ||
| fa7bde6fc0 | |||
|  | f89159fe32 | ||
|  | 02058c5a65 | ||
| 8fed6b71ff | |||
| b005b28cbe | |||
|  | c626290599 | ||
|  | 1a06470b64 | ||
| d46a81672e | |||
| 2a50e75082 | |||
| 0d45e73983 | |||
|  | 3611aab425 | ||
|  | 5c4ad0eb41 | ||
|  | f1f39722f6 | ||
| 1c03999582 | |||
| 1f637f32cd | |||
| b8241e919d | |||
| b8dc19cc3f | |||
| 93c9b739ac | |||
| 4ed2786703 | |||
|  | 8728789183 | ||
| bf90464809 | |||
| 1878002391 | |||
| d21d2621e3 | |||
| f3fb1ff7fb | |||
| 1fa81f7558 | |||
|  | 7409193b6b | ||
| a84fcf0b7b | |||
| 25ac290df4 | |||
| 3f52bad1e3 | |||
| 0208ad0ade | |||
|  | 3632ae0fc9 | ||
| 17d089cc6e | |||
| bc470a4343 | |||
| be609a809d | |||
| de3387ded7 | |||
| f343e87f0c | |||
| 7d3524fbd5 | |||
| c614b71a52 | |||
|  | d159a839f3 | ||
| 36e2ebe5f4 | |||
| 33f67b7a7c | |||
| c0e1448439 | |||
| 5d0804a08f | |||
| f01c9edf17 | |||
| 8d4295b2b3 | |||
| e2d46e9495 | |||
| 1491e1edb0 | 
							
								
								
									
										61
									
								
								.drone.yml
									
									
									
									
									
								
							
							
						
						
									
										61
									
								
								.drone.yml
									
									
									
									
									
								
							| @@ -1,61 +0,0 @@ | |||||||
| --- |  | ||||||
| kind: pipeline |  | ||||||
| type: docker |  | ||||||
| name: python310 |  | ||||||
|  |  | ||||||
| steps: |  | ||||||
| - name: test |  | ||||||
|   image: python:3.10-slim |  | ||||||
|   commands: |  | ||||||
|   - id |  | ||||||
|   - python -V |  | ||||||
|   - apt update && apt install -y gcc g++ libicu-dev pkg-config git |  | ||||||
|   - pip install -r requirements-dev.txt |  | ||||||
|   - pytest |  | ||||||
|   - python setup.py install |  | ||||||
|   # Basic test |  | ||||||
|   - csv-metadata-quality -i data/test.csv -o /tmp/test.csv |  | ||||||
|   # Basic test with unsafe fixes |  | ||||||
|   - csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u |  | ||||||
|   # Geography test |  | ||||||
|   - csv-metadata-quality -i data/test-geography.csv -o /tmp/test.csv |  | ||||||
|   # Geography test with unsafe fixes |  | ||||||
|   - csv-metadata-quality -i data/test-geography.csv -o /tmp/test.csv -u |  | ||||||
|   # Test with experimental checks |  | ||||||
|   - csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e |  | ||||||
|   # Test with AGROVOC validation |  | ||||||
|   - csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject |  | ||||||
|   # Test with AGROVOC validation (and dropping invalid) |  | ||||||
|   - csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject -d |  | ||||||
|  |  | ||||||
| --- |  | ||||||
| kind: pipeline |  | ||||||
| type: docker |  | ||||||
| name: python39 |  | ||||||
|  |  | ||||||
| steps: |  | ||||||
| - name: test |  | ||||||
|   image: python:3.9-slim |  | ||||||
|   commands: |  | ||||||
|   - id |  | ||||||
|   - python -V |  | ||||||
|   - apt update && apt install -y gcc g++ libicu-dev pkg-config git |  | ||||||
|   - pip install -r requirements-dev.txt |  | ||||||
|   - pytest |  | ||||||
|   - python setup.py install |  | ||||||
|   # Basic test |  | ||||||
|   - csv-metadata-quality -i data/test.csv -o /tmp/test.csv |  | ||||||
|   # Basic test with unsafe fixes |  | ||||||
|   - csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u |  | ||||||
|   # Geography test |  | ||||||
|   - csv-metadata-quality -i data/test-geography.csv -o /tmp/test.csv |  | ||||||
|   # Geography test with unsafe fixes |  | ||||||
|   - csv-metadata-quality -i data/test-geography.csv -o /tmp/test.csv -u |  | ||||||
|   # Test with experimental checks |  | ||||||
|   - csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e |  | ||||||
|   # Test with AGROVOC validation |  | ||||||
|   - csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject |  | ||||||
|   # Test with AGROVOC validation (and dropping invalid) |  | ||||||
|   - csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject -d |  | ||||||
|  |  | ||||||
| # vim: ts=2 sw=2 et |  | ||||||
							
								
								
									
										38
									
								
								.github/workflows/python-app.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										38
									
								
								.github/workflows/python-app.yml
									
									
									
									
										vendored
									
									
								
							| @@ -12,40 +12,26 @@ on: | |||||||
| jobs: | jobs: | ||||||
|   build: |   build: | ||||||
|  |  | ||||||
|     runs-on: ubuntu-22.04 |     runs-on: ubuntu-latest | ||||||
|  |  | ||||||
|     steps: |     steps: | ||||||
|     - uses: actions/checkout@v3 |     - uses: actions/checkout@v5 | ||||||
|     - name: Set up Python 3.10 |     - name: Install uv | ||||||
|       uses: actions/setup-python@v4 |       uses: astral-sh/setup-uv@v7 | ||||||
|       with: |       with: | ||||||
|         python-version: '3.10' |         version: 'latest' | ||||||
|         cache: 'pip' |     - run: uv sync | ||||||
|     - name: Install dependencies |  | ||||||
|       run: | |  | ||||||
|         python -m pip install --upgrade pip |  | ||||||
|         pip install flake8 pytest |  | ||||||
|         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi |  | ||||||
|         if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi |  | ||||||
|     - name: Lint with flake8 |  | ||||||
|       run: | |  | ||||||
|         # stop the build if there are Python syntax errors or undefined names |  | ||||||
|         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics |  | ||||||
|         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide |  | ||||||
|         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics |  | ||||||
|     - name: Test with pytest |     - name: Test with pytest | ||||||
|       run: | |       run: uv run pytest | ||||||
|         pytest |  | ||||||
|     - name: Test CLI |     - name: Test CLI | ||||||
|       run: | |       run: | | ||||||
|         python setup.py install |  | ||||||
|         # Basic test |         # Basic test | ||||||
|         csv-metadata-quality -i data/test.csv -o /tmp/test.csv |         uv run csv-metadata-quality -i data/test.csv -o /tmp/test.csv | ||||||
|         # Test with unsafe fixes |         # Test with unsafe fixes | ||||||
|         csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u |         uv run csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u | ||||||
|         # Test with experimental checks |         # Test with experimental checks | ||||||
|         csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e |         uv run csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e | ||||||
|         # Test with AGROVOC validation |         # Test with AGROVOC validation | ||||||
|         csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject |         uv run csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject | ||||||
|         # Test with AGROVOC validation (and dropping invalid) |         # Test with AGROVOC validation (and dropping invalid) | ||||||
|         csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject -d |         uv run csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject -d | ||||||
|   | |||||||
							
								
								
									
										1
									
								
								.python-version
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								.python-version
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1 @@ | |||||||
|  | 3.13 | ||||||
							
								
								
									
										16
									
								
								CHANGELOG.md
									
									
									
									
									
								
							
							
						
						
									
										16
									
								
								CHANGELOG.md
									
									
									
									
									
								
							| @@ -5,14 +5,30 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), | |||||||
| and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). | ||||||
|  |  | ||||||
| ## Unreleased | ## Unreleased | ||||||
|  | ### Changed | ||||||
|  | - New AGROVOC REST API URL | ||||||
|  |  | ||||||
|  | ## [0.7.0] - 2025-01-31 | ||||||
|  | ### Added | ||||||
|  | - Ability to normalize DOIs to https://doi.org URI format | ||||||
|  |  | ||||||
| ### Fixed | ### Fixed | ||||||
| - Fixed regex so we don't run the invalid multi-value separator fix on | - Fixed regex so we don't run the invalid multi-value separator fix on | ||||||
| `dcterms.bibliographicCitation` fields | `dcterms.bibliographicCitation` fields | ||||||
| - Fixed regex so we run the comma space fix on `dcterms.bibliographicCitation` | - Fixed regex so we run the comma space fix on `dcterms.bibliographicCitation` | ||||||
| fields | fields | ||||||
|  | - Don't crash the country/region checker/fixer when a title field is missing | ||||||
|  |  | ||||||
|  | ### Changed | ||||||
|  | - Don't run newline fix on description fields | ||||||
|  | - Install requests-cache in main run() function instead of check.agrovoc() function so we only incur the overhead once | ||||||
|  | - Use py3langid instead of langid, see: [How to make language detection with langid.py faster](https://adrien.barbaresi.eu/blog/language-detection-langid-py-faster.html) | ||||||
|  | - Use uv instead of rye | ||||||
|  | - Remove pytest-clarity — I think pytest itself has gotten much better in the past few years | ||||||
|  |  | ||||||
| ### Updated | ### Updated | ||||||
| - Python dependencies, including Pandas 2.0.0 and [Arrow-backed dtypes](https://datapythonista.me/blog/pandas-20-and-the-arrow-revolution-part-i) | - Python dependencies, including Pandas 2.0.0 and [Arrow-backed dtypes](https://datapythonista.me/blog/pandas-20-and-the-arrow-revolution-part-i) | ||||||
|  | - SPDX license list | ||||||
|  |  | ||||||
| ## [0.6.1] - 2023-02-23 | ## [0.6.1] - 2023-02-23 | ||||||
| ### Fixed | ### Fixed | ||||||
|   | |||||||
							
								
								
									
										1
									
								
								MANIFEST.in
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								MANIFEST.in
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1 @@ | |||||||
|  | include csv_metadata_quality/data/licenses.json | ||||||
							
								
								
									
										15
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										15
									
								
								README.md
									
									
									
									
									
								
							| @@ -1,7 +1,6 @@ | |||||||
| <h1 align="center">DSpace CSV Metadata Quality Checker</h1> | <h1 align="center">DSpace CSV Metadata Quality Checker</h1> | ||||||
|  |  | ||||||
| <p align="center"> | <p align="center"> | ||||||
|   <a href="https://ci.mjanja.ch/alanorth/csv-metadata-quality"><img alt="Build Status" src="https://ci.mjanja.ch/api/badges/alanorth/csv-metadata-quality/status.svg"></a> |  | ||||||
|   <a href="https://github.com/ilri/csv-metadata-quality/actions"><img alt="Build and Test" src="https://github.com/ilri/csv-metadata-quality/workflows/Build%20and%20Test/badge.svg"></a> |   <a href="https://github.com/ilri/csv-metadata-quality/actions"><img alt="Build and Test" src="https://github.com/ilri/csv-metadata-quality/workflows/Build%20and%20Test/badge.svg"></a> | ||||||
|   <a href="https://github.com/psf/black"><img alt="Code style: black" src="https://img.shields.io/badge/code%20style-black-000000.svg"></a> |   <a href="https://github.com/psf/black"><img alt="Code style: black" src="https://img.shields.io/badge/code%20style-black-000000.svg"></a> | ||||||
| </p> | </p> | ||||||
| @@ -31,24 +30,25 @@ If you use the DSpace CSV metadata quality checker please cite: | |||||||
| - Check for countries with missing regions (and attempt to fix with `--unsafe-fixes`) | - Check for countries with missing regions (and attempt to fix with `--unsafe-fixes`) | ||||||
| - Remove duplicate metadata values | - Remove duplicate metadata values | ||||||
| - Check for duplicate items, using the title, type, and date issued as an indicator | - Check for duplicate items, using the title, type, and date issued as an indicator | ||||||
|  | - [Normalize DOIs](https://www.crossref.org/documentation/member-setup/constructing-your-dois/) to https://doi.org URI format | ||||||
|  |  | ||||||
| ## Installation | ## Installation | ||||||
| The easiest way to install CSV Metadata Quality is with [poetry](https://python-poetry.org): | The easiest way to install CSV Metadata Quality is with [uv](https://docs.astral.sh/uv/): | ||||||
|  |  | ||||||
| ``` | ``` | ||||||
| $ git clone https://github.com/ilri/csv-metadata-quality.git | $ git clone https://github.com/ilri/csv-metadata-quality.git | ||||||
| $ cd csv-metadata-quality | $ cd csv-metadata-quality | ||||||
| $ poetry install | $ uv sync | ||||||
| $ poetry shell | $ source .venv/bin/activate | ||||||
| ``` | ``` | ||||||
|  |  | ||||||
| Otherwise, if you don't have poetry, you can use a vanilla Python virtual environment: | Otherwise, if you don't have uv, you can use a vanilla Python virtual environment: | ||||||
|  |  | ||||||
| ``` | ``` | ||||||
| $ git clone https://github.com/ilri/csv-metadata-quality.git | $ git clone https://github.com/ilri/csv-metadata-quality.git | ||||||
| $ cd csv-metadata-quality | $ cd csv-metadata-quality | ||||||
| $ python3 -m venv venv | $ python3 -m venv .venv | ||||||
| $ source venv/bin/activate | $ source .venv/bin/activate | ||||||
| $ pip install -r requirements.txt | $ pip install -r requirements.txt | ||||||
| ``` | ``` | ||||||
|  |  | ||||||
| @@ -125,7 +125,6 @@ This currently uses the [Python langid](https://github.com/saffsd/langid.py) lib | |||||||
| - Better logging, for example with INFO, WARN, and ERR levels | - Better logging, for example with INFO, WARN, and ERR levels | ||||||
| - Verbose, debug, or quiet options | - Verbose, debug, or quiet options | ||||||
| - Warn if an author is shorter than 3 characters? | - Warn if an author is shorter than 3 characters? | ||||||
| - Validate DOIs? Normalize to https://doi.org format? Or use just the DOI part: 10.1016/j.worlddev.2010.06.006 |  | ||||||
| - Warn if two items use the same file in `filename` column | - Warn if two items use the same file in `filename` column | ||||||
| - Add tests for application invocation, ie `tests/test_app.py`? | - Add tests for application invocation, ie `tests/test_app.py`? | ||||||
| - Validate ISSNs or journal titles against CrossRef API? | - Validate ISSNs or journal titles against CrossRef API? | ||||||
|   | |||||||
| @@ -37,3 +37,7 @@ Mojibake,2021-03-18,,,,Publicaçao CIAT,,,,Report,,,, | |||||||
| Title missing from citation,2021-12-05,,,,,,,,,"Orth, A. 2021. Title missing f rom citation.",,, | Title missing from citation,2021-12-05,,,,,,,,,"Orth, A. 2021. Title missing f rom citation.",,, | ||||||
| Country missing region,2021-12-08,,,,,Kenya,,,,,,, | Country missing region,2021-12-08,,,,,Kenya,,,,,,, | ||||||
| Subregion field shouldn’t trigger region checks,2022-12-07,,,,,Kenya,,,,,,Eastern Africa,Baringo | Subregion field shouldn’t trigger region checks,2022-12-07,,,,,Kenya,,,,,,Eastern Africa,Baringo | ||||||
|  | DOI with HTTP and dx.doi.org,2024-04-23,,,,,,,,,,http://dx.doi.org/10.1016/j.envc.2023.100794,, | ||||||
|  | DOI with colon,2024-04-23,,,,,,,,,,doi: 10.11648/j.jps.20140201.14,, | ||||||
|  | Upper case bare DOI,2024-04-23,,,,,,,,,,10.19103/AS.2018.0043.16,, | ||||||
|  | DOI with %2f,2024-06-25,,,,,,,,,,https://doi.org/10.1016%2fj.envc.2023.100794,, | ||||||
|   | |||||||
| 
 | 
							
								
								
									
										1608
									
								
								poetry.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										1608
									
								
								poetry.lock
									
									
									
										generated
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -1,40 +1,55 @@ | |||||||
| [tool.poetry] | [project] | ||||||
| name = "csv-metadata-quality" | name = "csv-metadata-quality" | ||||||
| version = "0.6.1" | version = "0.7.0" | ||||||
| description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem." | description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem." | ||||||
| authors = ["Alan Orth <alan.orth@gmail.com>"] | authors = [ | ||||||
| license="GPL-3.0-only" |     { name = "Alan Orth", email = "alan.orth@gmail.com" } | ||||||
|  | ] | ||||||
|  | license= { file = "LICENSE.txt" } | ||||||
|  | dependencies = [ | ||||||
|  |     "pandas[feather,performance]~=2.3.1", | ||||||
|  |     "python-stdnum~=2.1", | ||||||
|  |     "requests~=2.32.3", | ||||||
|  |     "requests-cache~=1.2.1", | ||||||
|  |     "colorama~=0.4", | ||||||
|  |     "ftfy~=6.3.0", | ||||||
|  |     "country-converter~=1.3", | ||||||
|  |     "pycountry~=24.6.1", | ||||||
|  |     "py3langid~=0.3", | ||||||
|  | ] | ||||||
|  | readme = "README.md" | ||||||
|  | requires-python = ">= 3.10" | ||||||
|  |  | ||||||
|  | classifiers = [ | ||||||
|  |   "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", | ||||||
|  |   "Natural Language :: English", | ||||||
|  |   "Operating System :: OS Independent", | ||||||
|  |   "Programming Language :: Python :: 3.10", | ||||||
|  |   "Programming Language :: Python :: 3.11", | ||||||
|  |   "Programming Language :: Python :: 3.12", | ||||||
|  |   "Programming Language :: Python :: 3.13", | ||||||
|  |   "Programming Language :: Python :: Implementation :: CPython", | ||||||
|  | ] | ||||||
|  |  | ||||||
|  | [project.urls] | ||||||
| repository = "https://github.com/ilri/csv-metadata-quality" | repository = "https://github.com/ilri/csv-metadata-quality" | ||||||
| homepage = "https://github.com/ilri/csv-metadata-quality" | homepage = "https://github.com/ilri/csv-metadata-quality" | ||||||
|  |  | ||||||
| [tool.poetry.scripts] | [project.scripts] | ||||||
| csv-metadata-quality = 'csv_metadata_quality.__main__:main' | csv-metadata-quality = 'csv_metadata_quality.__main__:main' | ||||||
|  |  | ||||||
| [tool.poetry.dependencies] | # See: https://docs.astral.sh/uv/concepts/build-backend/ | ||||||
| python = "^3.9" |  | ||||||
| pandas = "^2.0.0" |  | ||||||
| python-stdnum = "^1.18" |  | ||||||
| requests = "^2.28.2" |  | ||||||
| requests-cache = "^0.9.8" |  | ||||||
| langid = "^1.1.6" |  | ||||||
| colorama = "^0.4.6" |  | ||||||
| ftfy = "^6.1.1" |  | ||||||
| country-converter = "~1.0.0" |  | ||||||
| pycountry = {git = "https://github.com/alanorth/pycountry", rev = "iso-codes-4.13.0"} |  | ||||||
| pyarrow = "^11.0.0" |  | ||||||
|  |  | ||||||
| [tool.poetry.group.dev.dependencies] |  | ||||||
| pytest = "^7.2.1" |  | ||||||
| flake8 = "^6.0.0" |  | ||||||
| pytest-clarity = "^1.0.1" |  | ||||||
| black = "^23.1.0" |  | ||||||
| isort = "^5.12.0" |  | ||||||
| csvkit = "^1.1.0" |  | ||||||
| ipython = "^8.10.0" |  | ||||||
|  |  | ||||||
| [build-system] | [build-system] | ||||||
| requires = ["poetry>=0.12"] | requires = ["uv_build>=0.8.8,<0.9.0"] | ||||||
| build-backend = "poetry.masonry.api" | build-backend = "uv_build" | ||||||
|  |  | ||||||
|  | [dependency-groups] | ||||||
|  | dev = [ | ||||||
|  |     "pytest~=8.3", | ||||||
|  |     "isort~=6.0", | ||||||
|  |     "csvkit~=2.0", | ||||||
|  |     "ipython~=8.31", | ||||||
|  | ] | ||||||
|  |  | ||||||
| [tool.isort] | [tool.isort] | ||||||
| profile = "black" | profile = "black" | ||||||
|   | |||||||
							
								
								
									
										9
									
								
								renovate.json5
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								renovate.json5
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,9 @@ | |||||||
|  | { | ||||||
|  |   $schema: "https://docs.renovatebot.com/renovate-schema.json", | ||||||
|  |   lockFileMaintenance: { | ||||||
|  |     enabled: true, | ||||||
|  |   }, | ||||||
|  |   pip_requirements: { | ||||||
|  |     enabled: false | ||||||
|  |   } | ||||||
|  | } | ||||||
| @@ -1,82 +0,0 @@ | |||||||
| agate-dbf==0.2.2 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| agate-excel==0.2.5 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| agate-sql==0.5.9 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| agate==1.7.1 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| appdirs==1.4.4 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| appnope==0.1.3 ; python_version >= "3.9" and python_version < "4.0" and sys_platform == "darwin" |  | ||||||
| asttokens==2.2.1 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| attrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| babel==2.12.1 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| backcall==0.2.0 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| black==23.3.0 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| cattrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| certifi==2022.12.7 ; python_version >= "3.9" and python_version < "4" |  | ||||||
| charset-normalizer==3.1.0 ; python_version >= "3.9" and python_version < "4" |  | ||||||
| click==8.1.3 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| colorama==0.4.6 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| country-converter==1.0.0 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| csvkit==1.1.1 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| dbfread==2.0.7 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| decorator==5.1.1 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| et-xmlfile==1.1.0 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| exceptiongroup==1.1.1 ; python_version >= "3.9" and python_version < "3.11" |  | ||||||
| executing==1.2.0 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| flake8==6.0.0 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| ftfy==6.1.1 ; python_version >= "3.9" and python_version < "4" |  | ||||||
| greenlet==2.0.2 ; python_version >= "3.9" and platform_machine == "aarch64" and python_version < "4.0" or python_version >= "3.9" and platform_machine == "ppc64le" and python_version < "4.0" or python_version >= "3.9" and platform_machine == "x86_64" and python_version < "4.0" or python_version >= "3.9" and platform_machine == "amd64" and python_version < "4.0" or python_version >= "3.9" and platform_machine == "AMD64" and python_version < "4.0" or python_version >= "3.9" and platform_machine == "win32" and python_version < "4.0" or python_version >= "3.9" and platform_machine == "WIN32" and python_version < "4.0" |  | ||||||
| idna==3.4 ; python_version >= "3.9" and python_version < "4" |  | ||||||
| iniconfig==2.0.0 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| ipython==8.12.0 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| isodate==0.6.1 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| isort==5.12.0 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| jedi==0.18.2 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| langid==1.1.6 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| leather==0.3.4 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| markdown-it-py==2.2.0 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| matplotlib-inline==0.1.6 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| mccabe==0.7.0 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| mdurl==0.1.2 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| mypy-extensions==1.0.0 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| numpy==1.24.2 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| olefile==0.46 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| openpyxl==3.1.2 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| packaging==23.0 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| pandas==2.0.0 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| parsedatetime==2.6 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| parso==0.8.3 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| pathspec==0.11.1 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| pexpect==4.8.0 ; python_version >= "3.9" and python_version < "4.0" and sys_platform != "win32" |  | ||||||
| pickleshare==0.7.5 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| platformdirs==3.2.0 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| pluggy==1.0.0 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| pprintpp==0.4.0 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| prompt-toolkit==3.0.38 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| ptyprocess==0.7.0 ; python_version >= "3.9" and python_version < "4.0" and sys_platform != "win32" |  | ||||||
| pure-eval==0.2.2 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| pyarrow==11.0.0 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| pycodestyle==2.10.0 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| pycountry @ git+https://github.com/alanorth/pycountry@iso-codes-4.13.0 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| pyflakes==3.0.1 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| pygments==2.14.0 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| pytest-clarity==1.0.1 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| pytest==7.2.2 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| python-slugify==8.0.1 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| python-stdnum==1.18 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| pytimeparse==1.1.8 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| pytz==2023.3 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| requests-cache==0.9.8 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| requests==2.28.2 ; python_version >= "3.9" and python_version < "4" |  | ||||||
| rich==13.3.3 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| six==1.16.0 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| sqlalchemy==1.4.47 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| stack-data==0.6.2 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| text-unidecode==1.3 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| tomli==2.0.1 ; python_version >= "3.9" and python_version < "3.11" |  | ||||||
| traitlets==5.9.0 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| typing-extensions==4.5.0 ; python_version >= "3.9" and python_version < "3.10" |  | ||||||
| tzdata==2023.3 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| url-normalize==1.4.3 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
| urllib3==1.26.15 ; python_version >= "3.9" and python_version < "4" |  | ||||||
| wcwidth==0.2.6 ; python_version >= "3.9" and python_version < "4" |  | ||||||
| xlrd==2.0.1 ; python_version >= "3.9" and python_version < "4.0" |  | ||||||
							
								
								
									
										448
									
								
								requirements.txt
									
									
									
									
									
								
							
							
						
						
									
										448
									
								
								requirements.txt
									
									
									
									
									
								
							| @@ -1,25 +1,423 @@ | |||||||
| appdirs==1.4.4 ; python_version >= "3.9" and python_version < "4.0" | # This file was autogenerated by uv via the following command: | ||||||
| attrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0" | #    uv export --no-dev | ||||||
| cattrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0" | -e . | ||||||
| certifi==2022.12.7 ; python_version >= "3.9" and python_version < "4" | attrs==25.3.0 \ | ||||||
| charset-normalizer==3.1.0 ; python_version >= "3.9" and python_version < "4" |     --hash=sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3 \ | ||||||
| colorama==0.4.6 ; python_version >= "3.9" and python_version < "4.0" |     --hash=sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b | ||||||
| country-converter==1.0.0 ; python_version >= "3.9" and python_version < "4.0" |     # via | ||||||
| exceptiongroup==1.1.1 ; python_version >= "3.9" and python_version < "3.11" |     #   cattrs | ||||||
| ftfy==6.1.1 ; python_version >= "3.9" and python_version < "4" |     #   requests-cache | ||||||
| idna==3.4 ; python_version >= "3.9" and python_version < "4" | bottleneck==1.5.0 \ | ||||||
| langid==1.1.6 ; python_version >= "3.9" and python_version < "4.0" |     --hash=sha256:049162927cf802208cc8691fb99b108afe74656cdc96b9e2067cf56cb9d84056 \ | ||||||
| numpy==1.24.2 ; python_version >= "3.9" and python_version < "4.0" |     --hash=sha256:07c2c1aa39917b5c9be77e85791aa598e8b2c00f8597a198b93628bbfde72a3f \ | ||||||
| pandas==2.0.0 ; python_version >= "3.9" and python_version < "4.0" |     --hash=sha256:0dca825048a3076f34c4a35409e3277b31ceeb3cbb117bbe2a13ff5c214bcabc \ | ||||||
| pyarrow==11.0.0 ; python_version >= "3.9" and python_version < "4.0" |     --hash=sha256:1043d95674566063f638582cc8700c24c4427f532f86b9e7cfc9f9ec84abc1ff \ | ||||||
| pycountry @ git+https://github.com/alanorth/pycountry@iso-codes-4.13.0 ; python_version >= "3.9" and python_version < "4.0" |     --hash=sha256:1214a2bf3b36c66e3898aab821ad8366a3062db6f83a8f083e2f799d202e86ea \ | ||||||
| python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "4.0" |     --hash=sha256:1648f2a0d52b78f6e530385862e279ffa66baae2ce038bfdf5d8b29a638bac46 \ | ||||||
| python-stdnum==1.18 ; python_version >= "3.9" and python_version < "4.0" |     --hash=sha256:16fead35c0b5d307815997eef67d03c2151f255ca889e0fc3d68703f41aa5302 \ | ||||||
| pytz==2023.3 ; python_version >= "3.9" and python_version < "4.0" |     --hash=sha256:27e38e829497ca0a5eebdb79d3293aaa424f3c31c13806e5c607fd414536b7c3 \ | ||||||
| requests-cache==0.9.8 ; python_version >= "3.9" and python_version < "4.0" |     --hash=sha256:2f5e863a4fdaf9c85416789aeb333d1cdd3603037fd854ad58b0e2ac73be16cf \ | ||||||
| requests==2.28.2 ; python_version >= "3.9" and python_version < "4" |     --hash=sha256:3886799cceb271eb67d057f6ecb13fb4582bda17a3b13b4fa0334638c59637c6 \ | ||||||
| six==1.16.0 ; python_version >= "3.9" and python_version < "4.0" |     --hash=sha256:3f3e308416886e29441a0b71bce8f3eb4c7a4943be541fd918244aaf25534d36 \ | ||||||
| tzdata==2023.3 ; python_version >= "3.9" and python_version < "4.0" |     --hash=sha256:436a402f0d60a9d6541d7adb0929501225a151ad03b96b756e0b607db6a106f1 \ | ||||||
| url-normalize==1.4.3 ; python_version >= "3.9" and python_version < "4.0" |     --hash=sha256:48c2657102f3288e178cc341f000475a32f49a3cd8b7067e091d5446fa899383 \ | ||||||
| urllib3==1.26.15 ; python_version >= "3.9" and python_version < "4" |     --hash=sha256:5c4c94cfcba46adfe71894c63c4b186c847965e73727dbaf5fd9ade41ef38e6e \ | ||||||
| wcwidth==0.2.6 ; python_version >= "3.9" and python_version < "4" |     --hash=sha256:613165ce39bf6bd80f5307da0f05842ba534b213a89526f1eba82ea0099592fc \ | ||||||
|  |     --hash=sha256:7962177b04b865b17e883ace01c68cf50353ef6a9437ec01bad1f5a1a2708490 \ | ||||||
|  |     --hash=sha256:7967e0189defe9f49025bd6469ff0fe22af5463926af55c7ba1e4592051d8ef8 \ | ||||||
|  |     --hash=sha256:80ef9eea2a92fc5a1c04734aa1bcf317253241062c962eaa6e7f123b583d0109 \ | ||||||
|  |     --hash=sha256:816c910c5d1fb53adb32581c52a513b206f503ae253ace70cb32d1fe4e45af1d \ | ||||||
|  |     --hash=sha256:8892f2d90d63a3dd5884e8f3fe7bbe8c569851a984023340ef926d2205332d96 \ | ||||||
|  |     --hash=sha256:8d123762f78717fc35ecf10cad45d08273fcb12ab40b3c847190b83fec236f03 \ | ||||||
|  |     --hash=sha256:97285cfedf3545d9a010b2db2123f9750bf920081e29364cc465052973bd0b5a \ | ||||||
|  |     --hash=sha256:9be5dfdf1a662d1d4423d7b7e8dd9a1b7046dcc2ce67b6e94a31d1cc57a8558f \ | ||||||
|  |     --hash=sha256:9ca39aca62f0e827fc8c9b352352224ecb38a98d8f9cbc30f071672c31904aa2 \ | ||||||
|  |     --hash=sha256:a107ed8b5f998918c24a1e476dbd2dfc3514ab0082df7132c460b01e6ffd8cf4 \ | ||||||
|  |     --hash=sha256:abc6a24a41f55765215005cec97dd69f41ac747ed0f4d446caa508531957eeda \ | ||||||
|  |     --hash=sha256:bda7c475d4a7e271dbd0b1d4bbce29065edc8891361857105b7212fe383c9a36 \ | ||||||
|  |     --hash=sha256:c15a5f009ea72f95d0a35e784c6944af2b6d7dab102341fb3c3412e41ce5adf6 \ | ||||||
|  |     --hash=sha256:c860242cf20e69d5aab2ec3c5d6c8c2a15f19e4b25b28b8fca2c2a12cefae9d8 \ | ||||||
|  |     --hash=sha256:dbb0f0d38feda63050aa253cf9435e81a0ecfac954b0df84896636be9eabd9b6 \ | ||||||
|  |     --hash=sha256:dc8d553d4bf033d3e025cd32d4c034d2daf10709e31ced3909811d1c843e451c \ | ||||||
|  |     --hash=sha256:f13b644207118564b95eb7b2130555fb4a4b2266a739b2a8f98a5276baa723ea \ | ||||||
|  |     --hash=sha256:f218e4dae6511180dcc4f06d8300e0c81e7f3df382091f464c5a919d289fab8e \ | ||||||
|  |     --hash=sha256:f26005740e6ef6013eba8a48241606a963e862a601671eab064b7835cd12ef3d \ | ||||||
|  |     --hash=sha256:f9545206daaffaecf88d176f657b7c939f6d909275991121dc8dee936dcd8985 \ | ||||||
|  |     --hash=sha256:fc0c0b661005b059fcb09988f8b5e2cd5e9c702e1bed24819ed38f85145140b5 | ||||||
|  |     # via pandas | ||||||
|  | cattrs==25.1.1 \ | ||||||
|  |     --hash=sha256:1b40b2d3402af7be79a7e7e097a9b4cd16d4c06e6d526644b0b26a063a1cc064 \ | ||||||
|  |     --hash=sha256:c914b734e0f2d59e5b720d145ee010f1fd9a13ee93900922a2f3f9d593b8382c | ||||||
|  |     # via requests-cache | ||||||
|  | certifi==2025.6.15 \ | ||||||
|  |     --hash=sha256:2e0c7ce7cb5d8f8634ca55d2ba7e6ec2689a2fd6537d8dec1296a477a4910057 \ | ||||||
|  |     --hash=sha256:d747aa5a8b9bbbb1bb8c22bb13e22bd1f18e9796defa16bab421f7f7a317323b | ||||||
|  |     # via requests | ||||||
|  | charset-normalizer==3.4.2 \ | ||||||
|  |     --hash=sha256:0c29de6a1a95f24b9a1aa7aefd27d2487263f00dfd55a77719b530788f75cff7 \ | ||||||
|  |     --hash=sha256:0c8c57f84ccfc871a48a47321cfa49ae1df56cd1d965a09abe84066f6853b9c0 \ | ||||||
|  |     --hash=sha256:0f5d9ed7f254402c9e7d35d2f5972c9bbea9040e99cd2861bd77dc68263277c7 \ | ||||||
|  |     --hash=sha256:18dd2e350387c87dabe711b86f83c9c78af772c748904d372ade190b5c7c9d4d \ | ||||||
|  |     --hash=sha256:1c95a1e2902a8b722868587c0e1184ad5c55631de5afc0eb96bc4b0d738092c0 \ | ||||||
|  |     --hash=sha256:21b2899062867b0e1fde9b724f8aecb1af14f2778d69aacd1a5a1853a597a5db \ | ||||||
|  |     --hash=sha256:289200a18fa698949d2b39c671c2cc7a24d44096784e76614899a7ccf2574b7b \ | ||||||
|  |     --hash=sha256:28a1005facc94196e1fb3e82a3d442a9d9110b8434fc1ded7a24a2983c9888d8 \ | ||||||
|  |     --hash=sha256:32fc0341d72e0f73f80acb0a2c94216bd704f4f0bce10aedea38f30502b271ff \ | ||||||
|  |     --hash=sha256:3fddb7e2c84ac87ac3a947cb4e66d143ca5863ef48e4a5ecb83bd48619e4634e \ | ||||||
|  |     --hash=sha256:4a476b06fbcf359ad25d34a057b7219281286ae2477cc5ff5e3f70a246971148 \ | ||||||
|  |     --hash=sha256:4e594135de17ab3866138f496755f302b72157d115086d100c3f19370839dd3a \ | ||||||
|  |     --hash=sha256:5a9979887252a82fefd3d3ed2a8e3b937a7a809f65dcb1e068b090e165bbe99e \ | ||||||
|  |     --hash=sha256:5baececa9ecba31eff645232d59845c07aa030f0c81ee70184a90d35099a0e63 \ | ||||||
|  |     --hash=sha256:5bf4545e3b962767e5c06fe1738f951f77d27967cb2caa64c28be7c4563e162c \ | ||||||
|  |     --hash=sha256:68a328e5f55ec37c57f19ebb1fdc56a248db2e3e9ad769919a58672958e8f366 \ | ||||||
|  |     --hash=sha256:6b66f92b17849b85cad91259efc341dce9c1af48e2173bf38a85c6329f1033e5 \ | ||||||
|  |     --hash=sha256:6c9379d65defcab82d07b2a9dfbfc2e95bc8fe0ebb1b176a3190230a3ef0e07c \ | ||||||
|  |     --hash=sha256:7222ffd5e4de8e57e03ce2cef95a4c43c98fcb72ad86909abdfc2c17d227fc1b \ | ||||||
|  |     --hash=sha256:7a6ab32f7210554a96cd9e33abe3ddd86732beeafc7a28e9955cdf22ffadbab0 \ | ||||||
|  |     --hash=sha256:7c48ed483eb946e6c04ccbe02c6b4d1d48e51944b6db70f697e089c193404941 \ | ||||||
|  |     --hash=sha256:7f56930ab0abd1c45cd15be65cc741c28b1c9a34876ce8c17a2fa107810c0af0 \ | ||||||
|  |     --hash=sha256:8075c35cd58273fee266c58c0c9b670947c19df5fb98e7b66710e04ad4e9ff86 \ | ||||||
|  |     --hash=sha256:8755483f3c00d6c9a77f490c17e6ab0c8729e39e6390328e42521ef175380ae6 \ | ||||||
|  |     --hash=sha256:926ca93accd5d36ccdabd803392ddc3e03e6d4cd1cf17deff3b989ab8e9dbcf0 \ | ||||||
|  |     --hash=sha256:98f862da73774290f251b9df8d11161b6cf25b599a66baf087c1ffe340e9bfd1 \ | ||||||
|  |     --hash=sha256:9cbfacf36cb0ec2897ce0ebc5d08ca44213af24265bd56eca54bee7923c48fd6 \ | ||||||
|  |     --hash=sha256:a370b3e078e418187da8c3674eddb9d983ec09445c99a3a263c2011993522981 \ | ||||||
|  |     --hash=sha256:a955b438e62efdf7e0b7b52a64dc5c3396e2634baa62471768a64bc2adb73d5c \ | ||||||
|  |     --hash=sha256:aa6af9e7d59f9c12b33ae4e9450619cf2488e2bbe9b44030905877f0b2324980 \ | ||||||
|  |     --hash=sha256:aa88ca0b1932e93f2d961bf3addbb2db902198dca337d88c89e1559e066e7645 \ | ||||||
|  |     --hash=sha256:aaeeb6a479c7667fbe1099af9617c83aaca22182d6cf8c53966491a0f1b7ffb7 \ | ||||||
|  |     --hash=sha256:b2d318c11350e10662026ad0eb71bb51c7812fc8590825304ae0bdd4ac283acd \ | ||||||
|  |     --hash=sha256:b33de11b92e9f75a2b545d6e9b6f37e398d86c3e9e9653c4864eb7e89c5773ef \ | ||||||
|  |     --hash=sha256:be1e352acbe3c78727a16a455126d9ff83ea2dfdcbc83148d2982305a04714c2 \ | ||||||
|  |     --hash=sha256:bee093bf902e1d8fc0ac143c88902c3dfc8941f7ea1d6a8dd2bcb786d33db03d \ | ||||||
|  |     --hash=sha256:cddf7bd982eaa998934a91f69d182aec997c6c468898efe6679af88283b498d3 \ | ||||||
|  |     --hash=sha256:cf713fe9a71ef6fd5adf7a79670135081cd4431c2943864757f0fa3a65b1fafd \ | ||||||
|  |     --hash=sha256:d41c4d287cfc69060fa91cae9683eacffad989f1a10811995fa309df656ec214 \ | ||||||
|  |     --hash=sha256:d524ba3f1581b35c03cb42beebab4a13e6cdad7b36246bd22541fa585a56cccd \ | ||||||
|  |     --hash=sha256:daac4765328a919a805fa5e2720f3e94767abd632ae410a9062dff5412bae65a \ | ||||||
|  |     --hash=sha256:db4c7bf0e07fc3b7d89ac2a5880a6a8062056801b83ff56d8464b70f65482b6c \ | ||||||
|  |     --hash=sha256:dedb8adb91d11846ee08bec4c8236c8549ac721c245678282dcb06b221aab59f \ | ||||||
|  |     --hash=sha256:e53efc7c7cee4c1e70661e2e112ca46a575f90ed9ae3fef200f2a25e954f4b28 \ | ||||||
|  |     --hash=sha256:e635b87f01ebc977342e2697d05b56632f5f879a4f15955dfe8cef2448b51691 \ | ||||||
|  |     --hash=sha256:e70e990b2137b29dc5564715de1e12701815dacc1d056308e2b17e9095372a82 \ | ||||||
|  |     --hash=sha256:e8082b26888e2f8b36a042a58307d5b917ef2b1cacab921ad3323ef91901c71a \ | ||||||
|  |     --hash=sha256:eba9904b0f38a143592d9fc0e19e2df0fa2e41c3c3745554761c5f6447eedabf \ | ||||||
|  |     --hash=sha256:ef8de666d6179b009dce7bcb2ad4c4a779f113f12caf8dc77f0162c29d20490b \ | ||||||
|  |     --hash=sha256:efd387a49825780ff861998cd959767800d54f8308936b21025326de4b5a42b9 \ | ||||||
|  |     --hash=sha256:f0aa37f3c979cf2546b73e8222bbfa3dc07a641585340179d768068e3455e544 \ | ||||||
|  |     --hash=sha256:f69a27e45c43520f5487f27627059b64aaf160415589230992cec34c5e18a509 \ | ||||||
|  |     --hash=sha256:fcbe676a55d7445b22c10967bceaaf0ee69407fbe0ece4d032b6eb8d4565982a \ | ||||||
|  |     --hash=sha256:fdb20a30fe1175ecabed17cbf7812f7b804b8a315a25f24678bcdf120a90077f | ||||||
|  |     # via requests | ||||||
|  | colorama==0.4.6 \ | ||||||
|  |     --hash=sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44 \ | ||||||
|  |     --hash=sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6 | ||||||
|  |     # via csv-metadata-quality | ||||||
|  | country-converter==1.3 \ | ||||||
|  |     --hash=sha256:006958c83adeada455d2f178921fdd051def736259ff250fada912eaf3ca8cf1 \ | ||||||
|  |     --hash=sha256:f6a1a14d1f98112ca90a5198f645f4e60bb73840e98f3f733893ff5b617c2f38 | ||||||
|  |     # via csv-metadata-quality | ||||||
|  | exceptiongroup==1.3.0 ; python_full_version < '3.11' \ | ||||||
|  |     --hash=sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10 \ | ||||||
|  |     --hash=sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88 | ||||||
|  |     # via cattrs | ||||||
|  | ftfy==6.3.1 \ | ||||||
|  |     --hash=sha256:7c70eb532015cd2f9adb53f101fb6c7945988d023a085d127d1573dc49dd0083 \ | ||||||
|  |     --hash=sha256:9b3c3d90f84fb267fe64d375a07b7f8912d817cf86009ae134aa03e1819506ec | ||||||
|  |     # via csv-metadata-quality | ||||||
|  | idna==3.10 \ | ||||||
|  |     --hash=sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9 \ | ||||||
|  |     --hash=sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3 | ||||||
|  |     # via | ||||||
|  |     #   requests | ||||||
|  |     #   url-normalize | ||||||
|  | llvmlite==0.44.0 \ | ||||||
|  |     --hash=sha256:07667d66a5d150abed9157ab6c0b9393c9356f229784a4385c02f99e94fc94d4 \ | ||||||
|  |     --hash=sha256:1d671a56acf725bf1b531d5ef76b86660a5ab8ef19bb6a46064a705c6ca80aad \ | ||||||
|  |     --hash=sha256:2fb7c4f2fb86cbae6dca3db9ab203eeea0e22d73b99bc2341cdf9de93612e930 \ | ||||||
|  |     --hash=sha256:319bddd44e5f71ae2689859b7203080716448a3cd1128fb144fe5c055219d516 \ | ||||||
|  |     --hash=sha256:40526fb5e313d7b96bda4cbb2c85cd5374e04d80732dd36a282d72a560bb6408 \ | ||||||
|  |     --hash=sha256:41e3839150db4330e1b2716c0be3b5c4672525b4c9005e17c7597f835f351ce2 \ | ||||||
|  |     --hash=sha256:46224058b13c96af1365290bdfebe9a6264ae62fb79b2b55693deed11657a8bf \ | ||||||
|  |     --hash=sha256:5f79a728e0435493611c9f405168682bb75ffd1fbe6fc360733b850c80a026db \ | ||||||
|  |     --hash=sha256:7202b678cdf904823c764ee0fe2dfe38a76981f4c1e51715b4cb5abb6cf1d9e8 \ | ||||||
|  |     --hash=sha256:9c58867118bad04a0bb22a2e0068c693719658105e40009ffe95c7000fcde88e \ | ||||||
|  |     --hash=sha256:9fbadbfba8422123bab5535b293da1cf72f9f478a65645ecd73e781f962ca614 \ | ||||||
|  |     --hash=sha256:aa0097052c32bf721a4efc03bd109d335dfa57d9bffb3d4c24cc680711b8b4fc \ | ||||||
|  |     --hash=sha256:ace564d9fa44bb91eb6e6d8e7754977783c68e90a471ea7ce913bff30bd62427 \ | ||||||
|  |     --hash=sha256:c0143a5ef336da14deaa8ec26c5449ad5b6a2b564df82fcef4be040b9cacfea9 \ | ||||||
|  |     --hash=sha256:c5d22c3bfc842668168a786af4205ec8e3ad29fb1bc03fd11fd48460d0df64c1 \ | ||||||
|  |     --hash=sha256:cccf8eb28f24840f2689fb1a45f9c0f7e582dd24e088dcf96e424834af11f791 \ | ||||||
|  |     --hash=sha256:d752f89e31b66db6f8da06df8b39f9b91e78c5feea1bf9e8c1fba1d1c24c065d \ | ||||||
|  |     --hash=sha256:d8489634d43c20cd0ad71330dde1d5bc7b9966937a263ff1ec1cebb90dc50955 \ | ||||||
|  |     --hash=sha256:eae7e2d4ca8f88f89d315b48c6b741dcb925d6a1042da694aa16ab3dd4cbd3a1 \ | ||||||
|  |     --hash=sha256:eed7d5f29136bda63b6d7804c279e2b72e08c952b7c5df61f45db408e0ee52f3 \ | ||||||
|  |     --hash=sha256:f01a394e9c9b7b1d4e63c327b096d10f6f0ed149ef53d38a09b3749dcf8c9610 | ||||||
|  |     # via numba | ||||||
|  | numba==0.61.2 \ | ||||||
|  |     --hash=sha256:34fba9406078bac7ab052efbf0d13939426c753ad72946baaa5bf9ae0ebb8dd2 \ | ||||||
|  |     --hash=sha256:3945615cd73c2c7eba2a85ccc9c1730c21cd3958bfcf5a44302abae0fb07bb60 \ | ||||||
|  |     --hash=sha256:3a10a8fc9afac40b1eac55717cece1b8b1ac0b946f5065c89e00bde646b5b154 \ | ||||||
|  |     --hash=sha256:48a53a3de8f8793526cbe330f2a39fe9a6638efcbf11bd63f3d2f9757ae345cd \ | ||||||
|  |     --hash=sha256:49c980e4171948ffebf6b9a2520ea81feed113c1f4890747ba7f59e74be84b1b \ | ||||||
|  |     --hash=sha256:4ddce10009bc097b080fc96876d14c051cc0c7679e99de3e0af59014dab7dfe8 \ | ||||||
|  |     --hash=sha256:59321215e2e0ac5fa928a8020ab00b8e57cda8a97384963ac0dfa4d4e6aa54e7 \ | ||||||
|  |     --hash=sha256:5b1bb509d01f23d70325d3a5a0e237cbc9544dd50e50588bc581ba860c213546 \ | ||||||
|  |     --hash=sha256:5f154aaea625fb32cfbe3b80c5456d514d416fcdf79733dd69c0df3a11348e9e \ | ||||||
|  |     --hash=sha256:76bcec9f46259cedf888041b9886e257ae101c6268261b19fda8cfbc52bec9d1 \ | ||||||
|  |     --hash=sha256:7d3bcada3c9afba3bed413fba45845f2fb9cd0d2b27dd58a1be90257e293d140 \ | ||||||
|  |     --hash=sha256:8750ee147940a6637b80ecf7f95062185ad8726c8c28a2295b8ec1160a196f7d \ | ||||||
|  |     --hash=sha256:97cf4f12c728cf77c9c1d7c23707e4d8fb4632b46275f8f3397de33e5877af18 \ | ||||||
|  |     --hash=sha256:ae45830b129c6137294093b269ef0a22998ccc27bf7cf096ab8dcf7bca8946f9 \ | ||||||
|  |     --hash=sha256:ae8c7a522c26215d5f62ebec436e3d341f7f590079245a2f1008dfd498cc1642 \ | ||||||
|  |     --hash=sha256:bbfdf4eca202cebade0b7d43896978e146f39398909a42941c9303f82f403a18 \ | ||||||
|  |     --hash=sha256:bd1e74609855aa43661edffca37346e4e8462f6903889917e9f41db40907daa2 \ | ||||||
|  |     --hash=sha256:bdbca73ad81fa196bd53dc12e3aaf1564ae036e0c125f237c7644fe64a4928ab \ | ||||||
|  |     --hash=sha256:cf9f9fc00d6eca0c23fc840817ce9f439b9f03c8f03d6246c0e7f0cb15b7162a \ | ||||||
|  |     --hash=sha256:ea0247617edcb5dd61f6106a56255baab031acc4257bddaeddb3a1003b4ca3fd \ | ||||||
|  |     --hash=sha256:efd3db391df53aaa5cfbee189b6c910a5b471488749fd6606c3f33fc984c2ae2 | ||||||
|  |     # via pandas | ||||||
|  | numexpr==2.11.0 \ | ||||||
|  |     --hash=sha256:096ec768bee2ef14ac757b4178e3c5f05e5f1cb6cae83b2eea9b4ba3ec1a86dd \ | ||||||
|  |     --hash=sha256:097aa8835d32d6ac52f2be543384019b4b134d1fb67998cbfc4271155edfe54a \ | ||||||
|  |     --hash=sha256:0a184e5930c77ab91dd9beee4df403b825cd9dfc4e9ba4670d31c9fcb4e2c08e \ | ||||||
|  |     --hash=sha256:0db4c2dcad09f9594b45fce794f4b903345195a8c216e252de2aa92884fd81a8 \ | ||||||
|  |     --hash=sha256:2036be213a6a1b5ce49acf60de99b911a0f9d174aab7679dde1fae315134f826 \ | ||||||
|  |     --hash=sha256:238d19465a272ada3967600fada55e4c6900485aefb42122a78dfcaf2efca65f \ | ||||||
|  |     --hash=sha256:321736cb98f090ce864b58cc5c37661cb5548e394e0fe24d5f2c7892a89070c3 \ | ||||||
|  |     --hash=sha256:4229060be866813122385c608bbd3ea48fe0b33e91f2756810d28c1cdbfc98f1 \ | ||||||
|  |     --hash=sha256:450eba3c93c3e3e8070566ad8d70590949d6e574b1c960bf68edd789811e7da8 \ | ||||||
|  |     --hash=sha256:4aba2f640d9d45b986a613ce94fcf008c42cc72eeba2990fefdb575228b1d3d1 \ | ||||||
|  |     --hash=sha256:5ff337b36db141a1a0b49f01282783744f49f0d401cc83a512fc5596eb7db5c6 \ | ||||||
|  |     --hash=sha256:6b5fdfc86cbf5373ea67d554cc6f08863825ea8e928416bed8d5285e387420c6 \ | ||||||
|  |     --hash=sha256:6e68a9800a3fa37c438b73a669f507c4973801a456a864ac56b62c3bd63d08af \ | ||||||
|  |     --hash=sha256:7163b488bfdcd13c300a8407c309e4cee195ef95d07facf5ac2678d66c988805 \ | ||||||
|  |     --hash=sha256:75b2c01a4eda2e7c357bc67a3f5c3dd76506c15b5fd4dc42845ef2e182181bad \ | ||||||
|  |     --hash=sha256:7d9e76a77c9644fbd60da3984e516ead5b84817748c2da92515cd36f1941a04d \ | ||||||
|  |     --hash=sha256:7f082321c244ff5d0e252071fb2c4fe02063a45934144a1456a5370ca139bec2 \ | ||||||
|  |     --hash=sha256:7f471fd055a9e13cf5f4337ee12379b30b4dcda1ae0d85018d4649e841578c02 \ | ||||||
|  |     --hash=sha256:7f75797bc75a2e7edf52a1c9e68a1295fa84250161c8f4e41df9e72723332c65 \ | ||||||
|  |     --hash=sha256:8c9e6b07c136d06495c792f603099039bb1e7c6c29854cc5eb3d7640268df016 \ | ||||||
|  |     --hash=sha256:a1719788a787808c15c9bb98b6ff0c97d64a0e59c1a6ebe36d4ae4d7c5c09b95 \ | ||||||
|  |     --hash=sha256:a194e3684b3553ea199c3f4837f422a521c7e2f0cce13527adc3a6b4049f9e7c \ | ||||||
|  |     --hash=sha256:a69b5c02014448a412012752dc46091902d28932c3be0c6e02e73cecceffb700 \ | ||||||
|  |     --hash=sha256:ad5cf0ebc3cdb12edb5aa50472108807ffd0a0ce95f87c0366a479fa83a7c346 \ | ||||||
|  |     --hash=sha256:b5cc434eb4a4df2fe442bcc50df114e82ff7aa234657baf873b2c9cf3f851e8e \ | ||||||
|  |     --hash=sha256:b9854fa70edbe93242b8bb4840e58d1128c45766d9a70710f05b4f67eb0feb6e \ | ||||||
|  |     --hash=sha256:d7a19435ca3d7dd502b8d8dce643555eb1b6013989e3f7577857289f6db6be16 \ | ||||||
|  |     --hash=sha256:eb766218abad05c7c3ddad5367d0ec702d6152cb4a48d9fd56a6cef6abade70c \ | ||||||
|  |     --hash=sha256:f0eb88dbac8a7e61ee433006d0ddfd6eb921f5c6c224d1b50855bc98fb304c44 \ | ||||||
|  |     --hash=sha256:f326218262c8d8537887cc4bbd613c8409d62f2cac799835c0360e0d9cefaa5c \ | ||||||
|  |     --hash=sha256:f677668ab2bb2452fee955af3702fbb3b71919e61e4520762b1e5f54af59c0d8 | ||||||
|  |     # via pandas | ||||||
|  | numpy==2.2.6 \ | ||||||
|  |     --hash=sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff \ | ||||||
|  |     --hash=sha256:0678000bb9ac1475cd454c6b8c799206af8107e310843532b04d49649c717a47 \ | ||||||
|  |     --hash=sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84 \ | ||||||
|  |     --hash=sha256:0b605b275d7bd0c640cad4e5d30fa701a8d59302e127e5f79138ad62762c3e3d \ | ||||||
|  |     --hash=sha256:0bca768cd85ae743b2affdc762d617eddf3bcf8724435498a1e80132d04879e6 \ | ||||||
|  |     --hash=sha256:1bc23a79bfabc5d056d106f9befb8d50c31ced2fbc70eedb8155aec74a45798f \ | ||||||
|  |     --hash=sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b \ | ||||||
|  |     --hash=sha256:37c0ca431f82cd5fa716eca9506aefcabc247fb27ba69c5062a6d3ade8cf8f49 \ | ||||||
|  |     --hash=sha256:37e990a01ae6ec7fe7fa1c26c55ecb672dd98b19c3d0e1d1f326fa13cb38d163 \ | ||||||
|  |     --hash=sha256:389d771b1623ec92636b0786bc4ae56abafad4a4c513d36a55dce14bd9ce8571 \ | ||||||
|  |     --hash=sha256:3d70692235e759f260c3d837193090014aebdf026dfd167834bcba43e30c2a42 \ | ||||||
|  |     --hash=sha256:41c5a21f4a04fa86436124d388f6ed60a9343a6f767fced1a8a71c3fbca038ff \ | ||||||
|  |     --hash=sha256:481b49095335f8eed42e39e8041327c05b0f6f4780488f61286ed3c01368d491 \ | ||||||
|  |     --hash=sha256:4eeaae00d789f66c7a25ac5f34b71a7035bb474e679f410e5e1a94deb24cf2d4 \ | ||||||
|  |     --hash=sha256:55a4d33fa519660d69614a9fad433be87e5252f4b03850642f88993f7b2ca566 \ | ||||||
|  |     --hash=sha256:5a6429d4be8ca66d889b7cf70f536a397dc45ba6faeb5f8c5427935d9592e9cf \ | ||||||
|  |     --hash=sha256:5bd4fc3ac8926b3819797a7c0e2631eb889b4118a9898c84f585a54d475b7e40 \ | ||||||
|  |     --hash=sha256:5beb72339d9d4fa36522fc63802f469b13cdbe4fdab4a288f0c441b74272ebfd \ | ||||||
|  |     --hash=sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06 \ | ||||||
|  |     --hash=sha256:71594f7c51a18e728451bb50cc60a3ce4e6538822731b2933209a1f3614e9282 \ | ||||||
|  |     --hash=sha256:74d4531beb257d2c3f4b261bfb0fc09e0f9ebb8842d82a7b4209415896adc680 \ | ||||||
|  |     --hash=sha256:7befc596a7dc9da8a337f79802ee8adb30a552a94f792b9c9d18c840055907db \ | ||||||
|  |     --hash=sha256:894b3a42502226a1cac872f840030665f33326fc3dac8e57c607905773cdcde3 \ | ||||||
|  |     --hash=sha256:8e41fd67c52b86603a91c1a505ebaef50b3314de0213461c7a6e99c9a3beff90 \ | ||||||
|  |     --hash=sha256:8e9ace4a37db23421249ed236fdcdd457d671e25146786dfc96835cd951aa7c1 \ | ||||||
|  |     --hash=sha256:8fc377d995680230e83241d8a96def29f204b5782f371c532579b4f20607a289 \ | ||||||
|  |     --hash=sha256:9551a499bf125c1d4f9e250377c1ee2eddd02e01eac6644c080162c0c51778ab \ | ||||||
|  |     --hash=sha256:b0544343a702fa80c95ad5d3d608ea3599dd54d4632df855e4c8d24eb6ecfa1c \ | ||||||
|  |     --hash=sha256:b093dd74e50a8cba3e873868d9e93a85b78e0daf2e98c6797566ad8044e8363d \ | ||||||
|  |     --hash=sha256:b412caa66f72040e6d268491a59f2c43bf03eb6c96dd8f0307829feb7fa2b6fb \ | ||||||
|  |     --hash=sha256:b4f13750ce79751586ae2eb824ba7e1e8dba64784086c98cdbbcc6a42112ce0d \ | ||||||
|  |     --hash=sha256:b64d8d4d17135e00c8e346e0a738deb17e754230d7e0810ac5012750bbd85a5a \ | ||||||
|  |     --hash=sha256:ba10f8411898fc418a521833e014a77d3ca01c15b0c6cdcce6a0d2897e6dbbdf \ | ||||||
|  |     --hash=sha256:bd48227a919f1bafbdda0583705e547892342c26fb127219d60a5c36882609d1 \ | ||||||
|  |     --hash=sha256:c1f9540be57940698ed329904db803cf7a402f3fc200bfe599334c9bd84a40b2 \ | ||||||
|  |     --hash=sha256:c820a93b0255bc360f53eca31a0e676fd1101f673dda8da93454a12e23fc5f7a \ | ||||||
|  |     --hash=sha256:ce47521a4754c8f4593837384bd3424880629f718d87c5d44f8ed763edd63543 \ | ||||||
|  |     --hash=sha256:d042d24c90c41b54fd506da306759e06e568864df8ec17ccc17e9e884634fd00 \ | ||||||
|  |     --hash=sha256:de749064336d37e340f640b05f24e9e3dd678c57318c7289d222a8a2f543e90c \ | ||||||
|  |     --hash=sha256:e1dda9c7e08dc141e0247a5b8f49cf05984955246a327d4c48bda16821947b2f \ | ||||||
|  |     --hash=sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd \ | ||||||
|  |     --hash=sha256:e3143e4451880bed956e706a3220b4e5cf6172ef05fcc397f6f36a550b1dd868 \ | ||||||
|  |     --hash=sha256:e8213002e427c69c45a52bbd94163084025f533a55a59d6f9c5b820774ef3303 \ | ||||||
|  |     --hash=sha256:efd28d4e9cd7d7a8d39074a4d44c63eda73401580c5c76acda2ce969e0a38e83 \ | ||||||
|  |     --hash=sha256:f0fd6321b839904e15c46e0d257fdd101dd7f530fe03fd6359c1ea63738703f3 \ | ||||||
|  |     --hash=sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d \ | ||||||
|  |     --hash=sha256:f2618db89be1b4e05f7a1a847a9c1c0abd63e63a1607d892dd54668dd92faf87 \ | ||||||
|  |     --hash=sha256:f447e6acb680fd307f40d3da4852208af94afdfab89cf850986c3ca00562f4fa \ | ||||||
|  |     --hash=sha256:f92729c95468a2f4f15e9bb94c432a9229d0d50de67304399627a943201baa2f \ | ||||||
|  |     --hash=sha256:f9f1adb22318e121c5c69a09142811a201ef17ab257a1e66ca3025065b7f53ae \ | ||||||
|  |     --hash=sha256:fc0c5673685c508a142ca65209b4e79ed6740a4ed6b2267dbba90f34b0b3cfda \ | ||||||
|  |     --hash=sha256:fc7b73d02efb0e18c000e9ad8b83480dfcd5dfd11065997ed4c6747470ae8915 \ | ||||||
|  |     --hash=sha256:fd83c01228a688733f1ded5201c678f0c53ecc1006ffbc404db9f7a899ac6249 \ | ||||||
|  |     --hash=sha256:fe27749d33bb772c80dcd84ae7e8df2adc920ae8297400dabec45f0dedb3f6de \ | ||||||
|  |     --hash=sha256:fee4236c876c4e8369388054d02d0e9bb84821feb1a64dd59e137e6511a551f8 | ||||||
|  |     # via | ||||||
|  |     #   bottleneck | ||||||
|  |     #   numba | ||||||
|  |     #   numexpr | ||||||
|  |     #   pandas | ||||||
|  |     #   py3langid | ||||||
|  | pandas==2.2.3 \ | ||||||
|  |     --hash=sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a \ | ||||||
|  |     --hash=sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d \ | ||||||
|  |     --hash=sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5 \ | ||||||
|  |     --hash=sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4 \ | ||||||
|  |     --hash=sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0 \ | ||||||
|  |     --hash=sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32 \ | ||||||
|  |     --hash=sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28 \ | ||||||
|  |     --hash=sha256:37e0aced3e8f539eccf2e099f65cdb9c8aa85109b0be6e93e2baff94264bdc6f \ | ||||||
|  |     --hash=sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348 \ | ||||||
|  |     --hash=sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18 \ | ||||||
|  |     --hash=sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468 \ | ||||||
|  |     --hash=sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5 \ | ||||||
|  |     --hash=sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667 \ | ||||||
|  |     --hash=sha256:56534ce0746a58afaf7942ba4863e0ef81c9c50d3f0ae93e9497d6a41a057645 \ | ||||||
|  |     --hash=sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13 \ | ||||||
|  |     --hash=sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3 \ | ||||||
|  |     --hash=sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d \ | ||||||
|  |     --hash=sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb \ | ||||||
|  |     --hash=sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3 \ | ||||||
|  |     --hash=sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039 \ | ||||||
|  |     --hash=sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8 \ | ||||||
|  |     --hash=sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd \ | ||||||
|  |     --hash=sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659 \ | ||||||
|  |     --hash=sha256:86976a1c5b25ae3f8ccae3a5306e443569ee3c3faf444dfd0f41cda24667ad57 \ | ||||||
|  |     --hash=sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4 \ | ||||||
|  |     --hash=sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a \ | ||||||
|  |     --hash=sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9 \ | ||||||
|  |     --hash=sha256:b8661b0238a69d7aafe156b7fa86c44b881387509653fdf857bebc5e4008ad42 \ | ||||||
|  |     --hash=sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2 \ | ||||||
|  |     --hash=sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc \ | ||||||
|  |     --hash=sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698 \ | ||||||
|  |     --hash=sha256:d9c45366def9a3dd85a6454c0e7908f2b3b8e9c138f5dc38fed7ce720d8453ed \ | ||||||
|  |     --hash=sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015 \ | ||||||
|  |     --hash=sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24 \ | ||||||
|  |     --hash=sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319 | ||||||
|  |     # via | ||||||
|  |     #   country-converter | ||||||
|  |     #   csv-metadata-quality | ||||||
|  | platformdirs==4.3.8 \ | ||||||
|  |     --hash=sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc \ | ||||||
|  |     --hash=sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4 | ||||||
|  |     # via requests-cache | ||||||
|  | py3langid==0.3.0 \ | ||||||
|  |     --hash=sha256:0a875a031a58aaf9dbda7bb8285fd75e801a7bd276216ffabe037901d4b449ec \ | ||||||
|  |     --hash=sha256:38f022eec31cf9a2bf6f142acb2a9b350fd7d0d5ae7762b1392c6d3567401fd3 | ||||||
|  |     # via csv-metadata-quality | ||||||
|  | pyarrow==20.0.0 \ | ||||||
|  |     --hash=sha256:00138f79ee1b5aca81e2bdedb91e3739b987245e11fa3c826f9e57c5d102fb75 \ | ||||||
|  |     --hash=sha256:15aa1b3b2587e74328a730457068dc6c89e6dcbf438d4369f572af9d320a25ee \ | ||||||
|  |     --hash=sha256:204a846dca751428991346976b914d6d2a82ae5b8316a6ed99789ebf976551e6 \ | ||||||
|  |     --hash=sha256:211d5e84cecc640c7a3ab900f930aaff5cd2702177e0d562d426fb7c4f737781 \ | ||||||
|  |     --hash=sha256:24ca380585444cb2a31324c546a9a56abbe87e26069189e14bdba19c86c049f0 \ | ||||||
|  |     --hash=sha256:2c3a01f313ffe27ac4126f4c2e5ea0f36a5fc6ab51f8726cf41fee4b256680bd \ | ||||||
|  |     --hash=sha256:30b3051b7975801c1e1d387e17c588d8ab05ced9b1e14eec57915f79869b5031 \ | ||||||
|  |     --hash=sha256:3346babb516f4b6fd790da99b98bed9708e3f02e734c84971faccb20736848dc \ | ||||||
|  |     --hash=sha256:3e1f8a47f4b4ae4c69c4d702cfbdfe4d41e18e5c7ef6f1bb1c50918c1e81c57b \ | ||||||
|  |     --hash=sha256:4250e28a22302ce8692d3a0e8ec9d9dde54ec00d237cff4dfa9c1fbf79e472a8 \ | ||||||
|  |     --hash=sha256:4680f01ecd86e0dd63e39eb5cd59ef9ff24a9d166db328679e36c108dc993d4c \ | ||||||
|  |     --hash=sha256:4a8b029a07956b8d7bd742ffca25374dd3f634b35e46cc7a7c3fa4c75b297191 \ | ||||||
|  |     --hash=sha256:4ba3cf4182828be7a896cbd232aa8dd6a31bd1f9e32776cc3796c012855e1199 \ | ||||||
|  |     --hash=sha256:5605919fbe67a7948c1f03b9f3727d82846c053cd2ce9303ace791855923fd20 \ | ||||||
|  |     --hash=sha256:5f0fb1041267e9968c6d0d2ce3ff92e3928b243e2b6d11eeb84d9ac547308232 \ | ||||||
|  |     --hash=sha256:6102b4864d77102dbbb72965618e204e550135a940c2534711d5ffa787df2a5a \ | ||||||
|  |     --hash=sha256:6415a0d0174487456ddc9beaead703d0ded5966129fa4fd3114d76b5d1c5ceae \ | ||||||
|  |     --hash=sha256:6bb830757103a6cb300a04610e08d9636f0cd223d32f388418ea893a3e655f1c \ | ||||||
|  |     --hash=sha256:75a51a5b0eef32727a247707d4755322cb970be7e935172b6a3a9f9ae98404ba \ | ||||||
|  |     --hash=sha256:7a3a5dcf54286e6141d5114522cf31dd67a9e7c9133d150799f30ee302a7a1ab \ | ||||||
|  |     --hash=sha256:7f4c8534e2ff059765647aa69b75d6543f9fef59e2cd4c6d18015192565d2b70 \ | ||||||
|  |     --hash=sha256:82f1ee5133bd8f49d31be1299dc07f585136679666b502540db854968576faf9 \ | ||||||
|  |     --hash=sha256:89e030dc58fc760e4010148e6ff164d2f44441490280ef1e97a542375e41058e \ | ||||||
|  |     --hash=sha256:95b330059ddfdc591a3225f2d272123be26c8fa76e8c9ee1a77aad507361cfdb \ | ||||||
|  |     --hash=sha256:96d6a0a37d9c98be08f5ed6a10831d88d52cac7b13f5287f1e0f625a0de8062b \ | ||||||
|  |     --hash=sha256:96e37f0766ecb4514a899d9a3554fadda770fb57ddf42b63d80f14bc20aa7db3 \ | ||||||
|  |     --hash=sha256:97c8dc984ed09cb07d618d57d8d4b67a5100a30c3818c2fb0b04599f0da2de7b \ | ||||||
|  |     --hash=sha256:991f85b48a8a5e839b2128590ce07611fae48a904cae6cab1f089c5955b57eb5 \ | ||||||
|  |     --hash=sha256:9b71daf534f4745818f96c214dbc1e6124d7daf059167330b610fc69b6f3d3e3 \ | ||||||
|  |     --hash=sha256:a15532e77b94c61efadde86d10957950392999503b3616b2ffcef7621a002893 \ | ||||||
|  |     --hash=sha256:a1f60dc14658efaa927f8214734f6a01a806d7690be4b3232ba526836d216122 \ | ||||||
|  |     --hash=sha256:a2791f69ad72addd33510fec7bb14ee06c2a448e06b649e264c094c5b5f7ce28 \ | ||||||
|  |     --hash=sha256:a5704f29a74b81673d266e5ec1fe376f060627c2e42c5c7651288ed4b0db29e9 \ | ||||||
|  |     --hash=sha256:a6ad3e7758ecf559900261a4df985662df54fb7fdb55e8e3b3aa99b23d526b62 \ | ||||||
|  |     --hash=sha256:aa0d288143a8585806e3cc7c39566407aab646fb9ece164609dac1cfff45f6ae \ | ||||||
|  |     --hash=sha256:b6953f0114f8d6f3d905d98e987d0924dabce59c3cda380bdfaa25a6201563b4 \ | ||||||
|  |     --hash=sha256:b8ff87cc837601532cc8242d2f7e09b4e02404de1b797aee747dd4ba4bd6313f \ | ||||||
|  |     --hash=sha256:c7dd06fd7d7b410ca5dc839cc9d485d2bc4ae5240851bcd45d85105cc90a47d7 \ | ||||||
|  |     --hash=sha256:ca151afa4f9b7bc45bcc791eb9a89e90a9eb2772767d0b1e5389609c7d03db63 \ | ||||||
|  |     --hash=sha256:d5382de8dc34c943249b01c19110783d0d64b207167c728461add1ecc2db88e4 \ | ||||||
|  |     --hash=sha256:dd43f58037443af715f34f1322c782ec463a3c8a94a85fdb2d987ceb5658e061 \ | ||||||
|  |     --hash=sha256:e724a3fd23ae5b9c010e7be857f4405ed5e679db5c93e66204db1a69f733936a \ | ||||||
|  |     --hash=sha256:e8b88758f9303fa5a83d6c90e176714b2fd3852e776fc2d7e42a22dd6c2fb368 \ | ||||||
|  |     --hash=sha256:f2d67ac28f57a362f1a2c1e6fa98bfe2f03230f7e15927aecd067433b1e70ce8 \ | ||||||
|  |     --hash=sha256:f3b117b922af5e4c6b9a9115825726cac7d8b1421c37c2b5e24fbacc8930612c \ | ||||||
|  |     --hash=sha256:febc4a913592573c8d5805091a6c2b5064c8bd6e002131f01061797d91c783c1 | ||||||
|  |     # via pandas | ||||||
|  | pycountry==24.6.1 \ | ||||||
|  |     --hash=sha256:b61b3faccea67f87d10c1f2b0fc0be714409e8fcdcc1315613174f6466c10221 \ | ||||||
|  |     --hash=sha256:f1a4fb391cd7214f8eefd39556d740adcc233c778a27f8942c8dca351d6ce06f | ||||||
|  |     # via csv-metadata-quality | ||||||
|  | python-dateutil==2.9.0.post0 \ | ||||||
|  |     --hash=sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3 \ | ||||||
|  |     --hash=sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427 | ||||||
|  |     # via pandas | ||||||
|  | python-stdnum==1.20 \ | ||||||
|  |     --hash=sha256:111008e10391d54fb2afad2a10df70d5cb0c6c0a7ec82fec6f022cb8712961d3 \ | ||||||
|  |     --hash=sha256:ad2a2cf2eb025de408210235f36b4ae31252de3186240ccaa8126e117cb82690 | ||||||
|  |     # via csv-metadata-quality | ||||||
|  | pytz==2025.2 \ | ||||||
|  |     --hash=sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3 \ | ||||||
|  |     --hash=sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00 | ||||||
|  |     # via pandas | ||||||
|  | requests==2.32.4 \ | ||||||
|  |     --hash=sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c \ | ||||||
|  |     --hash=sha256:27d0316682c8a29834d3264820024b62a36942083d52caf2f14c0591336d3422 | ||||||
|  |     # via | ||||||
|  |     #   csv-metadata-quality | ||||||
|  |     #   requests-cache | ||||||
|  | requests-cache==1.2.1 \ | ||||||
|  |     --hash=sha256:1285151cddf5331067baa82598afe2d47c7495a1334bfe7a7d329b43e9fd3603 \ | ||||||
|  |     --hash=sha256:68abc986fdc5b8d0911318fbb5f7c80eebcd4d01bfacc6685ecf8876052511d1 | ||||||
|  |     # via csv-metadata-quality | ||||||
|  | six==1.17.0 \ | ||||||
|  |     --hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \ | ||||||
|  |     --hash=sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81 | ||||||
|  |     # via python-dateutil | ||||||
|  | typing-extensions==4.14.0 \ | ||||||
|  |     --hash=sha256:8676b788e32f02ab42d9e7c61324048ae4c6d844a399eebace3d4979d75ceef4 \ | ||||||
|  |     --hash=sha256:a1514509136dd0b477638fc68d6a91497af5076466ad0fa6c338e44e359944af | ||||||
|  |     # via | ||||||
|  |     #   cattrs | ||||||
|  |     #   exceptiongroup | ||||||
|  | tzdata==2025.2 \ | ||||||
|  |     --hash=sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8 \ | ||||||
|  |     --hash=sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9 | ||||||
|  |     # via pandas | ||||||
|  | url-normalize==2.2.1 \ | ||||||
|  |     --hash=sha256:3deb687587dc91f7b25c9ae5162ffc0f057ae85d22b1e15cf5698311247f567b \ | ||||||
|  |     --hash=sha256:74a540a3b6eba1d95bdc610c24f2c0141639f3ba903501e61a52a8730247ff37 | ||||||
|  |     # via requests-cache | ||||||
|  | urllib3==2.5.0 \ | ||||||
|  |     --hash=sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760 \ | ||||||
|  |     --hash=sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc | ||||||
|  |     # via | ||||||
|  |     #   requests | ||||||
|  |     #   requests-cache | ||||||
|  | wcwidth==0.2.13 \ | ||||||
|  |     --hash=sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859 \ | ||||||
|  |     --hash=sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5 | ||||||
|  |     # via ftfy | ||||||
|   | |||||||
							
								
								
									
										36
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										36
									
								
								setup.py
									
									
									
									
									
								
							| @@ -1,36 +0,0 @@ | |||||||
| import setuptools |  | ||||||
|  |  | ||||||
| with open("README.md", "r") as fh: |  | ||||||
|     long_description = fh.read() |  | ||||||
|  |  | ||||||
| install_requires = [ |  | ||||||
|     "pandas", |  | ||||||
|     "python-stdnum", |  | ||||||
|     "requests", |  | ||||||
|     "requests-cache", |  | ||||||
|     "pycountry", |  | ||||||
|     "langid", |  | ||||||
| ] |  | ||||||
|  |  | ||||||
| setuptools.setup( |  | ||||||
|     name="csv-metadata-quality", |  | ||||||
|     version="0.6.1", |  | ||||||
|     author="Alan Orth", |  | ||||||
|     author_email="aorth@mjanja.ch", |  | ||||||
|     description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem.", |  | ||||||
|     license="GPLv3", |  | ||||||
|     long_description=long_description, |  | ||||||
|     long_description_content_type="text/markdown", |  | ||||||
|     url="https://github.com/alanorth/csv-metadata-quality", |  | ||||||
|     classifiers=[ |  | ||||||
|         "Programming Language :: Python :: 3.9", |  | ||||||
|         "Programming Language :: Python :: 3.10", |  | ||||||
|         "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", |  | ||||||
|         "Operating System :: OS Independent", |  | ||||||
|     ], |  | ||||||
|     packages=["csv_metadata_quality"], |  | ||||||
|     entry_points={ |  | ||||||
|         "console_scripts": ["csv-metadata-quality = csv_metadata_quality.__main__:main"] |  | ||||||
|     }, |  | ||||||
|     install_requires=install_requires, |  | ||||||
| ) |  | ||||||
| @@ -1,11 +1,14 @@ | |||||||
| # SPDX-License-Identifier: GPL-3.0-only | # SPDX-License-Identifier: GPL-3.0-only | ||||||
| 
 | 
 | ||||||
| import argparse | import argparse | ||||||
|  | import os | ||||||
| import re | import re | ||||||
| import signal | import signal | ||||||
| import sys | import sys | ||||||
|  | from datetime import timedelta | ||||||
| 
 | 
 | ||||||
| import pandas as pd | import pandas as pd | ||||||
|  | import requests_cache | ||||||
| from colorama import Fore | from colorama import Fore | ||||||
| 
 | 
 | ||||||
| import csv_metadata_quality.check as check | import csv_metadata_quality.check as check | ||||||
| @@ -73,7 +76,8 @@ def run(argv): | |||||||
|     # set the signal handler for SIGINT (^C) |     # set the signal handler for SIGINT (^C) | ||||||
|     signal.signal(signal.SIGINT, signal_handler) |     signal.signal(signal.SIGINT, signal_handler) | ||||||
| 
 | 
 | ||||||
|     df = pd.read_csv(args.input_file, dtype_backend="pyarrow") |     # Read all fields as strings so dates don't get converted from 1998 to 1998.0 | ||||||
|  |     df = pd.read_csv(args.input_file, dtype_backend="pyarrow", dtype="str") | ||||||
| 
 | 
 | ||||||
|     # Check if the user requested to skip any fields |     # Check if the user requested to skip any fields | ||||||
|     if args.exclude_fields: |     if args.exclude_fields: | ||||||
| @@ -81,7 +85,20 @@ def run(argv): | |||||||
|         # user should be careful to no include spaces here. |         # user should be careful to no include spaces here. | ||||||
|         exclude = args.exclude_fields.split(",") |         exclude = args.exclude_fields.split(",") | ||||||
|     else: |     else: | ||||||
|         exclude = list() |         exclude = [] | ||||||
|  | 
 | ||||||
|  |     # enable transparent request cache with thirty days expiry | ||||||
|  |     expire_after = timedelta(days=30) | ||||||
|  |     # Allow overriding the location of the requests cache, just in case we are | ||||||
|  |     # running in an environment where we can't write to the current working di- | ||||||
|  |     # rectory (for example from csv-metadata-quality-web). | ||||||
|  |     REQUESTS_CACHE_DIR = os.environ.get("REQUESTS_CACHE_DIR", ".") | ||||||
|  |     requests_cache.install_cache( | ||||||
|  |         f"{REQUESTS_CACHE_DIR}/agrovoc-response-cache", expire_after=expire_after | ||||||
|  |     ) | ||||||
|  | 
 | ||||||
|  |     # prune old cache entries | ||||||
|  |     requests_cache.delete() | ||||||
| 
 | 
 | ||||||
|     for column in df.columns: |     for column in df.columns: | ||||||
|         if column in exclude: |         if column in exclude: | ||||||
| @@ -90,7 +107,9 @@ def run(argv): | |||||||
|             continue |             continue | ||||||
| 
 | 
 | ||||||
|         if args.unsafe_fixes: |         if args.unsafe_fixes: | ||||||
|             match = re.match(r"^.*?abstract.*$", column) |             # Skip whitespace and newline fixes on abstracts and descriptions | ||||||
|  |             # because there are too many with legitimate multi-line metadata. | ||||||
|  |             match = re.match(r"^.*?(abstract|description).*$", column) | ||||||
|             if match is None: |             if match is None: | ||||||
|                 # Fix: whitespace |                 # Fix: whitespace | ||||||
|                 df[column] = df[column].apply(fix.whitespace, field_name=column) |                 df[column] = df[column].apply(fix.whitespace, field_name=column) | ||||||
| @@ -122,6 +141,11 @@ def run(argv): | |||||||
|         # Fix: unnecessary Unicode |         # Fix: unnecessary Unicode | ||||||
|         df[column] = df[column].apply(fix.unnecessary_unicode) |         df[column] = df[column].apply(fix.unnecessary_unicode) | ||||||
| 
 | 
 | ||||||
|  |         # Fix: normalize DOIs | ||||||
|  |         match = re.match(r"^.*?identifier\.doi.*$", column) | ||||||
|  |         if match is not None: | ||||||
|  |             df[column] = df[column].apply(fix.normalize_dois) | ||||||
|  | 
 | ||||||
|         # Fix: invalid and unnecessary multi-value separators. Skip the title |         # Fix: invalid and unnecessary multi-value separators. Skip the title | ||||||
|         # and abstract fields because "|" is used to indicate something like |         # and abstract fields because "|" is used to indicate something like | ||||||
|         # a subtitle. |         # a subtitle. | ||||||
| @@ -1,14 +1,12 @@ | |||||||
| # SPDX-License-Identifier: GPL-3.0-only | # SPDX-License-Identifier: GPL-3.0-only | ||||||
| 
 | 
 | ||||||
| import logging | import logging | ||||||
| import os |  | ||||||
| import re | import re | ||||||
| from datetime import datetime, timedelta | from datetime import datetime | ||||||
| 
 | 
 | ||||||
| import country_converter as coco | import country_converter as coco | ||||||
| import pandas as pd | import pandas as pd | ||||||
| import requests | import requests | ||||||
| import requests_cache |  | ||||||
| from colorama import Fore | from colorama import Fore | ||||||
| from pycountry import languages | from pycountry import languages | ||||||
| from stdnum import isbn as stdnum_isbn | from stdnum import isbn as stdnum_isbn | ||||||
| @@ -135,7 +133,7 @@ def suspicious_characters(field, field_name): | |||||||
|         return |         return | ||||||
| 
 | 
 | ||||||
|     # List of suspicious characters, for example:  ́ˆ~` |     # List of suspicious characters, for example:  ́ˆ~` | ||||||
|     suspicious_characters = ["\u00B4", "\u02C6", "\u007E", "\u0060"] |     suspicious_characters = ["\u00b4", "\u02c6", "\u007e", "\u0060"] | ||||||
| 
 | 
 | ||||||
|     for character in suspicious_characters: |     for character in suspicious_characters: | ||||||
|         # Find the position of the suspicious character in the string |         # Find the position of the suspicious character in the string | ||||||
| @@ -203,25 +201,12 @@ def agrovoc(field, field_name, drop): | |||||||
|     if pd.isna(field): |     if pd.isna(field): | ||||||
|         return |         return | ||||||
| 
 | 
 | ||||||
|     # enable transparent request cache with thirty days expiry |  | ||||||
|     expire_after = timedelta(days=30) |  | ||||||
|     # Allow overriding the location of the requests cache, just in case we are |  | ||||||
|     # running in an environment where we can't write to the current working di- |  | ||||||
|     # rectory (for example from csv-metadata-quality-web). |  | ||||||
|     REQUESTS_CACHE_DIR = os.environ.get("REQUESTS_CACHE_DIR", ".") |  | ||||||
|     requests_cache.install_cache( |  | ||||||
|         f"{REQUESTS_CACHE_DIR}/agrovoc-response-cache", expire_after=expire_after |  | ||||||
|     ) |  | ||||||
| 
 |  | ||||||
|     # prune old cache entries |  | ||||||
|     # requests_cache.remove_expired_responses() |  | ||||||
| 
 |  | ||||||
|     # Initialize an empty list to hold the validated AGROVOC values |     # Initialize an empty list to hold the validated AGROVOC values | ||||||
|     values = list() |     values = [] | ||||||
| 
 | 
 | ||||||
|     # Try to split multi-value field on "||" separator |     # Try to split multi-value field on "||" separator | ||||||
|     for value in field.split("||"): |     for value in field.split("||"): | ||||||
|         request_url = "http://agrovoc.uniroma2.it/agrovoc/rest/v1/agrovoc/search" |         request_url = "https://agrovoc.fao.org/browse/rest/v1/search" | ||||||
|         request_params = {"query": value} |         request_params = {"query": value} | ||||||
| 
 | 
 | ||||||
|         request = requests.get(request_url, params=request_params) |         request = requests.get(request_url, params=request_params) | ||||||
| @@ -373,7 +358,7 @@ def duplicate_items(df): | |||||||
| 
 | 
 | ||||||
|     if items_count_unique < items_count_total: |     if items_count_unique < items_count_total: | ||||||
|         # Create a list to hold our items while we check for duplicates |         # Create a list to hold our items while we check for duplicates | ||||||
|         items = list() |         items = [] | ||||||
| 
 | 
 | ||||||
|         for index, row in df.iterrows(): |         for index, row in df.iterrows(): | ||||||
|             item_title_type_date = f"{row[title_column_name]}{row[type_column_name]}{row[date_column_name]}" |             item_title_type_date = f"{row[title_column_name]}{row[type_column_name]}{row[date_column_name]}" | ||||||
| @@ -554,7 +539,7 @@ def countries_match_regions(row, exclude): | |||||||
|         if row[region_column_name] is not None: |         if row[region_column_name] is not None: | ||||||
|             regions = row[region_column_name].split("||") |             regions = row[region_column_name].split("||") | ||||||
|         else: |         else: | ||||||
|             regions = list() |             regions = [] | ||||||
| 
 | 
 | ||||||
|         for country in countries: |         for country in countries: | ||||||
|             # Look up the UN M.49 regions for this country code. CoCo seems to |             # Look up the UN M.49 regions for this country code. CoCo seems to | ||||||
| @@ -563,8 +548,13 @@ def countries_match_regions(row, exclude): | |||||||
|             un_region = cc.convert(names=country, to="UNRegion") |             un_region = cc.convert(names=country, to="UNRegion") | ||||||
| 
 | 
 | ||||||
|             if un_region != "not found" and un_region not in regions: |             if un_region != "not found" and un_region not in regions: | ||||||
|                 print( |                 try: | ||||||
|                     f"{Fore.YELLOW}Missing region ({country} → {un_region}): {Fore.RESET}{row[title_column_name]}" |                     print( | ||||||
|                 ) |                         f"{Fore.YELLOW}Missing region ({country} → {un_region}): {Fore.RESET}{row[title_column_name]}" | ||||||
|  |                     ) | ||||||
|  |                 except KeyError: | ||||||
|  |                     print( | ||||||
|  |                         f"{Fore.YELLOW}Missing region ({country} → {un_region}): {Fore.RESET}<title field not present>" | ||||||
|  |                     ) | ||||||
| 
 | 
 | ||||||
|     return |     return | ||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -2,8 +2,8 @@ | |||||||
| 
 | 
 | ||||||
| import re | import re | ||||||
| 
 | 
 | ||||||
| import langid |  | ||||||
| import pandas as pd | import pandas as pd | ||||||
|  | import py3langid as langid | ||||||
| from colorama import Fore | from colorama import Fore | ||||||
| from pycountry import languages | from pycountry import languages | ||||||
| 
 | 
 | ||||||
| @@ -20,7 +20,7 @@ def correct_language(row, exclude): | |||||||
|     # Initialize some variables at global scope so that we can set them in the |     # Initialize some variables at global scope so that we can set them in the | ||||||
|     # loop scope below and still be able to access them afterwards. |     # loop scope below and still be able to access them afterwards. | ||||||
|     language = "" |     language = "" | ||||||
|     sample_strings = list() |     sample_strings = [] | ||||||
|     title = None |     title = None | ||||||
| 
 | 
 | ||||||
|     # Iterate over the labels of the current row's values. Before we transposed |     # Iterate over the labels of the current row's values. Before we transposed | ||||||
| @@ -23,7 +23,7 @@ def whitespace(field, field_name): | |||||||
|         return |         return | ||||||
| 
 | 
 | ||||||
|     # Initialize an empty list to hold the cleaned values |     # Initialize an empty list to hold the cleaned values | ||||||
|     values = list() |     values = [] | ||||||
| 
 | 
 | ||||||
|     # Try to split multi-value field on "||" separator |     # Try to split multi-value field on "||" separator | ||||||
|     for value in field.split("||"): |     for value in field.split("||"): | ||||||
| @@ -64,7 +64,7 @@ def separators(field, field_name): | |||||||
|         return |         return | ||||||
| 
 | 
 | ||||||
|     # Initialize an empty list to hold the cleaned values |     # Initialize an empty list to hold the cleaned values | ||||||
|     values = list() |     values = [] | ||||||
| 
 | 
 | ||||||
|     # Try to split multi-value field on "||" separator |     # Try to split multi-value field on "||" separator | ||||||
|     for value in field.split("||"): |     for value in field.split("||"): | ||||||
| @@ -175,7 +175,7 @@ def duplicates(field, field_name): | |||||||
|     values = field.split("||") |     values = field.split("||") | ||||||
| 
 | 
 | ||||||
|     # Initialize an empty list to hold the de-duplicated values |     # Initialize an empty list to hold the de-duplicated values | ||||||
|     new_values = list() |     new_values = [] | ||||||
| 
 | 
 | ||||||
|     # Iterate over all values |     # Iterate over all values | ||||||
|     for value in values: |     for value in values: | ||||||
| @@ -355,10 +355,10 @@ def countries_match_regions(row, exclude): | |||||||
|         if row[region_column_name] is not None: |         if row[region_column_name] is not None: | ||||||
|             regions = row[region_column_name].split("||") |             regions = row[region_column_name].split("||") | ||||||
|         else: |         else: | ||||||
|             regions = list() |             regions = [] | ||||||
| 
 | 
 | ||||||
|         # An empty list for our regions so we can keep track for all countries |         # An empty list for our regions so we can keep track for all countries | ||||||
|         missing_regions = list() |         missing_regions = [] | ||||||
| 
 | 
 | ||||||
|         for country in countries: |         for country in countries: | ||||||
|             # Look up the UN M.49 regions for this country code. CoCo seems to |             # Look up the UN M.49 regions for this country code. CoCo seems to | ||||||
| @@ -370,9 +370,17 @@ def countries_match_regions(row, exclude): | |||||||
|             # it doesn't already exist in regions. |             # it doesn't already exist in regions. | ||||||
|             if un_region != "not found" and un_region not in regions: |             if un_region != "not found" and un_region not in regions: | ||||||
|                 if un_region not in missing_regions: |                 if un_region not in missing_regions: | ||||||
|                     print( |                     try: | ||||||
|                         f"{Fore.YELLOW}Adding missing region ({un_region}): {Fore.RESET}{row[title_column_name]}" |                         print( | ||||||
|                     ) |                             f"{Fore.YELLOW}Adding missing region ({un_region}): {Fore.RESET}{row[title_column_name]}" | ||||||
|  |                         ) | ||||||
|  |                     except KeyError: | ||||||
|  |                         # If there is no title column in the CSV we will print | ||||||
|  |                         # the fix without the title instead of crashing. | ||||||
|  |                         print( | ||||||
|  |                             f"{Fore.YELLOW}Adding missing region ({un_region}): {Fore.RESET}<title field not present>" | ||||||
|  |                         ) | ||||||
|  | 
 | ||||||
|                     missing_regions.append(un_region) |                     missing_regions.append(un_region) | ||||||
| 
 | 
 | ||||||
|         if len(missing_regions) > 0: |         if len(missing_regions) > 0: | ||||||
| @@ -387,3 +395,88 @@ def countries_match_regions(row, exclude): | |||||||
|                 row[region_column_name] = "||".join(missing_regions) |                 row[region_column_name] = "||".join(missing_regions) | ||||||
| 
 | 
 | ||||||
|     return row |     return row | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def normalize_dois(field): | ||||||
|  |     """Normalize DOIs. | ||||||
|  | 
 | ||||||
|  |     DOIs are meant to be globally unique identifiers. They are case insensitive, | ||||||
|  |     but in order to compare them robustly they should be normalized to a common | ||||||
|  |     format: | ||||||
|  | 
 | ||||||
|  |         - strip leading and trailing whitespace | ||||||
|  |         - lowercase all ASCII characters | ||||||
|  |         - convert all variations to https://doi.org/10.xxxx/xxxx URI format | ||||||
|  | 
 | ||||||
|  |     Return string with normalized DOI. | ||||||
|  | 
 | ||||||
|  |     See: https://www.crossref.org/documentation/member-setup/constructing-your-dois/ | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     # Skip fields with missing values | ||||||
|  |     if pd.isna(field): | ||||||
|  |         return | ||||||
|  | 
 | ||||||
|  |     # Try to split multi-value field on "||" separator | ||||||
|  |     values = field.split("||") | ||||||
|  | 
 | ||||||
|  |     # Initialize an empty list to hold the de-duplicated values | ||||||
|  |     new_values = [] | ||||||
|  | 
 | ||||||
|  |     # Iterate over all values (most items will only have one DOI) | ||||||
|  |     for value in values: | ||||||
|  |         # Strip leading and trailing whitespace | ||||||
|  |         new_value = value.strip() | ||||||
|  | 
 | ||||||
|  |         new_value = new_value.lower() | ||||||
|  | 
 | ||||||
|  |         # Convert to HTTPS | ||||||
|  |         pattern = re.compile(r"^http://") | ||||||
|  |         match = re.findall(pattern, new_value) | ||||||
|  | 
 | ||||||
|  |         if match: | ||||||
|  |             new_value = re.sub(pattern, "https://", new_value) | ||||||
|  | 
 | ||||||
|  |         # Convert dx.doi.org to doi.org | ||||||
|  |         pattern = re.compile(r"dx\.doi\.org") | ||||||
|  |         match = re.findall(pattern, new_value) | ||||||
|  | 
 | ||||||
|  |         if match: | ||||||
|  |             new_value = re.sub(pattern, "doi.org", new_value) | ||||||
|  | 
 | ||||||
|  |         # Convert www.doi.org to doi.org | ||||||
|  |         pattern = re.compile(r"www\.doi\.org") | ||||||
|  |         match = re.findall(pattern, new_value) | ||||||
|  | 
 | ||||||
|  |         if match: | ||||||
|  |             new_value = re.sub(pattern, "doi.org", new_value) | ||||||
|  | 
 | ||||||
|  |         # Convert erroneous %2f to / | ||||||
|  |         pattern = re.compile("%2f") | ||||||
|  |         match = re.findall(pattern, new_value) | ||||||
|  | 
 | ||||||
|  |         if match: | ||||||
|  |             new_value = re.sub(pattern, "/", new_value) | ||||||
|  | 
 | ||||||
|  |         # Replace values like doi: 10.11648/j.jps.20140201.14 | ||||||
|  |         pattern = re.compile(r"^doi: 10\.") | ||||||
|  |         match = re.findall(pattern, new_value) | ||||||
|  | 
 | ||||||
|  |         if match: | ||||||
|  |             new_value = re.sub(pattern, "https://doi.org/10.", new_value) | ||||||
|  | 
 | ||||||
|  |         # Replace values like 10.3390/foods12010115 | ||||||
|  |         pattern = re.compile(r"^10\.") | ||||||
|  |         match = re.findall(pattern, new_value) | ||||||
|  | 
 | ||||||
|  |         if match: | ||||||
|  |             new_value = re.sub(pattern, "https://doi.org/10.", new_value) | ||||||
|  | 
 | ||||||
|  |         if new_value != value: | ||||||
|  |             print(f"{Fore.GREEN}Normalized DOI: {Fore.RESET}{value}") | ||||||
|  | 
 | ||||||
|  |         new_values.append(new_value) | ||||||
|  | 
 | ||||||
|  |     new_field = "||".join(new_values) | ||||||
|  | 
 | ||||||
|  |     return new_field | ||||||
| @@ -2,7 +2,7 @@ | |||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| import json | import json | ||||||
| from importlib.resources import files | import os | ||||||
| 
 | 
 | ||||||
| from ftfy.badness import is_bad | from ftfy.badness import is_bad | ||||||
| 
 | 
 | ||||||
| @@ -58,7 +58,7 @@ def is_mojibake(field): | |||||||
| def load_spdx_licenses(): | def load_spdx_licenses(): | ||||||
|     """Returns a Python list of SPDX short license identifiers.""" |     """Returns a Python list of SPDX short license identifiers.""" | ||||||
| 
 | 
 | ||||||
|     with open(files("csv_metadata_quality").joinpath("data/licenses.json")) as f: |     with open(os.path.join(os.path.dirname(__file__), "data/licenses.json")) as f: | ||||||
|         licenses = json.load(f) |         licenses = json.load(f) | ||||||
| 
 | 
 | ||||||
|     # List comprehension to extract the license ID for each license |     # List comprehension to extract the license ID for each license | ||||||
| @@ -1,3 +1,3 @@ | |||||||
| # SPDX-License-Identifier: GPL-3.0-only | # SPDX-License-Identifier: GPL-3.0-only | ||||||
| 
 | 
 | ||||||
| VERSION = "0.6.1" | VERSION = "0.7.0" | ||||||
| @@ -257,7 +257,7 @@ def test_check_incorrect_iso_639_1_language(capsys): | |||||||
|  |  | ||||||
|     title = "A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle" |     title = "A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle" | ||||||
|     language = "es" |     language = "es" | ||||||
|     exclude = list() |     exclude = [] | ||||||
|  |  | ||||||
|     # Create a dictionary to mimic Pandas series |     # Create a dictionary to mimic Pandas series | ||||||
|     row = {"dc.title": title, "dc.language.iso": language} |     row = {"dc.title": title, "dc.language.iso": language} | ||||||
| @@ -277,7 +277,7 @@ def test_check_incorrect_iso_639_3_language(capsys): | |||||||
|  |  | ||||||
|     title = "A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle" |     title = "A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle" | ||||||
|     language = "spa" |     language = "spa" | ||||||
|     exclude = list() |     exclude = [] | ||||||
|  |  | ||||||
|     # Create a dictionary to mimic Pandas series |     # Create a dictionary to mimic Pandas series | ||||||
|     row = {"dc.title": title, "dc.language.iso": language} |     row = {"dc.title": title, "dc.language.iso": language} | ||||||
| @@ -297,7 +297,7 @@ def test_check_correct_iso_639_1_language(): | |||||||
|  |  | ||||||
|     title = "A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle" |     title = "A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle" | ||||||
|     language = "en" |     language = "en" | ||||||
|     exclude = list() |     exclude = [] | ||||||
|  |  | ||||||
|     # Create a dictionary to mimic Pandas series |     # Create a dictionary to mimic Pandas series | ||||||
|     row = {"dc.title": title, "dc.language.iso": language} |     row = {"dc.title": title, "dc.language.iso": language} | ||||||
| @@ -313,7 +313,7 @@ def test_check_correct_iso_639_3_language(): | |||||||
|  |  | ||||||
|     title = "A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle" |     title = "A randomised vaccine field trial in Kenya demonstrates protection against wildebeest-associated malignant catarrhal fever in cattle" | ||||||
|     language = "eng" |     language = "eng" | ||||||
|     exclude = list() |     exclude = [] | ||||||
|  |  | ||||||
|     # Create a dictionary to mimic Pandas series |     # Create a dictionary to mimic Pandas series | ||||||
|     row = {"dc.title": title, "dc.language.iso": language} |     row = {"dc.title": title, "dc.language.iso": language} | ||||||
| @@ -407,7 +407,7 @@ def test_check_doi_field(): | |||||||
|     # the citation and a DOI field. |     # the citation and a DOI field. | ||||||
|     d = {"cg.identifier.doi": doi, "dcterms.bibliographicCitation": citation} |     d = {"cg.identifier.doi": doi, "dcterms.bibliographicCitation": citation} | ||||||
|     series = pd.Series(data=d) |     series = pd.Series(data=d) | ||||||
|     exclude = list() |     exclude = [] | ||||||
|  |  | ||||||
|     result = check.citation_doi(series, exclude) |     result = check.citation_doi(series, exclude) | ||||||
|  |  | ||||||
| @@ -418,7 +418,7 @@ def test_check_doi_only_in_citation(capsys): | |||||||
|     """Test an item with a DOI in its citation, but no DOI field.""" |     """Test an item with a DOI in its citation, but no DOI field.""" | ||||||
|  |  | ||||||
|     citation = "Orth, A. 2021. Testing all the things. doi: 10.1186/1743-422X-9-218" |     citation = "Orth, A. 2021. Testing all the things. doi: 10.1186/1743-422X-9-218" | ||||||
|     exclude = list() |     exclude = [] | ||||||
|  |  | ||||||
|     # Emulate a column in a transposed dataframe (which is just a series), with |     # Emulate a column in a transposed dataframe (which is just a series), with | ||||||
|     # an empty DOI field and a citation containing a DOI. |     # an empty DOI field and a citation containing a DOI. | ||||||
| @@ -439,7 +439,7 @@ def test_title_in_citation(): | |||||||
|  |  | ||||||
|     title = "Testing all the things" |     title = "Testing all the things" | ||||||
|     citation = "Orth, A. 2021. Testing all the things." |     citation = "Orth, A. 2021. Testing all the things." | ||||||
|     exclude = list() |     exclude = [] | ||||||
|  |  | ||||||
|     # Emulate a column in a transposed dataframe (which is just a series), with |     # Emulate a column in a transposed dataframe (which is just a series), with | ||||||
|     # the title and citation. |     # the title and citation. | ||||||
| @@ -456,7 +456,7 @@ def test_title_not_in_citation(capsys): | |||||||
|  |  | ||||||
|     title = "Testing all the things" |     title = "Testing all the things" | ||||||
|     citation = "Orth, A. 2021. Testing all teh things." |     citation = "Orth, A. 2021. Testing all teh things." | ||||||
|     exclude = list() |     exclude = [] | ||||||
|  |  | ||||||
|     # Emulate a column in a transposed dataframe (which is just a series), with |     # Emulate a column in a transposed dataframe (which is just a series), with | ||||||
|     # the title and citation. |     # the title and citation. | ||||||
| @@ -477,7 +477,7 @@ def test_country_matches_region(): | |||||||
|  |  | ||||||
|     country = "Kenya" |     country = "Kenya" | ||||||
|     region = "Eastern Africa" |     region = "Eastern Africa" | ||||||
|     exclude = list() |     exclude = [] | ||||||
|  |  | ||||||
|     # Emulate a column in a transposed dataframe (which is just a series) |     # Emulate a column in a transposed dataframe (which is just a series) | ||||||
|     d = {"cg.coverage.country": country, "cg.coverage.region": region} |     d = {"cg.coverage.country": country, "cg.coverage.region": region} | ||||||
| @@ -495,7 +495,7 @@ def test_country_not_matching_region(capsys): | |||||||
|     country = "Kenya" |     country = "Kenya" | ||||||
|     region = "" |     region = "" | ||||||
|     missing_region = "Eastern Africa" |     missing_region = "Eastern Africa" | ||||||
|     exclude = list() |     exclude = [] | ||||||
|  |  | ||||||
|     # Emulate a column in a transposed dataframe (which is just a series) |     # Emulate a column in a transposed dataframe (which is just a series) | ||||||
|     d = { |     d = { | ||||||
|   | |||||||
| @@ -131,7 +131,7 @@ def test_fix_country_not_matching_region(): | |||||||
|     country = "Kenya" |     country = "Kenya" | ||||||
|     region = "" |     region = "" | ||||||
|     missing_region = "Eastern Africa" |     missing_region = "Eastern Africa" | ||||||
|     exclude = list() |     exclude = [] | ||||||
|  |  | ||||||
|     # Emulate a column in a transposed dataframe (which is just a series) |     # Emulate a column in a transposed dataframe (which is just a series) | ||||||
|     d = { |     d = { | ||||||
| @@ -152,3 +152,11 @@ def test_fix_country_not_matching_region(): | |||||||
|     series_correct = pd.Series(data=d_correct) |     series_correct = pd.Series(data=d_correct) | ||||||
|  |  | ||||||
|     pd.testing.assert_series_equal(result, series_correct) |     pd.testing.assert_series_equal(result, series_correct) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def test_fix_normalize_dois(): | ||||||
|  |     """Test normalizing a DOI.""" | ||||||
|  |  | ||||||
|  |     value = "doi: 10.11648/j.jps.20140201.14" | ||||||
|  |  | ||||||
|  |     assert fix.normalize_dois(value) == "https://doi.org/10.11648/j.jps.20140201.14" | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user