mirror of
				https://github.com/ilri/csv-metadata-quality.git
				synced 2025-11-04 06:29:08 +01:00 
			
		
		
		
	Compare commits
	
		
			73 Commits
		
	
	
		
			v0.6.1
			...
			ba4637ea34
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| ba4637ea34 | |||
| 355428a691 | |||
| 
						 | 
					58d4de973e | ||
| e1216dae3c | |||
| 
						 | 
					6b650ff1b3 | ||
| fa7bde6fc0 | |||
| 
						 | 
					f89159fe32 | ||
| 
						 | 
					02058c5a65 | ||
| 8fed6b71ff | |||
| b005b28cbe | |||
| 
						 | 
					c626290599 | ||
| 
						 | 
					1a06470b64 | ||
| d46a81672e | |||
| 2a50e75082 | |||
| 0d45e73983 | |||
| 
						 | 
					3611aab425 | ||
| 
						 | 
					5c4ad0eb41 | ||
| 
						 | 
					f1f39722f6 | ||
| 1c03999582 | |||
| 
						
						
							
						
						1f637f32cd
	
				 | 
					
					
						|||
| 
						
						
							
						
						b8241e919d
	
				 | 
					
					
						|||
| 
						
						
							
						
						b8dc19cc3f
	
				 | 
					
					
						|||
| 
						
						
							
						
						93c9b739ac
	
				 | 
					
					
						|||
| 
						
						
							
						
						4ed2786703
	
				 | 
					
					
						|||
| 
						 | 
					8728789183 | ||
| 
						
						
							
						
						bf90464809
	
				 | 
					
					
						|||
| 1878002391 | |||
| d21d2621e3 | |||
| f3fb1ff7fb | |||
| 1fa81f7558 | |||
| 
						 | 
					7409193b6b | ||
| 
						
						
							
						
						a84fcf0b7b
	
				 | 
					
					
						|||
| 
						
						
							
						
						25ac290df4
	
				 | 
					
					
						|||
| 
						
						
							
						
						3f52bad1e3
	
				 | 
					
					
						|||
| 0208ad0ade | |||
| 
						 | 
					3632ae0fc9 | ||
| 
						
						
							
						
						17d089cc6e
	
				 | 
					
					
						|||
| 
						
						
							
						
						bc470a4343
	
				 | 
					
					
						|||
| 
						
						
							
						
						be609a809d
	
				 | 
					
					
						|||
| 
						
						
							
						
						de3387ded7
	
				 | 
					
					
						|||
| 
						
						
							
						
						f343e87f0c
	
				 | 
					
					
						|||
| 
						
						
							
						
						7d3524fbd5
	
				 | 
					
					
						|||
| c614b71a52 | |||
| 
						 | 
					d159a839f3 | ||
| 
						
						
							
						
						36e2ebe5f4
	
				 | 
					
					
						|||
| 
						
						
							
						
						33f67b7a7c
	
				 | 
					
					
						|||
| 
						
						
							
						
						c0e1448439
	
				 | 
					
					
						|||
| 
						
						
							
						
						5d0804a08f
	
				 | 
					
					
						|||
| 
						
						
							
						
						f01c9edf17
	
				 | 
					
					
						|||
| 
						
						
							
						
						8d4295b2b3
	
				 | 
					
					
						|||
| 
						
						
							
						
						e2d46e9495
	
				 | 
					
					
						|||
| 
						
						
							
						
						1491e1edb0
	
				 | 
					
					
						|||
| 
						
						
							
						
						34142c3e6b
	
				 | 
					
					
						|||
| 
						
						
							
						
						0c88b96e8d
	
				 | 
					
					
						|||
| 
						
						
							
						
						2e55b4d6e3
	
				 | 
					
					
						|||
| 
						
						
							
						
						c90aad29f0
	
				 | 
					
					
						|||
| 
						
						
							
						
						6fd1e1377f
	
				 | 
					
					
						|||
| 
						
						
							
						
						c64b7eb1f1
	
				 | 
					
					
						|||
| 
						
						
							
						
						29cbc4f3a3
	
				 | 
					
					
						|||
| 
						
						
							
						
						307af1acfc
	
				 | 
					
					
						|||
| 
						
						
							
						
						b5106de9df
	
				 | 
					
					
						|||
| 
						
						
							
						
						9eeadfc44e
	
				 | 
					
					
						|||
| 
						
						
							
						
						d4aed378cf
	
				 | 
					
					
						|||
| 
						
						
							
						
						20a2cce34b
	
				 | 
					
					
						|||
| 
						
						
							
						
						d661ffe439
	
				 | 
					
					
						|||
| 
						
						
							
						
						45a310387a
	
				 | 
					
					
						|||
| 
						
						
							
						
						47b03c49ba
	
				 | 
					
					
						|||
| 
						
						
							
						
						986b81cbf4
	
				 | 
					
					
						|||
| 
						
						
							
						
						d43a47ae32
	
				 | 
					
					
						|||
| 
						
						
							
						
						ede37569f1
	
				 | 
					
					
						|||
| 
						
						
							
						
						0c53efe60a
	
				 | 
					
					
						|||
| 
						
						
							
						
						5f0e25b818
	
				 | 
					
					
						|||
| 
						
						
							
						
						4776154d6c
	
				 | 
					
					
						
							
								
								
									
										70
									
								
								.drone.yml
									
									
									
									
									
								
							
							
						
						
									
										70
									
								
								.drone.yml
									
									
									
									
									
								
							@@ -1,3 +1,33 @@
 | 
				
			|||||||
 | 
					---
 | 
				
			||||||
 | 
					kind: pipeline
 | 
				
			||||||
 | 
					type: docker
 | 
				
			||||||
 | 
					name: python311
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					steps:
 | 
				
			||||||
 | 
					- name: test
 | 
				
			||||||
 | 
					  image: python:3.11-slim
 | 
				
			||||||
 | 
					  commands:
 | 
				
			||||||
 | 
					  - id
 | 
				
			||||||
 | 
					  - python -V
 | 
				
			||||||
 | 
					  - apt update && apt install -y gcc g++ libicu-dev pkg-config git
 | 
				
			||||||
 | 
					  - python -m pip install poetry
 | 
				
			||||||
 | 
					  - poetry install
 | 
				
			||||||
 | 
					  - poetry run pytest
 | 
				
			||||||
 | 
					  # Basic test
 | 
				
			||||||
 | 
					  - poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv
 | 
				
			||||||
 | 
					  # Basic test with unsafe fixes
 | 
				
			||||||
 | 
					  - poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u
 | 
				
			||||||
 | 
					  # Geography test
 | 
				
			||||||
 | 
					  - poetry run csv-metadata-quality -i data/test-geography.csv -o /tmp/test.csv
 | 
				
			||||||
 | 
					  # Geography test with unsafe fixes
 | 
				
			||||||
 | 
					  - poetry run csv-metadata-quality -i data/test-geography.csv -o /tmp/test.csv -u
 | 
				
			||||||
 | 
					  # Test with experimental checks
 | 
				
			||||||
 | 
					  - poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e
 | 
				
			||||||
 | 
					  # Test with AGROVOC validation
 | 
				
			||||||
 | 
					  - poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject
 | 
				
			||||||
 | 
					  # Test with AGROVOC validation (and dropping invalid)
 | 
				
			||||||
 | 
					  - poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject -d
 | 
				
			||||||
 | 
					
 | 
				
			||||||
---
 | 
					---
 | 
				
			||||||
kind: pipeline
 | 
					kind: pipeline
 | 
				
			||||||
type: docker
 | 
					type: docker
 | 
				
			||||||
@@ -10,23 +40,23 @@ steps:
 | 
				
			|||||||
  - id
 | 
					  - id
 | 
				
			||||||
  - python -V
 | 
					  - python -V
 | 
				
			||||||
  - apt update && apt install -y gcc g++ libicu-dev pkg-config git
 | 
					  - apt update && apt install -y gcc g++ libicu-dev pkg-config git
 | 
				
			||||||
  - pip install -r requirements-dev.txt
 | 
					  - python -m pip install poetry
 | 
				
			||||||
  - pytest
 | 
					  - poetry install
 | 
				
			||||||
  - python setup.py install
 | 
					  - poetry run pytest
 | 
				
			||||||
  # Basic test
 | 
					  # Basic test
 | 
				
			||||||
  - csv-metadata-quality -i data/test.csv -o /tmp/test.csv
 | 
					  - poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv
 | 
				
			||||||
  # Basic test with unsafe fixes
 | 
					  # Basic test with unsafe fixes
 | 
				
			||||||
  - csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u
 | 
					  - poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u
 | 
				
			||||||
  # Geography test
 | 
					  # Geography test
 | 
				
			||||||
  - csv-metadata-quality -i data/test-geography.csv -o /tmp/test.csv
 | 
					  - poetry run csv-metadata-quality -i data/test-geography.csv -o /tmp/test.csv
 | 
				
			||||||
  # Geography test with unsafe fixes
 | 
					  # Geography test with unsafe fixes
 | 
				
			||||||
  - csv-metadata-quality -i data/test-geography.csv -o /tmp/test.csv -u
 | 
					  - poetry run csv-metadata-quality -i data/test-geography.csv -o /tmp/test.csv -u
 | 
				
			||||||
  # Test with experimental checks
 | 
					  # Test with experimental checks
 | 
				
			||||||
  - csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e
 | 
					  - poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e
 | 
				
			||||||
  # Test with AGROVOC validation
 | 
					  # Test with AGROVOC validation
 | 
				
			||||||
  - csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject
 | 
					  - poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject
 | 
				
			||||||
  # Test with AGROVOC validation (and dropping invalid)
 | 
					  # Test with AGROVOC validation (and dropping invalid)
 | 
				
			||||||
  - csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject -d
 | 
					  - poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject -d
 | 
				
			||||||
 | 
					
 | 
				
			||||||
---
 | 
					---
 | 
				
			||||||
kind: pipeline
 | 
					kind: pipeline
 | 
				
			||||||
@@ -40,22 +70,22 @@ steps:
 | 
				
			|||||||
  - id
 | 
					  - id
 | 
				
			||||||
  - python -V
 | 
					  - python -V
 | 
				
			||||||
  - apt update && apt install -y gcc g++ libicu-dev pkg-config git
 | 
					  - apt update && apt install -y gcc g++ libicu-dev pkg-config git
 | 
				
			||||||
  - pip install -r requirements-dev.txt
 | 
					  - python -m pip install poetry
 | 
				
			||||||
  - pytest
 | 
					  - poetry install
 | 
				
			||||||
  - python setup.py install
 | 
					  - poetry run pytest
 | 
				
			||||||
  # Basic test
 | 
					  # Basic test
 | 
				
			||||||
  - csv-metadata-quality -i data/test.csv -o /tmp/test.csv
 | 
					  - poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv
 | 
				
			||||||
  # Basic test with unsafe fixes
 | 
					  # Basic test with unsafe fixes
 | 
				
			||||||
  - csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u
 | 
					  - poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u
 | 
				
			||||||
  # Geography test
 | 
					  # Geography test
 | 
				
			||||||
  - csv-metadata-quality -i data/test-geography.csv -o /tmp/test.csv
 | 
					  - poetry run csv-metadata-quality -i data/test-geography.csv -o /tmp/test.csv
 | 
				
			||||||
  # Geography test with unsafe fixes
 | 
					  # Geography test with unsafe fixes
 | 
				
			||||||
  - csv-metadata-quality -i data/test-geography.csv -o /tmp/test.csv -u
 | 
					  - poetry run csv-metadata-quality -i data/test-geography.csv -o /tmp/test.csv -u
 | 
				
			||||||
  # Test with experimental checks
 | 
					  # Test with experimental checks
 | 
				
			||||||
  - csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e
 | 
					  - poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e
 | 
				
			||||||
  # Test with AGROVOC validation
 | 
					  # Test with AGROVOC validation
 | 
				
			||||||
  - csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject
 | 
					  - poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject
 | 
				
			||||||
  # Test with AGROVOC validation (and dropping invalid)
 | 
					  # Test with AGROVOC validation (and dropping invalid)
 | 
				
			||||||
  - csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject -d
 | 
					  - poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject -d
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# vim: ts=2 sw=2 et
 | 
					# vim: ts=2 sw=2 et
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										36
									
								
								.github/workflows/python-app.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										36
									
								
								.github/workflows/python-app.yml
									
									
									
									
										vendored
									
									
								
							@@ -15,37 +15,31 @@ jobs:
 | 
				
			|||||||
    runs-on: ubuntu-22.04
 | 
					    runs-on: ubuntu-22.04
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    steps:
 | 
					    steps:
 | 
				
			||||||
    - uses: actions/checkout@v3
 | 
					    - uses: actions/checkout@v4
 | 
				
			||||||
    - name: Set up Python 3.10
 | 
					    - name: Install poetry
 | 
				
			||||||
      uses: actions/setup-python@v4
 | 
					      run: pipx install poetry
 | 
				
			||||||
 | 
					    - uses: actions/setup-python@v4
 | 
				
			||||||
      with:
 | 
					      with:
 | 
				
			||||||
        python-version: '3.10'
 | 
					        python-version: '3.11'
 | 
				
			||||||
        cache: 'pip'
 | 
					        cache: 'poetry'
 | 
				
			||||||
    - name: Install dependencies
 | 
					    - run: poetry install
 | 
				
			||||||
      run: |
 | 
					 | 
				
			||||||
        python -m pip install --upgrade pip
 | 
					 | 
				
			||||||
        pip install flake8 pytest
 | 
					 | 
				
			||||||
        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
 | 
					 | 
				
			||||||
        if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi
 | 
					 | 
				
			||||||
    - name: Lint with flake8
 | 
					    - name: Lint with flake8
 | 
				
			||||||
      run: |
 | 
					      run: |
 | 
				
			||||||
        # stop the build if there are Python syntax errors or undefined names
 | 
					        # stop the build if there are Python syntax errors or undefined names
 | 
				
			||||||
        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
 | 
					        poetry run flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
 | 
				
			||||||
        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
 | 
					        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
 | 
				
			||||||
        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
 | 
					        poetry run flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
 | 
				
			||||||
    - name: Test with pytest
 | 
					    - name: Test with pytest
 | 
				
			||||||
      run: |
 | 
					      run: poetry run pytest
 | 
				
			||||||
        pytest
 | 
					 | 
				
			||||||
    - name: Test CLI
 | 
					    - name: Test CLI
 | 
				
			||||||
      run: |
 | 
					      run: |
 | 
				
			||||||
        python setup.py install
 | 
					 | 
				
			||||||
        # Basic test
 | 
					        # Basic test
 | 
				
			||||||
        csv-metadata-quality -i data/test.csv -o /tmp/test.csv
 | 
					        poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv
 | 
				
			||||||
        # Test with unsafe fixes
 | 
					        # Test with unsafe fixes
 | 
				
			||||||
        csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u
 | 
					        poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv -u
 | 
				
			||||||
        # Test with experimental checks
 | 
					        # Test with experimental checks
 | 
				
			||||||
        csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e
 | 
					        poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv -e
 | 
				
			||||||
        # Test with AGROVOC validation
 | 
					        # Test with AGROVOC validation
 | 
				
			||||||
        csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject
 | 
					        poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject
 | 
				
			||||||
        # Test with AGROVOC validation (and dropping invalid)
 | 
					        # Test with AGROVOC validation (and dropping invalid)
 | 
				
			||||||
        csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject -d
 | 
					        poetry run csv-metadata-quality -i data/test.csv -o /tmp/test.csv --agrovoc-fields dcterms.subject -d
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										15
									
								
								CHANGELOG.md
									
									
									
									
									
								
							
							
						
						
									
										15
									
								
								CHANGELOG.md
									
									
									
									
									
								
							@@ -4,6 +4,21 @@ All notable changes to this project will be documented in this file.
 | 
				
			|||||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 | 
					The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 | 
				
			||||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 | 
					and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Unreleased
 | 
				
			||||||
 | 
					### Fixed
 | 
				
			||||||
 | 
					- Fixed regex so we don't run the invalid multi-value separator fix on
 | 
				
			||||||
 | 
					`dcterms.bibliographicCitation` fields
 | 
				
			||||||
 | 
					- Fixed regex so we run the comma space fix on `dcterms.bibliographicCitation`
 | 
				
			||||||
 | 
					fields
 | 
				
			||||||
 | 
					- Don't crash the country/region checker/fixer when a title field is missing
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### Changed
 | 
				
			||||||
 | 
					- Don't run newline fix on description fields
 | 
				
			||||||
 | 
					- Install requests-cache in main run() function instead of check.agrovoc() function so we only incur the overhead once
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### Updated
 | 
				
			||||||
 | 
					- Python dependencies, including Pandas 2.0.0 and [Arrow-backed dtypes](https://datapythonista.me/blog/pandas-20-and-the-arrow-revolution-part-i)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## [0.6.1] - 2023-02-23
 | 
					## [0.6.1] - 2023-02-23
 | 
				
			||||||
### Fixed
 | 
					### Fixed
 | 
				
			||||||
- Missing region check should ignore subregion field, if it exists
 | 
					- Missing region check should ignore subregion field, if it exists
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										1
									
								
								MANIFEST.in
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								MANIFEST.in
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1 @@
 | 
				
			|||||||
 | 
					include csv_metadata_quality/data/licenses.json
 | 
				
			||||||
@@ -127,7 +127,6 @@ This currently uses the [Python langid](https://github.com/saffsd/langid.py) lib
 | 
				
			|||||||
- Warn if an author is shorter than 3 characters?
 | 
					- Warn if an author is shorter than 3 characters?
 | 
				
			||||||
- Validate DOIs? Normalize to https://doi.org format? Or use just the DOI part: 10.1016/j.worlddev.2010.06.006
 | 
					- Validate DOIs? Normalize to https://doi.org format? Or use just the DOI part: 10.1016/j.worlddev.2010.06.006
 | 
				
			||||||
- Warn if two items use the same file in `filename` column
 | 
					- Warn if two items use the same file in `filename` column
 | 
				
			||||||
- Add an option to drop invalid AGROVOC subjects?
 | 
					 | 
				
			||||||
- Add tests for application invocation, ie `tests/test_app.py`?
 | 
					- Add tests for application invocation, ie `tests/test_app.py`?
 | 
				
			||||||
- Validate ISSNs or journal titles against CrossRef API?
 | 
					- Validate ISSNs or journal titles against CrossRef API?
 | 
				
			||||||
- Add configurable field validation, like specify a field name and a validation file?
 | 
					- Add configurable field validation, like specify a field name and a validation file?
 | 
				
			||||||
@@ -137,7 +136,7 @@ This currently uses the [Python langid](https://github.com/saffsd/langid.py) lib
 | 
				
			|||||||
  - Warn if item is Open Access, but missing a license
 | 
					  - Warn if item is Open Access, but missing a license
 | 
				
			||||||
  - Warn if item has an ISSN but no journal title
 | 
					  - Warn if item has an ISSN but no journal title
 | 
				
			||||||
  - Update journal titles from ISSN
 | 
					  - Update journal titles from ISSN
 | 
				
			||||||
- Migrate to https://github.com/spdx/license-list-data
 | 
					- Migrate from Pandas to Polars
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## License
 | 
					## License
 | 
				
			||||||
This work is licensed under the [GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html).
 | 
					This work is licensed under the [GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html).
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,11 +1,14 @@
 | 
				
			|||||||
# SPDX-License-Identifier: GPL-3.0-only
 | 
					# SPDX-License-Identifier: GPL-3.0-only
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import argparse
 | 
					import argparse
 | 
				
			||||||
 | 
					import os
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
import signal
 | 
					import signal
 | 
				
			||||||
import sys
 | 
					import sys
 | 
				
			||||||
 | 
					from datetime import timedelta
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import pandas as pd
 | 
					import pandas as pd
 | 
				
			||||||
 | 
					import requests_cache
 | 
				
			||||||
from colorama import Fore
 | 
					from colorama import Fore
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import csv_metadata_quality.check as check
 | 
					import csv_metadata_quality.check as check
 | 
				
			||||||
@@ -74,7 +77,7 @@ def run(argv):
 | 
				
			|||||||
    signal.signal(signal.SIGINT, signal_handler)
 | 
					    signal.signal(signal.SIGINT, signal_handler)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Read all fields as strings so dates don't get converted from 1998 to 1998.0
 | 
					    # Read all fields as strings so dates don't get converted from 1998 to 1998.0
 | 
				
			||||||
    df = pd.read_csv(args.input_file, dtype=str)
 | 
					    df = pd.read_csv(args.input_file, dtype_backend="pyarrow", dtype="str")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Check if the user requested to skip any fields
 | 
					    # Check if the user requested to skip any fields
 | 
				
			||||||
    if args.exclude_fields:
 | 
					    if args.exclude_fields:
 | 
				
			||||||
@@ -84,6 +87,19 @@ def run(argv):
 | 
				
			|||||||
    else:
 | 
					    else:
 | 
				
			||||||
        exclude = list()
 | 
					        exclude = list()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # enable transparent request cache with thirty days expiry
 | 
				
			||||||
 | 
					    expire_after = timedelta(days=30)
 | 
				
			||||||
 | 
					    # Allow overriding the location of the requests cache, just in case we are
 | 
				
			||||||
 | 
					    # running in an environment where we can't write to the current working di-
 | 
				
			||||||
 | 
					    # rectory (for example from csv-metadata-quality-web).
 | 
				
			||||||
 | 
					    REQUESTS_CACHE_DIR = os.environ.get("REQUESTS_CACHE_DIR", ".")
 | 
				
			||||||
 | 
					    requests_cache.install_cache(
 | 
				
			||||||
 | 
					        f"{REQUESTS_CACHE_DIR}/agrovoc-response-cache", expire_after=expire_after
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # prune old cache entries
 | 
				
			||||||
 | 
					    requests_cache.delete()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for column in df.columns:
 | 
					    for column in df.columns:
 | 
				
			||||||
        if column in exclude:
 | 
					        if column in exclude:
 | 
				
			||||||
            print(f"{Fore.YELLOW}Skipping {Fore.RESET}{column}")
 | 
					            print(f"{Fore.YELLOW}Skipping {Fore.RESET}{column}")
 | 
				
			||||||
@@ -91,7 +107,9 @@ def run(argv):
 | 
				
			|||||||
            continue
 | 
					            continue
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if args.unsafe_fixes:
 | 
					        if args.unsafe_fixes:
 | 
				
			||||||
            match = re.match(r"^.*?abstract.*$", column)
 | 
					            # Skip whitespace and newline fixes on abstracts and descriptions
 | 
				
			||||||
 | 
					            # because there are too many with legitimate multi-line metadata.
 | 
				
			||||||
 | 
					            match = re.match(r"^.*?(abstract|description).*$", column)
 | 
				
			||||||
            if match is None:
 | 
					            if match is None:
 | 
				
			||||||
                # Fix: whitespace
 | 
					                # Fix: whitespace
 | 
				
			||||||
                df[column] = df[column].apply(fix.whitespace, field_name=column)
 | 
					                df[column] = df[column].apply(fix.whitespace, field_name=column)
 | 
				
			||||||
@@ -102,7 +120,7 @@ def run(argv):
 | 
				
			|||||||
        # Fix: missing space after comma. Only run on author and citation
 | 
					        # Fix: missing space after comma. Only run on author and citation
 | 
				
			||||||
        # fields for now, as this problem is mostly an issue in names.
 | 
					        # fields for now, as this problem is mostly an issue in names.
 | 
				
			||||||
        if args.unsafe_fixes:
 | 
					        if args.unsafe_fixes:
 | 
				
			||||||
            match = re.match(r"^.*?(author|citation).*$", column)
 | 
					            match = re.match(r"^.*?(author|[Cc]itation).*$", column)
 | 
				
			||||||
            if match is not None:
 | 
					            if match is not None:
 | 
				
			||||||
                df[column] = df[column].apply(fix.comma_space, field_name=column)
 | 
					                df[column] = df[column].apply(fix.comma_space, field_name=column)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -126,7 +144,7 @@ def run(argv):
 | 
				
			|||||||
        # Fix: invalid and unnecessary multi-value separators. Skip the title
 | 
					        # Fix: invalid and unnecessary multi-value separators. Skip the title
 | 
				
			||||||
        # and abstract fields because "|" is used to indicate something like
 | 
					        # and abstract fields because "|" is used to indicate something like
 | 
				
			||||||
        # a subtitle.
 | 
					        # a subtitle.
 | 
				
			||||||
        match = re.match(r"^.*?(abstract|title).*$", column)
 | 
					        match = re.match(r"^.*?(abstract|[Cc]itation|title).*$", column)
 | 
				
			||||||
        if match is None:
 | 
					        if match is None:
 | 
				
			||||||
            df[column] = df[column].apply(fix.separators, field_name=column)
 | 
					            df[column] = df[column].apply(fix.separators, field_name=column)
 | 
				
			||||||
            # Run whitespace fix again after fixing invalid separators
 | 
					            # Run whitespace fix again after fixing invalid separators
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,14 +1,12 @@
 | 
				
			|||||||
# SPDX-License-Identifier: GPL-3.0-only
 | 
					# SPDX-License-Identifier: GPL-3.0-only
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import logging
 | 
					import logging
 | 
				
			||||||
import os
 | 
					 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
from datetime import datetime, timedelta
 | 
					from datetime import datetime, timedelta
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import country_converter as coco
 | 
					import country_converter as coco
 | 
				
			||||||
import pandas as pd
 | 
					import pandas as pd
 | 
				
			||||||
import requests
 | 
					import requests
 | 
				
			||||||
import requests_cache
 | 
					 | 
				
			||||||
from colorama import Fore
 | 
					from colorama import Fore
 | 
				
			||||||
from pycountry import languages
 | 
					from pycountry import languages
 | 
				
			||||||
from stdnum import isbn as stdnum_isbn
 | 
					from stdnum import isbn as stdnum_isbn
 | 
				
			||||||
@@ -203,25 +201,12 @@ def agrovoc(field, field_name, drop):
 | 
				
			|||||||
    if pd.isna(field):
 | 
					    if pd.isna(field):
 | 
				
			||||||
        return
 | 
					        return
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # enable transparent request cache with thirty days expiry
 | 
					 | 
				
			||||||
    expire_after = timedelta(days=30)
 | 
					 | 
				
			||||||
    # Allow overriding the location of the requests cache, just in case we are
 | 
					 | 
				
			||||||
    # running in an environment where we can't write to the current working di-
 | 
					 | 
				
			||||||
    # rectory (for example from csv-metadata-quality-web).
 | 
					 | 
				
			||||||
    REQUESTS_CACHE_DIR = os.environ.get("REQUESTS_CACHE_DIR", ".")
 | 
					 | 
				
			||||||
    requests_cache.install_cache(
 | 
					 | 
				
			||||||
        f"{REQUESTS_CACHE_DIR}/agrovoc-response-cache", expire_after=expire_after
 | 
					 | 
				
			||||||
    )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    # prune old cache entries
 | 
					 | 
				
			||||||
    # requests_cache.remove_expired_responses()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    # Initialize an empty list to hold the validated AGROVOC values
 | 
					    # Initialize an empty list to hold the validated AGROVOC values
 | 
				
			||||||
    values = list()
 | 
					    values = list()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Try to split multi-value field on "||" separator
 | 
					    # Try to split multi-value field on "||" separator
 | 
				
			||||||
    for value in field.split("||"):
 | 
					    for value in field.split("||"):
 | 
				
			||||||
        request_url = "http://agrovoc.uniroma2.it/agrovoc/rest/v1/agrovoc/search"
 | 
					        request_url = "https://agrovoc.uniroma2.it/agrovoc/rest/v1/agrovoc/search"
 | 
				
			||||||
        request_params = {"query": value}
 | 
					        request_params = {"query": value}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        request = requests.get(request_url, params=request_params)
 | 
					        request = requests.get(request_url, params=request_params)
 | 
				
			||||||
@@ -563,8 +548,13 @@ def countries_match_regions(row, exclude):
 | 
				
			|||||||
            un_region = cc.convert(names=country, to="UNRegion")
 | 
					            un_region = cc.convert(names=country, to="UNRegion")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            if un_region != "not found" and un_region not in regions:
 | 
					            if un_region != "not found" and un_region not in regions:
 | 
				
			||||||
                print(
 | 
					                try:
 | 
				
			||||||
                    f"{Fore.YELLOW}Missing region ({country} → {un_region}): {Fore.RESET}{row[title_column_name]}"
 | 
					                    print(
 | 
				
			||||||
                )
 | 
					                        f"{Fore.YELLOW}Missing region ({country} → {un_region}): {Fore.RESET}{row[title_column_name]}"
 | 
				
			||||||
 | 
					                    )
 | 
				
			||||||
 | 
					                except KeyError:
 | 
				
			||||||
 | 
					                    print(
 | 
				
			||||||
 | 
					                        f"{Fore.YELLOW}Missing region ({country} → {un_region}): {Fore.RESET}<title field not present>"
 | 
				
			||||||
 | 
					                    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return
 | 
					    return
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -370,9 +370,17 @@ def countries_match_regions(row, exclude):
 | 
				
			|||||||
            # it doesn't already exist in regions.
 | 
					            # it doesn't already exist in regions.
 | 
				
			||||||
            if un_region != "not found" and un_region not in regions:
 | 
					            if un_region != "not found" and un_region not in regions:
 | 
				
			||||||
                if un_region not in missing_regions:
 | 
					                if un_region not in missing_regions:
 | 
				
			||||||
                    print(
 | 
					                    try:
 | 
				
			||||||
                        f"{Fore.YELLOW}Adding missing region ({un_region}): {Fore.RESET}{row[title_column_name]}"
 | 
					                        print(
 | 
				
			||||||
                    )
 | 
					                            f"{Fore.YELLOW}Adding missing region ({un_region}): {Fore.RESET}{row[title_column_name]}"
 | 
				
			||||||
 | 
					                        )
 | 
				
			||||||
 | 
					                    except KeyError:
 | 
				
			||||||
 | 
					                        # If there is no title column in the CSV we will print
 | 
				
			||||||
 | 
					                        # the fix without the title instead of crashing.
 | 
				
			||||||
 | 
					                        print(
 | 
				
			||||||
 | 
					                            f"{Fore.YELLOW}Adding missing region ({un_region}): {Fore.RESET}<title field not present>"
 | 
				
			||||||
 | 
					                        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    missing_regions.append(un_region)
 | 
					                    missing_regions.append(un_region)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if len(missing_regions) > 0:
 | 
					        if len(missing_regions) > 0:
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -2,7 +2,7 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import json
 | 
					import json
 | 
				
			||||||
from importlib.resources import files
 | 
					import os
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from ftfy.badness import is_bad
 | 
					from ftfy.badness import is_bad
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -58,7 +58,7 @@ def is_mojibake(field):
 | 
				
			|||||||
def load_spdx_licenses():
 | 
					def load_spdx_licenses():
 | 
				
			||||||
    """Returns a Python list of SPDX short license identifiers."""
 | 
					    """Returns a Python list of SPDX short license identifiers."""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    with open(files("csv_metadata_quality").joinpath("data/licenses.json")) as f:
 | 
					    with open(os.path.join(os.path.dirname(__file__), "data/licenses.json")) as f:
 | 
				
			||||||
        licenses = json.load(f)
 | 
					        licenses = json.load(f)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # List comprehension to extract the license ID for each license
 | 
					    # List comprehension to extract the license ID for each license
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										1410
									
								
								poetry.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										1410
									
								
								poetry.lock
									
									
									
										generated
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@@ -12,25 +12,23 @@ csv-metadata-quality = 'csv_metadata_quality.__main__:main'
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
[tool.poetry.dependencies]
 | 
					[tool.poetry.dependencies]
 | 
				
			||||||
python = "^3.9"
 | 
					python = "^3.9"
 | 
				
			||||||
pandas = "^1.5.2"
 | 
					pandas = {version = "^2.0.2", extras = ["feather", "performance"]}
 | 
				
			||||||
python-stdnum = "^1.18"
 | 
					python-stdnum = "^1.18"
 | 
				
			||||||
requests = "^2.28.2"
 | 
					requests = "^2.28.2"
 | 
				
			||||||
requests-cache = "^0.9.8"
 | 
					requests-cache = "^1.0.0"
 | 
				
			||||||
langid = "^1.1.6"
 | 
					langid = "^1.1.6"
 | 
				
			||||||
colorama = "^0.4.6"
 | 
					colorama = "^0.4.6"
 | 
				
			||||||
ftfy = "^6.1.1"
 | 
					ftfy = "^6.1.1"
 | 
				
			||||||
country-converter = {git = "https://github.com/alanorth/country_converter.git", rev = "myanmar-region"}
 | 
					country-converter = "~1.1.0"
 | 
				
			||||||
pycountry = {git = "https://github.com/alanorth/pycountry", rev = "iso-codes-4.12.0"}
 | 
					pycountry = {git = "https://github.com/alanorth/pycountry", rev = "iso-codes-4.15.0"}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[tool.poetry.dev-dependencies]
 | 
					[tool.poetry.group.dev.dependencies]
 | 
				
			||||||
pytest = "^7.2.1"
 | 
					pytest = "^7.2.1"
 | 
				
			||||||
flake8 = "^6.0.0"
 | 
					flake8 = "^6.0.0"
 | 
				
			||||||
pytest-clarity = "^1.0.1"
 | 
					pytest-clarity = "^1.0.1"
 | 
				
			||||||
black = "^23.1.0"
 | 
					black = "^23.1.0"
 | 
				
			||||||
isort = "^5.12.0"
 | 
					isort = "^5.12.0"
 | 
				
			||||||
csvkit = "^1.1.0"
 | 
					csvkit = "^1.1.0"
 | 
				
			||||||
 | 
					 | 
				
			||||||
[tool.poetry.group.dev.dependencies]
 | 
					 | 
				
			||||||
ipython = "^8.10.0"
 | 
					ipython = "^8.10.0"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[build-system]
 | 
					[build-system]
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										9
									
								
								renovate.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								renovate.json
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,9 @@
 | 
				
			|||||||
 | 
					{
 | 
				
			||||||
 | 
					  "$schema": "https://docs.renovatebot.com/renovate-schema.json",
 | 
				
			||||||
 | 
					  "extends": [
 | 
				
			||||||
 | 
					    "config:base"
 | 
				
			||||||
 | 
					  ],
 | 
				
			||||||
 | 
					  "pip_requirements": {
 | 
				
			||||||
 | 
					      "enabled": false
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
@@ -5,28 +5,28 @@ agate==1.7.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			|||||||
appdirs==1.4.4 ; python_version >= "3.9" and python_version < "4.0"
 | 
					appdirs==1.4.4 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
appnope==0.1.3 ; python_version >= "3.9" and python_version < "4.0" and sys_platform == "darwin"
 | 
					appnope==0.1.3 ; python_version >= "3.9" and python_version < "4.0" and sys_platform == "darwin"
 | 
				
			||||||
asttokens==2.2.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
					asttokens==2.2.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
attrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
					attrs==23.1.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
babel==2.11.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
					babel==2.12.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
backcall==0.2.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
					backcall==0.2.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
black==23.1.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
					black==23.3.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
cattrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
					cattrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
certifi==2022.12.7 ; python_version >= "3.9" and python_version < "4"
 | 
					certifi==2022.12.7 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
charset-normalizer==3.0.1 ; python_version >= "3.9" and python_version < "4"
 | 
					charset-normalizer==3.1.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
click==8.1.3 ; python_version >= "3.9" and python_version < "4.0"
 | 
					click==8.1.3 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
colorama==0.4.6 ; python_version >= "3.9" and python_version < "4.0"
 | 
					colorama==0.4.6 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
country-converter @ git+https://github.com/alanorth/country_converter.git@myanmar-region ; python_version >= "3.9" and python_version < "4.0"
 | 
					country-converter==1.0.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
csvkit==1.1.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
					csvkit==1.1.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
dbfread==2.0.7 ; python_version >= "3.9" and python_version < "4.0"
 | 
					dbfread==2.0.7 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
decorator==5.1.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
					decorator==5.1.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
et-xmlfile==1.1.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
					et-xmlfile==1.1.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
exceptiongroup==1.1.0 ; python_version >= "3.9" and python_version < "3.11"
 | 
					exceptiongroup==1.1.1 ; python_version >= "3.9" and python_version < "3.11"
 | 
				
			||||||
executing==1.2.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
					executing==1.2.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
flake8==6.0.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
					flake8==6.0.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
ftfy==6.1.1 ; python_version >= "3.9" and python_version < "4"
 | 
					ftfy==6.1.1 ; python_version >= "3.9" and python_version < "4"
 | 
				
			||||||
greenlet==2.0.2 ; python_version >= "3.9" and (platform_machine == "aarch64" or platform_machine == "ppc64le" or platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "AMD64" or platform_machine == "win32" or platform_machine == "WIN32") and python_version < "4.0"
 | 
					greenlet==2.0.2 ; python_version >= "3.9" and platform_machine == "aarch64" and python_version < "4.0" or python_version >= "3.9" and platform_machine == "ppc64le" and python_version < "4.0" or python_version >= "3.9" and platform_machine == "x86_64" and python_version < "4.0" or python_version >= "3.9" and platform_machine == "amd64" and python_version < "4.0" or python_version >= "3.9" and platform_machine == "AMD64" and python_version < "4.0" or python_version >= "3.9" and platform_machine == "win32" and python_version < "4.0" or python_version >= "3.9" and platform_machine == "WIN32" and python_version < "4.0"
 | 
				
			||||||
idna==3.4 ; python_version >= "3.9" and python_version < "4"
 | 
					idna==3.4 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
iniconfig==2.0.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
					iniconfig==2.0.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
ipython==8.10.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
					ipython==8.13.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
isodate==0.6.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
					isodate==0.6.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
isort==5.12.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
					isort==5.12.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
jedi==0.18.2 ; python_version >= "3.9" and python_version < "4.0"
 | 
					jedi==0.18.2 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
@@ -37,44 +37,46 @@ matplotlib-inline==0.1.6 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			|||||||
mccabe==0.7.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
					mccabe==0.7.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
mdurl==0.1.2 ; python_version >= "3.9" and python_version < "4.0"
 | 
					mdurl==0.1.2 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
mypy-extensions==1.0.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
					mypy-extensions==1.0.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
numpy==1.24.2 ; python_version < "4.0" and python_version >= "3.9"
 | 
					numpy==1.24.3 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
olefile==0.46 ; python_version >= "3.9" and python_version < "4.0"
 | 
					olefile==0.46 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
openpyxl==3.1.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
					openpyxl==3.1.2 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
packaging==23.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
					packaging==23.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
pandas==1.5.3 ; python_version >= "3.9" and python_version < "4.0"
 | 
					pandas==2.0.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
parsedatetime==2.6 ; python_version >= "3.9" and python_version < "4.0"
 | 
					parsedatetime==2.6 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
parso==0.8.3 ; python_version >= "3.9" and python_version < "4.0"
 | 
					parso==0.8.3 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
pathspec==0.11.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
					pathspec==0.11.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
pexpect==4.8.0 ; python_version >= "3.9" and python_version < "4.0" and sys_platform != "win32"
 | 
					pexpect==4.8.0 ; python_version >= "3.9" and python_version < "4.0" and sys_platform != "win32"
 | 
				
			||||||
pickleshare==0.7.5 ; python_version >= "3.9" and python_version < "4.0"
 | 
					pickleshare==0.7.5 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
platformdirs==3.0.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
					platformdirs==3.5.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
pluggy==1.0.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
					pluggy==1.0.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
pprintpp==0.4.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
					pprintpp==0.4.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
prompt-toolkit==3.0.37 ; python_version >= "3.9" and python_version < "4.0"
 | 
					prompt-toolkit==3.0.38 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
ptyprocess==0.7.0 ; python_version >= "3.9" and python_version < "4.0" and sys_platform != "win32"
 | 
					ptyprocess==0.7.0 ; python_version >= "3.9" and python_version < "4.0" and sys_platform != "win32"
 | 
				
			||||||
pure-eval==0.2.2 ; python_version >= "3.9" and python_version < "4.0"
 | 
					pure-eval==0.2.2 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
 | 
					pyarrow==11.0.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
pycodestyle==2.10.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
					pycodestyle==2.10.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
pycountry @ git+https://github.com/alanorth/pycountry@iso-codes-4.12.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
					pycountry @ git+https://github.com/alanorth/pycountry@iso-codes-4.13.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
pyflakes==3.0.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
					pyflakes==3.0.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
pygments==2.14.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
					pygments==2.15.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
pytest-clarity==1.0.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
					pytest-clarity==1.0.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
pytest==7.2.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
					pytest==7.3.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "4.0"
 | 
					python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
python-slugify==8.0.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
					python-slugify==8.0.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
python-stdnum==1.18 ; python_version >= "3.9" and python_version < "4.0"
 | 
					python-stdnum==1.18 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
pytimeparse==1.1.8 ; python_version >= "3.9" and python_version < "4.0"
 | 
					pytimeparse==1.1.8 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
pytz==2022.7.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
					pytz==2023.3 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
requests-cache==0.9.8 ; python_version >= "3.9" and python_version < "4.0"
 | 
					requests-cache==0.9.8 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
requests==2.28.2 ; python_version >= "3.9" and python_version < "4"
 | 
					requests==2.29.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
rich==13.3.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
					rich==13.3.5 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
six==1.16.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
					six==1.16.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
sqlalchemy==1.4.46 ; python_version >= "3.9" and python_version < "4.0"
 | 
					sqlalchemy==1.4.48 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
stack-data==0.6.2 ; python_version >= "3.9" and python_version < "4.0"
 | 
					stack-data==0.6.2 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
text-unidecode==1.3 ; python_version >= "3.9" and python_version < "4.0"
 | 
					text-unidecode==1.3 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
tomli==2.0.1 ; python_version >= "3.9" and python_version < "3.11"
 | 
					tomli==2.0.1 ; python_version >= "3.9" and python_version < "3.11"
 | 
				
			||||||
traitlets==5.9.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
					traitlets==5.9.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
typing-extensions==4.5.0 ; python_version >= "3.9" and python_version < "3.10"
 | 
					typing-extensions==4.5.0 ; python_version >= "3.9" and python_version < "3.10"
 | 
				
			||||||
 | 
					tzdata==2023.3 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
url-normalize==1.4.3 ; python_version >= "3.9" and python_version < "4.0"
 | 
					url-normalize==1.4.3 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
urllib3==1.26.14 ; python_version >= "3.9" and python_version < "4"
 | 
					urllib3==1.26.15 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
wcwidth==0.2.6 ; python_version >= "3.9" and python_version < "4"
 | 
					wcwidth==0.2.6 ; python_version >= "3.9" and python_version < "4"
 | 
				
			||||||
xlrd==2.0.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
					xlrd==2.0.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,23 +1,25 @@
 | 
				
			|||||||
appdirs==1.4.4 ; python_version >= "3.9" and python_version < "4.0"
 | 
					appdirs==1.4.4 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
attrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
					attrs==23.1.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
cattrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
					cattrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
certifi==2022.12.7 ; python_version >= "3.9" and python_version < "4"
 | 
					certifi==2022.12.7 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
charset-normalizer==3.0.1 ; python_version >= "3.9" and python_version < "4"
 | 
					charset-normalizer==3.1.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
colorama==0.4.6 ; python_version >= "3.9" and python_version < "4.0"
 | 
					colorama==0.4.6 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
country-converter @ git+https://github.com/alanorth/country_converter.git@myanmar-region ; python_version >= "3.9" and python_version < "4.0"
 | 
					country-converter==1.0.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
exceptiongroup==1.1.0 ; python_version >= "3.9" and python_version < "3.11"
 | 
					exceptiongroup==1.1.1 ; python_version >= "3.9" and python_version < "3.11"
 | 
				
			||||||
ftfy==6.1.1 ; python_version >= "3.9" and python_version < "4"
 | 
					ftfy==6.1.1 ; python_version >= "3.9" and python_version < "4"
 | 
				
			||||||
idna==3.4 ; python_version >= "3.9" and python_version < "4"
 | 
					idna==3.4 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
langid==1.1.6 ; python_version >= "3.9" and python_version < "4.0"
 | 
					langid==1.1.6 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
numpy==1.24.2 ; python_version < "4.0" and python_version >= "3.9"
 | 
					numpy==1.24.3 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
pandas==1.5.3 ; python_version >= "3.9" and python_version < "4.0"
 | 
					pandas==2.0.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
pycountry @ git+https://github.com/alanorth/pycountry@iso-codes-4.12.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
					pyarrow==11.0.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
 | 
					pycountry @ git+https://github.com/alanorth/pycountry@iso-codes-4.13.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "4.0"
 | 
					python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
python-stdnum==1.18 ; python_version >= "3.9" and python_version < "4.0"
 | 
					python-stdnum==1.18 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
pytz==2022.7.1 ; python_version >= "3.9" and python_version < "4.0"
 | 
					pytz==2023.3 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
requests-cache==0.9.8 ; python_version >= "3.9" and python_version < "4.0"
 | 
					requests-cache==0.9.8 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
requests==2.28.2 ; python_version >= "3.9" and python_version < "4"
 | 
					requests==2.29.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
six==1.16.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
					six==1.16.0 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
 | 
					tzdata==2023.3 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
url-normalize==1.4.3 ; python_version >= "3.9" and python_version < "4.0"
 | 
					url-normalize==1.4.3 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
urllib3==1.26.14 ; python_version >= "3.9" and python_version < "4"
 | 
					urllib3==1.26.15 ; python_version >= "3.9" and python_version < "4.0"
 | 
				
			||||||
wcwidth==0.2.6 ; python_version >= "3.9" and python_version < "4"
 | 
					wcwidth==0.2.6 ; python_version >= "3.9" and python_version < "4"
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										36
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										36
									
								
								setup.py
									
									
									
									
									
								
							@@ -1,36 +0,0 @@
 | 
				
			|||||||
import setuptools
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
with open("README.md", "r") as fh:
 | 
					 | 
				
			||||||
    long_description = fh.read()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
install_requires = [
 | 
					 | 
				
			||||||
    "pandas",
 | 
					 | 
				
			||||||
    "python-stdnum",
 | 
					 | 
				
			||||||
    "requests",
 | 
					 | 
				
			||||||
    "requests-cache",
 | 
					 | 
				
			||||||
    "pycountry",
 | 
					 | 
				
			||||||
    "langid",
 | 
					 | 
				
			||||||
]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
setuptools.setup(
 | 
					 | 
				
			||||||
    name="csv-metadata-quality",
 | 
					 | 
				
			||||||
    version="0.6.1",
 | 
					 | 
				
			||||||
    author="Alan Orth",
 | 
					 | 
				
			||||||
    author_email="aorth@mjanja.ch",
 | 
					 | 
				
			||||||
    description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem.",
 | 
					 | 
				
			||||||
    license="GPLv3",
 | 
					 | 
				
			||||||
    long_description=long_description,
 | 
					 | 
				
			||||||
    long_description_content_type="text/markdown",
 | 
					 | 
				
			||||||
    url="https://github.com/alanorth/csv-metadata-quality",
 | 
					 | 
				
			||||||
    classifiers=[
 | 
					 | 
				
			||||||
        "Programming Language :: Python :: 3.9",
 | 
					 | 
				
			||||||
        "Programming Language :: Python :: 3.10",
 | 
					 | 
				
			||||||
        "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
 | 
					 | 
				
			||||||
        "Operating System :: OS Independent",
 | 
					 | 
				
			||||||
    ],
 | 
					 | 
				
			||||||
    packages=["csv_metadata_quality"],
 | 
					 | 
				
			||||||
    entry_points={
 | 
					 | 
				
			||||||
        "console_scripts": ["csv-metadata-quality = csv_metadata_quality.__main__:main"]
 | 
					 | 
				
			||||||
    },
 | 
					 | 
				
			||||||
    install_requires=install_requires,
 | 
					 | 
				
			||||||
)
 | 
					 | 
				
			||||||
		Reference in New Issue
	
	Block a user