mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2025-05-15 01:07:44 +02:00
Compare commits
4 Commits
Author | SHA1 | Date | |
---|---|---|---|
0b2d211455
|
|||
7f1df0b47c
|
|||
365ecda324
|
|||
550ce7fb7e
|
@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file.
|
|||||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
|
## [0.4.1] - 2020-01-15
|
||||||
|
### Changed
|
||||||
|
- Reduce minimum Python version to 3.6 by working around the `is_normalized()`
|
||||||
|
function, which is only available in Python >= 3.8
|
||||||
|
|
||||||
## [0.4.0] - 2020-01-15
|
## [0.4.0] - 2020-01-15
|
||||||
### Added
|
### Added
|
||||||
- Unicode normalization (enable with `--unsafe-fixes`, see README.md)
|
- Unicode normalization (enable with `--unsafe-fixes`, see README.md)
|
||||||
|
@ -212,7 +212,7 @@ def normalize_unicode(field, field_name):
|
|||||||
Return normalized string.
|
Return normalized string.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from unicodedata import is_normalized
|
from csv_metadata_quality.util import is_nfc
|
||||||
from unicodedata import normalize
|
from unicodedata import normalize
|
||||||
|
|
||||||
# Skip fields with missing values
|
# Skip fields with missing values
|
||||||
@ -220,7 +220,7 @@ def normalize_unicode(field, field_name):
|
|||||||
return
|
return
|
||||||
|
|
||||||
# Check if the current string is using normalized Unicode (NFC)
|
# Check if the current string is using normalized Unicode (NFC)
|
||||||
if not is_normalized("NFC", field):
|
if not is_nfc(field):
|
||||||
print(f"Normalizing Unicode ({field_name}): {field}")
|
print(f"Normalizing Unicode ({field_name}): {field}")
|
||||||
field = normalize("NFC", field)
|
field = normalize("NFC", field)
|
||||||
|
|
||||||
|
14
csv_metadata_quality/util.py
Normal file
14
csv_metadata_quality/util.py
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
def is_nfc(field):
|
||||||
|
"""Utility function to check whether a string is using normalized Unicode.
|
||||||
|
Python's built-in unicodedata library has the is_normalized() function, but
|
||||||
|
it was only introduced in Python 3.8. By using a simple utility function we
|
||||||
|
are able to run on Python >= 3.6 again.
|
||||||
|
|
||||||
|
See: https://docs.python.org/3/library/unicodedata.html
|
||||||
|
|
||||||
|
Return boolean.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from unicodedata import normalize
|
||||||
|
|
||||||
|
return field == normalize("NFC", field)
|
@ -1 +1 @@
|
|||||||
VERSION = "0.4.0"
|
VERSION = "0.4.1"
|
||||||
|
4
setup.py
4
setup.py
@ -14,7 +14,7 @@ install_requires = [
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name="csv-metadata-quality",
|
name="csv-metadata-quality",
|
||||||
version="0.4.0",
|
version="0.4.1",
|
||||||
author="Alan Orth",
|
author="Alan Orth",
|
||||||
author_email="aorth@mjanja.ch",
|
author_email="aorth@mjanja.ch",
|
||||||
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem.",
|
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem.",
|
||||||
@ -23,6 +23,8 @@ setuptools.setup(
|
|||||||
long_description_content_type="text/markdown",
|
long_description_content_type="text/markdown",
|
||||||
url="https://github.com/alanorth/csv-metadata-quality",
|
url="https://github.com/alanorth/csv-metadata-quality",
|
||||||
classifiers=[
|
classifiers=[
|
||||||
|
"Programming Language :: Python :: 3.6",
|
||||||
|
"Programming Language :: Python :: 3.7",
|
||||||
"Programming Language :: Python :: 3.8",
|
"Programming Language :: Python :: 3.8",
|
||||||
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
|
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
|
||||||
"Operating System :: OS Independent",
|
"Operating System :: OS Independent",
|
||||||
|
Reference in New Issue
Block a user