1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-11-29 00:58:19 +01:00

Compare commits

..

3 Commits

Author SHA1 Message Date
95015febbd
csv_metadata_quality/fix.py: fix thin spaces
All checks were successful
continuous-integration/drone/push Build is passing
Replace thin spaces with normal spaces. Sometimes I see these get
mis handled on Windows machines and they end up as "?" or so.
2021-12-09 23:22:53 +02:00
cef6c66b30
CHANGELOG.md: start next changes 2021-12-09 23:21:58 +02:00
9905e183ea
Bump version to 0.6.0-dev 2021-12-09 23:21:30 +02:00
5 changed files with 16 additions and 3 deletions

View File

@ -4,6 +4,8 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## Unreleased
## [0.5.0] - 2021-12-08 ## [0.5.0] - 2021-12-08
### Added ### Added
- Ability to check for, and fix, "mojibake" characters using [ftfy](https://github.com/LuminosoInsight/python-ftfy) - Ability to check for, and fix, "mojibake" characters using [ftfy](https://github.com/LuminosoInsight/python-ftfy)

View File

@ -104,6 +104,7 @@ def unnecessary_unicode(field):
Replaces unnecessary Unicode characters like: Replaces unnecessary Unicode characters like:
- Soft hyphen (U+00AD) hyphen - Soft hyphen (U+00AD) hyphen
- No-break space (U+00A0) space - No-break space (U+00A0) space
- Thin space (U+2009) space
Return string with characters removed or replaced. Return string with characters removed or replaced.
""" """
@ -148,6 +149,16 @@ def unnecessary_unicode(field):
) )
field = re.sub(pattern, "-", field) field = re.sub(pattern, "-", field)
# Check for thin spaces (U+2009)
pattern = re.compile(r"\u2009")
match = re.findall(pattern, field)
if match:
print(
f"{Fore.GREEN}Replacing unnecessary Unicode (U+2009): {Fore.RESET}{field}"
)
field = re.sub(pattern, " ", field)
return field return field

View File

@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-3.0-only # SPDX-License-Identifier: GPL-3.0-only
VERSION = "0.5.0" VERSION = "0.6.0-dev"

View File

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "csv-metadata-quality" name = "csv-metadata-quality"
version = "0.5.0" version = "0.6.0-dev"
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem." description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem."
authors = ["Alan Orth <alan.orth@gmail.com>"] authors = ["Alan Orth <alan.orth@gmail.com>"]
license="GPL-3.0-only" license="GPL-3.0-only"

View File

@ -14,7 +14,7 @@ install_requires = [
setuptools.setup( setuptools.setup(
name="csv-metadata-quality", name="csv-metadata-quality",
version="0.5.0", version="0.6.0-dev",
author="Alan Orth", author="Alan Orth",
author_email="aorth@mjanja.ch", author_email="aorth@mjanja.ch",
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem.", description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem.",