1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-11-28 16:48:20 +01:00

Compare commits

...

3 Commits

Author SHA1 Message Date
95015febbd
csv_metadata_quality/fix.py: fix thin spaces
All checks were successful
continuous-integration/drone/push Build is passing
Replace thin spaces with normal spaces. Sometimes I see these get
mishandled on Windows machines and they end up as "?" or so.
2021-12-09 23:22:53 +02:00
cef6c66b30
CHANGELOG.md: start next changes 2021-12-09 23:21:58 +02:00
9905e183ea
Bump version to 0.6.0-dev 2021-12-09 23:21:30 +02:00
5 changed files with 16 additions and 3 deletions

View File

@ -4,6 +4,8 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## Unreleased
## [0.5.0] - 2021-12-08
### Added
- Ability to check for, and fix, "mojibake" characters using [ftfy](https://github.com/LuminosoInsight/python-ftfy)

View File

@ -104,6 +104,7 @@ def unnecessary_unicode(field):
Replaces unnecessary Unicode characters like:
- Soft hyphen (U+00AD) → hyphen
- No-break space (U+00A0) → space
- Thin space (U+2009) → space
Return string with characters removed or replaced.
"""
@ -148,6 +149,16 @@ def unnecessary_unicode(field):
)
field = re.sub(pattern, "-", field)
# Check for thin spaces (U+2009)
pattern = re.compile(r"\u2009")
match = re.findall(pattern, field)
if match:
print(
f"{Fore.GREEN}Replacing unnecessary Unicode (U+2009): {Fore.RESET}{field}"
)
field = re.sub(pattern, " ", field)
return field

View File

@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-3.0-only
VERSION = "0.5.0"
VERSION = "0.6.0-dev"

View File

@ -1,6 +1,6 @@
[tool.poetry]
name = "csv-metadata-quality"
version = "0.5.0"
version = "0.6.0-dev"
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem."
authors = ["Alan Orth <alan.orth@gmail.com>"]
license="GPL-3.0-only"

View File

@ -14,7 +14,7 @@ install_requires = [
setuptools.setup(
name="csv-metadata-quality",
version="0.5.0",
version="0.6.0-dev",
author="Alan Orth",
author_email="aorth@mjanja.ch",
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem.",