1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-11-29 00:58:19 +01:00

Compare commits

..

No commits in common. "95015febbd34a580ac61ef882bfabe75a7b93d6c" and "cc34db7ff80402e3913a1750e134be69a59420ca" have entirely different histories.

5 changed files with 3 additions and 16 deletions

View File

@@ -4,8 +4,6 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-## Unreleased
## [0.5.0] - 2021-12-08
### Added
- Ability to check for, and fix, "mojibake" characters using [ftfy](https://github.com/LuminosoInsight/python-ftfy)

View File

@@ -104,7 +104,6 @@ def unnecessary_unicode(field):
Replaces unnecessary Unicode characters like:
- Soft hyphen (U+00AD) → hyphen
- No-break space (U+00A0) → space
- Thin space (U+2009) → space
Return string with characters removed or replaced.
"""
@@ -149,16 +148,6 @@ def unnecessary_unicode(field):
)
field = re.sub(pattern, "-", field)
-# Check for thin spaces (U+2009)
-pattern = re.compile(r"\u2009")
-match = re.findall(pattern, field)
-if match:
-print(
-f"{Fore.GREEN}Replacing unnecessary Unicode (U+2009): {Fore.RESET}{field}"
-)
-field = re.sub(pattern, " ", field)
return field

View File

@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-3.0-only
-VERSION = "0.6.0-dev"
+VERSION = "0.5.0"

View File

@@ -1,6 +1,6 @@
[tool.poetry]
name = "csv-metadata-quality"
-version = "0.6.0-dev"
+version = "0.5.0"
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem."
authors = ["Alan Orth <alan.orth@gmail.com>"]
license="GPL-3.0-only"

View File

@@ -14,7 +14,7 @@ install_requires = [
setuptools.setup(
name="csv-metadata-quality",
-version="0.6.0-dev",
+version="0.5.0",
author="Alan Orth",
author_email="aorth@mjanja.ch",
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem.",