mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-11-29 00:58:19 +01:00
Compare commits
No commits in common. "95015febbd34a580ac61ef882bfabe75a7b93d6c" and "cc34db7ff80402e3913a1750e134be69a59420ca" have entirely different histories.
95015febbd
...
cc34db7ff8
@ -4,8 +4,6 @@ All notable changes to this project will be documented in this file.
|
|||||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
## Unreleased
|
|
||||||
|
|
||||||
## [0.5.0] - 2021-12-08
|
## [0.5.0] - 2021-12-08
|
||||||
### Added
|
### Added
|
||||||
- Ability to check for, and fix, "mojibake" characters using [ftfy](https://github.com/LuminosoInsight/python-ftfy)
|
- Ability to check for, and fix, "mojibake" characters using [ftfy](https://github.com/LuminosoInsight/python-ftfy)
|
||||||
|
@ -104,7 +104,6 @@ def unnecessary_unicode(field):
|
|||||||
Replaces unnecessary Unicode characters like:
|
Replaces unnecessary Unicode characters like:
|
||||||
- Soft hyphen (U+00AD) → hyphen
|
- Soft hyphen (U+00AD) → hyphen
|
||||||
- No-break space (U+00A0) → space
|
- No-break space (U+00A0) → space
|
||||||
- Thin space (U+2009) → space
|
|
||||||
|
|
||||||
Return string with characters removed or replaced.
|
Return string with characters removed or replaced.
|
||||||
"""
|
"""
|
||||||
@ -149,16 +148,6 @@ def unnecessary_unicode(field):
|
|||||||
)
|
)
|
||||||
field = re.sub(pattern, "-", field)
|
field = re.sub(pattern, "-", field)
|
||||||
|
|
||||||
# Check for thin spaces (U+2009)
|
|
||||||
pattern = re.compile(r"\u2009")
|
|
||||||
match = re.findall(pattern, field)
|
|
||||||
|
|
||||||
if match:
|
|
||||||
print(
|
|
||||||
f"{Fore.GREEN}Replacing unnecessary Unicode (U+2009): {Fore.RESET}{field}"
|
|
||||||
)
|
|
||||||
field = re.sub(pattern, " ", field)
|
|
||||||
|
|
||||||
return field
|
return field
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
# SPDX-License-Identifier: GPL-3.0-only
|
# SPDX-License-Identifier: GPL-3.0-only
|
||||||
|
|
||||||
VERSION = "0.6.0-dev"
|
VERSION = "0.5.0"
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "csv-metadata-quality"
|
name = "csv-metadata-quality"
|
||||||
version = "0.6.0-dev"
|
version = "0.5.0"
|
||||||
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem."
|
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem."
|
||||||
authors = ["Alan Orth <alan.orth@gmail.com>"]
|
authors = ["Alan Orth <alan.orth@gmail.com>"]
|
||||||
license="GPL-3.0-only"
|
license="GPL-3.0-only"
|
||||||
|
2
setup.py
2
setup.py
@ -14,7 +14,7 @@ install_requires = [
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name="csv-metadata-quality",
|
name="csv-metadata-quality",
|
||||||
version="0.6.0-dev",
|
version="0.5.0",
|
||||||
author="Alan Orth",
|
author="Alan Orth",
|
||||||
author_email="aorth@mjanja.ch",
|
author_email="aorth@mjanja.ch",
|
||||||
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem.",
|
description="A simple, but opinionated CSV quality checking and fixing pipeline for CSVs in the DSpace ecosystem.",
|
||||||
|
Loading…
Reference in New Issue
Block a user