mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-11-28 08:38:18 +01:00
Compare commits
No commits in common. "5d0804a08f2c1577dab789fccb4263624f128bc1" and "1491e1edb0dc8f1ab0cc7d0d6a0954958a97bbf9" have entirely different histories.
5d0804a08f
...
1491e1edb0
@ -11,9 +11,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||||||
- Fixed regex so we run the comma space fix on `dcterms.bibliographicCitation`
|
- Fixed regex so we run the comma space fix on `dcterms.bibliographicCitation`
|
||||||
fields
|
fields
|
||||||
|
|
||||||
### Changed
|
|
||||||
- Don't run newline fix on description fields
|
|
||||||
|
|
||||||
### Updated
|
### Updated
|
||||||
- Python dependencies, including Pandas 2.0.0 and [Arrow-backed dtypes](https://datapythonista.me/blog/pandas-20-and-the-arrow-revolution-part-i)
|
- Python dependencies, including Pandas 2.0.0 and [Arrow-backed dtypes](https://datapythonista.me/blog/pandas-20-and-the-arrow-revolution-part-i)
|
||||||
|
|
||||||
|
@ -90,9 +90,7 @@ def run(argv):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
if args.unsafe_fixes:
|
if args.unsafe_fixes:
|
||||||
# Skip whitespace and newline fixes on abstracts and descriptions
|
match = re.match(r"^.*?abstract.*$", column)
|
||||||
# because there are too many with legitimate multi-line metadata.
|
|
||||||
match = re.match(r"^.*?(abstract|description).*$", column)
|
|
||||||
if match is None:
|
if match is None:
|
||||||
# Fix: whitespace
|
# Fix: whitespace
|
||||||
df[column] = df[column].apply(fix.whitespace, field_name=column)
|
df[column] = df[column].apply(fix.whitespace, field_name=column)
|
||||||
|
47
poetry.lock
generated
47
poetry.lock
generated
@ -127,22 +127,22 @@ test = ["astroid", "pytest"]
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "attrs"
|
name = "attrs"
|
||||||
version = "23.1.0"
|
version = "22.2.0"
|
||||||
description = "Classes Without Boilerplate"
|
description = "Classes Without Boilerplate"
|
||||||
category = "main"
|
category = "main"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.7"
|
python-versions = ">=3.6"
|
||||||
files = [
|
files = [
|
||||||
{file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"},
|
{file = "attrs-22.2.0-py3-none-any.whl", hash = "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836"},
|
||||||
{file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"},
|
{file = "attrs-22.2.0.tar.gz", hash = "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.extras]
|
[package.extras]
|
||||||
cov = ["attrs[tests]", "coverage[toml] (>=5.3)"]
|
cov = ["attrs[tests]", "coverage-enable-subprocess", "coverage[toml] (>=5.3)"]
|
||||||
dev = ["attrs[docs,tests]", "pre-commit"]
|
dev = ["attrs[docs,tests]"]
|
||||||
docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"]
|
docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope.interface"]
|
||||||
tests = ["attrs[tests-no-zope]", "zope-interface"]
|
tests = ["attrs[tests-no-zope]", "zope.interface"]
|
||||||
tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
|
tests-no-zope = ["cloudpickle", "cloudpickle", "hypothesis", "hypothesis", "mypy (>=0.971,<0.990)", "mypy (>=0.971,<0.990)", "pympler", "pympler", "pytest (>=4.3.0)", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-mypy-plugins", "pytest-xdist[psutil]", "pytest-xdist[psutil]"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "babel"
|
name = "babel"
|
||||||
@ -860,14 +860,14 @@ et-xmlfile = "*"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "packaging"
|
name = "packaging"
|
||||||
version = "23.1"
|
version = "23.0"
|
||||||
description = "Core utilities for Python packages"
|
description = "Core utilities for Python packages"
|
||||||
category = "dev"
|
category = "dev"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.7"
|
python-versions = ">=3.7"
|
||||||
files = [
|
files = [
|
||||||
{file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"},
|
{file = "packaging-23.0-py3-none-any.whl", hash = "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2"},
|
||||||
{file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"},
|
{file = "packaging-23.0.tar.gz", hash = "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -1171,14 +1171,14 @@ files = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pygments"
|
name = "pygments"
|
||||||
version = "2.15.1"
|
version = "2.14.0"
|
||||||
description = "Pygments is a syntax highlighting package written in Python."
|
description = "Pygments is a syntax highlighting package written in Python."
|
||||||
category = "dev"
|
category = "dev"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.7"
|
python-versions = ">=3.6"
|
||||||
files = [
|
files = [
|
||||||
{file = "Pygments-2.15.1-py3-none-any.whl", hash = "sha256:db2db3deb4b4179f399a09054b023b6a586b76499d36965813c71aa8ed7b5fd1"},
|
{file = "Pygments-2.14.0-py3-none-any.whl", hash = "sha256:fa7bd7bd2771287c0de303af8bfdfc731f51bd2c6a47ab69d117138893b82717"},
|
||||||
{file = "Pygments-2.15.1.tar.gz", hash = "sha256:8ace4d3c1dd481894b2005f560ead0f9f19ee64fe983366be1a21e171d12775c"},
|
{file = "Pygments-2.14.0.tar.gz", hash = "sha256:b3ed06a9e8ac9a9aae5a6f5dbe78a8a58655d17b43b93c078f094ddc476ae297"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.extras]
|
[package.extras]
|
||||||
@ -1186,17 +1186,18 @@ plugins = ["importlib-metadata"]
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pytest"
|
name = "pytest"
|
||||||
version = "7.3.1"
|
version = "7.2.2"
|
||||||
description = "pytest: simple powerful testing with Python"
|
description = "pytest: simple powerful testing with Python"
|
||||||
category = "dev"
|
category = "dev"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.7"
|
python-versions = ">=3.7"
|
||||||
files = [
|
files = [
|
||||||
{file = "pytest-7.3.1-py3-none-any.whl", hash = "sha256:3799fa815351fea3a5e96ac7e503a96fa51cc9942c3753cda7651b93c1cfa362"},
|
{file = "pytest-7.2.2-py3-none-any.whl", hash = "sha256:130328f552dcfac0b1cec75c12e3f005619dc5f874f0a06e8ff7263f0ee6225e"},
|
||||||
{file = "pytest-7.3.1.tar.gz", hash = "sha256:434afafd78b1d78ed0addf160ad2b77a30d35d4bdf8af234fe621919d9ed15e3"},
|
{file = "pytest-7.2.2.tar.gz", hash = "sha256:c99ab0c73aceb050f68929bc93af19ab6db0558791c6a0715723abe9d0ade9d4"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
|
attrs = ">=19.2.0"
|
||||||
colorama = {version = "*", markers = "sys_platform == \"win32\""}
|
colorama = {version = "*", markers = "sys_platform == \"win32\""}
|
||||||
exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""}
|
exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""}
|
||||||
iniconfig = "*"
|
iniconfig = "*"
|
||||||
@ -1205,7 +1206,7 @@ pluggy = ">=0.12,<2.0"
|
|||||||
tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}
|
tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}
|
||||||
|
|
||||||
[package.extras]
|
[package.extras]
|
||||||
testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"]
|
testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pytest-clarity"
|
name = "pytest-clarity"
|
||||||
@ -1352,14 +1353,14 @@ yaml = ["pyyaml (>=5.4)"]
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rich"
|
name = "rich"
|
||||||
version = "13.3.4"
|
version = "13.3.3"
|
||||||
description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
|
description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
|
||||||
category = "dev"
|
category = "dev"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.7.0"
|
python-versions = ">=3.7.0"
|
||||||
files = [
|
files = [
|
||||||
{file = "rich-13.3.4-py3-none-any.whl", hash = "sha256:22b74cae0278fd5086ff44144d3813be1cedc9115bdfabbfefd86400cb88b20a"},
|
{file = "rich-13.3.3-py3-none-any.whl", hash = "sha256:540c7d6d26a1178e8e8b37e9ba44573a3cd1464ff6348b99ee7061b95d1c6333"},
|
||||||
{file = "rich-13.3.4.tar.gz", hash = "sha256:b5d573e13605423ec80bdd0cd5f8541f7844a0e71a13f74cf454ccb2f490708b"},
|
{file = "rich-13.3.3.tar.gz", hash = "sha256:dc84400a9d842b3a9c5ff74addd8eb798d155f36c1c91303888e0a66850d2a15"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
|
@ -5,7 +5,7 @@ agate==1.7.1 ; python_version >= "3.9" and python_version < "4.0"
|
|||||||
appdirs==1.4.4 ; python_version >= "3.9" and python_version < "4.0"
|
appdirs==1.4.4 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
appnope==0.1.3 ; python_version >= "3.9" and python_version < "4.0" and sys_platform == "darwin"
|
appnope==0.1.3 ; python_version >= "3.9" and python_version < "4.0" and sys_platform == "darwin"
|
||||||
asttokens==2.2.1 ; python_version >= "3.9" and python_version < "4.0"
|
asttokens==2.2.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
attrs==23.1.0 ; python_version >= "3.9" and python_version < "4.0"
|
attrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
babel==2.12.1 ; python_version >= "3.9" and python_version < "4.0"
|
babel==2.12.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
backcall==0.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
backcall==0.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
black==23.3.0 ; python_version >= "3.9" and python_version < "4.0"
|
black==23.3.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
@ -40,7 +40,7 @@ mypy-extensions==1.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
|||||||
numpy==1.24.2 ; python_version >= "3.9" and python_version < "4.0"
|
numpy==1.24.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
olefile==0.46 ; python_version >= "3.9" and python_version < "4.0"
|
olefile==0.46 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
openpyxl==3.1.2 ; python_version >= "3.9" and python_version < "4.0"
|
openpyxl==3.1.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
packaging==23.1 ; python_version >= "3.9" and python_version < "4.0"
|
packaging==23.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pandas==2.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
pandas==2.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
parsedatetime==2.6 ; python_version >= "3.9" and python_version < "4.0"
|
parsedatetime==2.6 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
parso==0.8.3 ; python_version >= "3.9" and python_version < "4.0"
|
parso==0.8.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
@ -57,9 +57,9 @@ pyarrow==11.0.0 ; python_version >= "3.9" and python_version < "4.0"
|
|||||||
pycodestyle==2.10.0 ; python_version >= "3.9" and python_version < "4.0"
|
pycodestyle==2.10.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pycountry @ git+https://github.com/alanorth/pycountry@iso-codes-4.13.0 ; python_version >= "3.9" and python_version < "4.0"
|
pycountry @ git+https://github.com/alanorth/pycountry@iso-codes-4.13.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pyflakes==3.0.1 ; python_version >= "3.9" and python_version < "4.0"
|
pyflakes==3.0.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pygments==2.15.1 ; python_version >= "3.9" and python_version < "4.0"
|
pygments==2.14.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pytest-clarity==1.0.1 ; python_version >= "3.9" and python_version < "4.0"
|
pytest-clarity==1.0.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
pytest==7.3.1 ; python_version >= "3.9" and python_version < "4.0"
|
pytest==7.2.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "4.0"
|
python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
python-slugify==8.0.1 ; python_version >= "3.9" and python_version < "4.0"
|
python-slugify==8.0.1 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
python-stdnum==1.18 ; python_version >= "3.9" and python_version < "4.0"
|
python-stdnum==1.18 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
@ -67,7 +67,7 @@ pytimeparse==1.1.8 ; python_version >= "3.9" and python_version < "4.0"
|
|||||||
pytz==2023.3 ; python_version >= "3.9" and python_version < "4.0"
|
pytz==2023.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
requests-cache==0.9.8 ; python_version >= "3.9" and python_version < "4.0"
|
requests-cache==0.9.8 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
requests==2.28.2 ; python_version >= "3.9" and python_version < "4"
|
requests==2.28.2 ; python_version >= "3.9" and python_version < "4"
|
||||||
rich==13.3.4 ; python_version >= "3.9" and python_version < "4.0"
|
rich==13.3.3 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
six==1.16.0 ; python_version >= "3.9" and python_version < "4.0"
|
six==1.16.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
sqlalchemy==1.4.47 ; python_version >= "3.9" and python_version < "4.0"
|
sqlalchemy==1.4.47 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
stack-data==0.6.2 ; python_version >= "3.9" and python_version < "4.0"
|
stack-data==0.6.2 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
appdirs==1.4.4 ; python_version >= "3.9" and python_version < "4.0"
|
appdirs==1.4.4 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
attrs==23.1.0 ; python_version >= "3.9" and python_version < "4.0"
|
attrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
cattrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
cattrs==22.2.0 ; python_version >= "3.9" and python_version < "4.0"
|
||||||
certifi==2022.12.7 ; python_version >= "3.9" and python_version < "4"
|
certifi==2022.12.7 ; python_version >= "3.9" and python_version < "4"
|
||||||
charset-normalizer==3.1.0 ; python_version >= "3.9" and python_version < "4"
|
charset-normalizer==3.1.0 ; python_version >= "3.9" and python_version < "4"
|
||||||
|
Loading…
Reference in New Issue
Block a user