1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-11-26 07:38:18 +01:00

Compare commits

..

No commits in common. "92ff0ee51b44d666c200f4283a1e2d56f96bb7a8" and "7fca981b95e43734df14c43eca08ff37dc62c3f5" have entirely different histories.

5 changed files with 18 additions and 33 deletions

View File

@ -444,20 +444,6 @@ def normalize_dois(field):
if match:
new_value = re.sub(pattern, "doi.org", new_value)
# Convert www.doi.org to doi.org
pattern = re.compile(r"www\.doi\.org")
match = re.findall(pattern, new_value)
if match:
new_value = re.sub(pattern, "doi.org", new_value)
# Convert erroneous %2f to /
pattern = re.compile("%2f")
match = re.findall(pattern, new_value)
if match:
new_value = re.sub(pattern, "/", new_value)
# Replace values like doi: 10.11648/j.jps.20140201.14
pattern = re.compile(r"^doi: 10\.")
match = re.findall(pattern, new_value)

View File

@ -40,4 +40,3 @@ Subregion field shouldnt trigger region checks,2022-12-07,,,,,Kenya,,,,,,East
DOI with HTTP and dx.doi.org,2024-04-23,,,,,,,,,,http://dx.doi.org/10.1016/j.envc.2023.100794,,
DOI with colon,2024-04-23,,,,,,,,,,doi: 10.11648/j.jps.20140201.14,,
Upper case bare DOI,2024-04-23,,,,,,,,,,10.19103/AS.2018.0043.16,,
DOI with %2f,2024-06-25,,,,,,,,,,https://doi.org/10.1016%2fj.envc.2023.100794,,

1 dc.title dcterms.issued dc.identifier.issn dc.identifier.isbn dcterms.language dcterms.subject cg.coverage.country filename dcterms.license dcterms.type dcterms.bibliographicCitation cg.identifier.doi cg.coverage.region cg.coverage.subregion
40 DOI with colon 2024-04-23 doi: 10.11648/j.jps.20140201.14
41 Upper case bare DOI 2024-04-23 10.19103/AS.2018.0043.16
42

View File

@ -8,14 +8,14 @@ authors = [
license= { file = "LICENSE.txt" }
dependencies = [
"pandas[feather,performance]~=2.2",
"python-stdnum~=1.20",
"requests~=2.32",
"requests-cache~=1.2.1",
"python-stdnum~=1.19",
"requests~=2.31",
"requests-cache~=1.2",
"colorama~=0.4",
"ftfy~=6.2.0",
"ftfy~=6.1",
"country-converter~=1.2",
"pycountry~=24.6.1",
"py3langid~=0.3",
"pycountry~=23.12",
"py3langid~=0.2",
]
readme = "README.md"
requires-python = ">= 3.9"

View File

@ -70,7 +70,7 @@ leather==0.4.0
# via agate
libcst==1.4.0
# via fixit
llvmlite==0.43.0
llvmlite==0.42.0
# via numba
markdown-it-py==3.0.0
# via rich
@ -80,11 +80,11 @@ mdurl==0.1.2
# via markdown-it-py
moreorless==0.4.0
# via fixit
numba==0.60.0
numba==0.59.1
# via pandas
numexpr==2.10.0
# via pandas
numpy==2.0.0
numpy==1.26.4
# via bottleneck
# via numba
# via numexpr
@ -122,11 +122,11 @@ ptyprocess==0.7.0
# via pexpect
pure-eval==0.2.2
# via stack-data
py3langid==0.3.0
py3langid==0.2.2
# via csv-metadata-quality
pyarrow==16.1.0
# via pandas
pycountry==24.6.1
pycountry==23.12.11
# via csv-metadata-quality
pygments==2.18.0
# via ipython
@ -149,7 +149,7 @@ pyyaml==6.0.1
requests==2.32.2
# via csv-metadata-quality
# via requests-cache
requests-cache==1.2.1
requests-cache==1.2.0
# via csv-metadata-quality
rich==13.7.1
# via pytest-clarity

View File

@ -28,13 +28,13 @@ ftfy==6.2.0
# via csv-metadata-quality
idna==3.7
# via requests
llvmlite==0.43.0
llvmlite==0.42.0
# via numba
numba==0.60.0
numba==0.59.1
# via pandas
numexpr==2.10.0
# via pandas
numpy==2.0.0
numpy==1.26.4
# via bottleneck
# via numba
# via numexpr
@ -46,11 +46,11 @@ pandas==2.2.2
# via csv-metadata-quality
platformdirs==4.2.2
# via requests-cache
py3langid==0.3.0
py3langid==0.2.2
# via csv-metadata-quality
pyarrow==16.1.0
# via pandas
pycountry==24.6.1
pycountry==23.12.11
# via csv-metadata-quality
python-dateutil==2.9.0.post0
# via pandas
@ -61,7 +61,7 @@ pytz==2024.1
requests==2.32.2
# via csv-metadata-quality
# via requests-cache
requests-cache==1.2.1
requests-cache==1.2.0
# via csv-metadata-quality
six==1.16.0
# via python-dateutil