1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-11-10 00:05:51 +01:00

Compare commits

..

No commits in common. "92ff0ee51b44d666c200f4283a1e2d56f96bb7a8" and "7fca981b95e43734df14c43eca08ff37dc62c3f5" have entirely different histories.

5 changed files with 18 additions and 33 deletions

View File

@ -444,20 +444,6 @@ def normalize_dois(field):
if match: if match:
new_value = re.sub(pattern, "doi.org", new_value) new_value = re.sub(pattern, "doi.org", new_value)
# Convert www.doi.org to doi.org
pattern = re.compile(r"www\.doi\.org")
match = re.findall(pattern, new_value)
if match:
new_value = re.sub(pattern, "doi.org", new_value)
# Convert erroneous %2f to /
pattern = re.compile("%2f")
match = re.findall(pattern, new_value)
if match:
new_value = re.sub(pattern, "/", new_value)
# Replace values like doi: 10.11648/j.jps.20140201.14 # Replace values like doi: 10.11648/j.jps.20140201.14
pattern = re.compile(r"^doi: 10\.") pattern = re.compile(r"^doi: 10\.")
match = re.findall(pattern, new_value) match = re.findall(pattern, new_value)

View File

@ -40,4 +40,3 @@ Subregion field shouldnt trigger region checks,2022-12-07,,,,,Kenya,,,,,,East
DOI with HTTP and dx.doi.org,2024-04-23,,,,,,,,,,http://dx.doi.org/10.1016/j.envc.2023.100794,, DOI with HTTP and dx.doi.org,2024-04-23,,,,,,,,,,http://dx.doi.org/10.1016/j.envc.2023.100794,,
DOI with colon,2024-04-23,,,,,,,,,,doi: 10.11648/j.jps.20140201.14,, DOI with colon,2024-04-23,,,,,,,,,,doi: 10.11648/j.jps.20140201.14,,
Upper case bare DOI,2024-04-23,,,,,,,,,,10.19103/AS.2018.0043.16,, Upper case bare DOI,2024-04-23,,,,,,,,,,10.19103/AS.2018.0043.16,,
DOI with %2f,2024-06-25,,,,,,,,,,https://doi.org/10.1016%2fj.envc.2023.100794,,

1 dc.title dcterms.issued dc.identifier.issn dc.identifier.isbn dcterms.language dcterms.subject cg.coverage.country filename dcterms.license dcterms.type dcterms.bibliographicCitation cg.identifier.doi cg.coverage.region cg.coverage.subregion
40 DOI with colon 2024-04-23 doi: 10.11648/j.jps.20140201.14
41 Upper case bare DOI 2024-04-23 10.19103/AS.2018.0043.16
42

View File

@ -8,14 +8,14 @@ authors = [
license= { file = "LICENSE.txt" } license= { file = "LICENSE.txt" }
dependencies = [ dependencies = [
"pandas[feather,performance]~=2.2", "pandas[feather,performance]~=2.2",
"python-stdnum~=1.20", "python-stdnum~=1.19",
"requests~=2.32", "requests~=2.31",
"requests-cache~=1.2.1", "requests-cache~=1.2",
"colorama~=0.4", "colorama~=0.4",
"ftfy~=6.2.0", "ftfy~=6.1",
"country-converter~=1.2", "country-converter~=1.2",
"pycountry~=24.6.1", "pycountry~=23.12",
"py3langid~=0.3", "py3langid~=0.2",
] ]
readme = "README.md" readme = "README.md"
requires-python = ">= 3.9" requires-python = ">= 3.9"

View File

@ -70,7 +70,7 @@ leather==0.4.0
# via agate # via agate
libcst==1.4.0 libcst==1.4.0
# via fixit # via fixit
llvmlite==0.43.0 llvmlite==0.42.0
# via numba # via numba
markdown-it-py==3.0.0 markdown-it-py==3.0.0
# via rich # via rich
@ -80,11 +80,11 @@ mdurl==0.1.2
# via markdown-it-py # via markdown-it-py
moreorless==0.4.0 moreorless==0.4.0
# via fixit # via fixit
numba==0.60.0 numba==0.59.1
# via pandas # via pandas
numexpr==2.10.0 numexpr==2.10.0
# via pandas # via pandas
numpy==2.0.0 numpy==1.26.4
# via bottleneck # via bottleneck
# via numba # via numba
# via numexpr # via numexpr
@ -122,11 +122,11 @@ ptyprocess==0.7.0
# via pexpect # via pexpect
pure-eval==0.2.2 pure-eval==0.2.2
# via stack-data # via stack-data
py3langid==0.3.0 py3langid==0.2.2
# via csv-metadata-quality # via csv-metadata-quality
pyarrow==16.1.0 pyarrow==16.1.0
# via pandas # via pandas
pycountry==24.6.1 pycountry==23.12.11
# via csv-metadata-quality # via csv-metadata-quality
pygments==2.18.0 pygments==2.18.0
# via ipython # via ipython
@ -149,7 +149,7 @@ pyyaml==6.0.1
requests==2.32.2 requests==2.32.2
# via csv-metadata-quality # via csv-metadata-quality
# via requests-cache # via requests-cache
requests-cache==1.2.1 requests-cache==1.2.0
# via csv-metadata-quality # via csv-metadata-quality
rich==13.7.1 rich==13.7.1
# via pytest-clarity # via pytest-clarity

View File

@ -28,13 +28,13 @@ ftfy==6.2.0
# via csv-metadata-quality # via csv-metadata-quality
idna==3.7 idna==3.7
# via requests # via requests
llvmlite==0.43.0 llvmlite==0.42.0
# via numba # via numba
numba==0.60.0 numba==0.59.1
# via pandas # via pandas
numexpr==2.10.0 numexpr==2.10.0
# via pandas # via pandas
numpy==2.0.0 numpy==1.26.4
# via bottleneck # via bottleneck
# via numba # via numba
# via numexpr # via numexpr
@ -46,11 +46,11 @@ pandas==2.2.2
# via csv-metadata-quality # via csv-metadata-quality
platformdirs==4.2.2 platformdirs==4.2.2
# via requests-cache # via requests-cache
py3langid==0.3.0 py3langid==0.2.2
# via csv-metadata-quality # via csv-metadata-quality
pyarrow==16.1.0 pyarrow==16.1.0
# via pandas # via pandas
pycountry==24.6.1 pycountry==23.12.11
# via csv-metadata-quality # via csv-metadata-quality
python-dateutil==2.9.0.post0 python-dateutil==2.9.0.post0
# via pandas # via pandas
@ -61,7 +61,7 @@ pytz==2024.1
requests==2.32.2 requests==2.32.2
# via csv-metadata-quality # via csv-metadata-quality
# via requests-cache # via requests-cache
requests-cache==1.2.1 requests-cache==1.2.0
# via csv-metadata-quality # via csv-metadata-quality
six==1.16.0 six==1.16.0
# via python-dateutil # via python-dateutil