mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-11-22 05:45:02 +01:00
Compare commits
4 Commits
7fca981b95
...
92ff0ee51b
Author | SHA1 | Date | |
---|---|---|---|
92ff0ee51b | |||
ae38a826ec | |||
c1f630c298 | |||
82b056f0ea |
@ -444,6 +444,20 @@ def normalize_dois(field):
|
||||
if match:
|
||||
new_value = re.sub(pattern, "doi.org", new_value)
|
||||
|
||||
# Convert www.doi.org to doi.org
|
||||
pattern = re.compile(r"www\.doi\.org")
|
||||
match = re.findall(pattern, new_value)
|
||||
|
||||
if match:
|
||||
new_value = re.sub(pattern, "doi.org", new_value)
|
||||
|
||||
# Convert erroneous %2f to /
|
||||
pattern = re.compile("%2f")
|
||||
match = re.findall(pattern, new_value)
|
||||
|
||||
if match:
|
||||
new_value = re.sub(pattern, "/", new_value)
|
||||
|
||||
# Replace values like doi: 10.11648/j.jps.20140201.14
|
||||
pattern = re.compile(r"^doi: 10\.")
|
||||
match = re.findall(pattern, new_value)
|
||||
|
@ -40,3 +40,4 @@ Subregion field shouldn’t trigger region checks,2022-12-07,,,,,Kenya,,,,,,East
|
||||
DOI with HTTP and dx.doi.org,2024-04-23,,,,,,,,,,http://dx.doi.org/10.1016/j.envc.2023.100794,,
|
||||
DOI with colon,2024-04-23,,,,,,,,,,doi: 10.11648/j.jps.20140201.14,,
|
||||
Upper case bare DOI,2024-04-23,,,,,,,,,,10.19103/AS.2018.0043.16,,
|
||||
DOI with %2f,2024-06-25,,,,,,,,,,https://doi.org/10.1016%2fj.envc.2023.100794,,
|
||||
|
|
@ -8,14 +8,14 @@ authors = [
|
||||
license= { file = "LICENSE.txt" }
|
||||
dependencies = [
|
||||
"pandas[feather,performance]~=2.2",
|
||||
"python-stdnum~=1.19",
|
||||
"requests~=2.31",
|
||||
"requests-cache~=1.2",
|
||||
"python-stdnum~=1.20",
|
||||
"requests~=2.32",
|
||||
"requests-cache~=1.2.1",
|
||||
"colorama~=0.4",
|
||||
"ftfy~=6.1",
|
||||
"ftfy~=6.2.0",
|
||||
"country-converter~=1.2",
|
||||
"pycountry~=23.12",
|
||||
"py3langid~=0.2",
|
||||
"pycountry~=24.6.1",
|
||||
"py3langid~=0.3",
|
||||
]
|
||||
readme = "README.md"
|
||||
requires-python = ">= 3.9"
|
||||
|
@ -70,7 +70,7 @@ leather==0.4.0
|
||||
# via agate
|
||||
libcst==1.4.0
|
||||
# via fixit
|
||||
llvmlite==0.42.0
|
||||
llvmlite==0.43.0
|
||||
# via numba
|
||||
markdown-it-py==3.0.0
|
||||
# via rich
|
||||
@ -80,11 +80,11 @@ mdurl==0.1.2
|
||||
# via markdown-it-py
|
||||
moreorless==0.4.0
|
||||
# via fixit
|
||||
numba==0.59.1
|
||||
numba==0.60.0
|
||||
# via pandas
|
||||
numexpr==2.10.0
|
||||
# via pandas
|
||||
numpy==1.26.4
|
||||
numpy==2.0.0
|
||||
# via bottleneck
|
||||
# via numba
|
||||
# via numexpr
|
||||
@ -122,11 +122,11 @@ ptyprocess==0.7.0
|
||||
# via pexpect
|
||||
pure-eval==0.2.2
|
||||
# via stack-data
|
||||
py3langid==0.2.2
|
||||
py3langid==0.3.0
|
||||
# via csv-metadata-quality
|
||||
pyarrow==16.1.0
|
||||
# via pandas
|
||||
pycountry==23.12.11
|
||||
pycountry==24.6.1
|
||||
# via csv-metadata-quality
|
||||
pygments==2.18.0
|
||||
# via ipython
|
||||
@ -149,7 +149,7 @@ pyyaml==6.0.1
|
||||
requests==2.32.2
|
||||
# via csv-metadata-quality
|
||||
# via requests-cache
|
||||
requests-cache==1.2.0
|
||||
requests-cache==1.2.1
|
||||
# via csv-metadata-quality
|
||||
rich==13.7.1
|
||||
# via pytest-clarity
|
||||
|
@ -28,13 +28,13 @@ ftfy==6.2.0
|
||||
# via csv-metadata-quality
|
||||
idna==3.7
|
||||
# via requests
|
||||
llvmlite==0.42.0
|
||||
llvmlite==0.43.0
|
||||
# via numba
|
||||
numba==0.59.1
|
||||
numba==0.60.0
|
||||
# via pandas
|
||||
numexpr==2.10.0
|
||||
# via pandas
|
||||
numpy==1.26.4
|
||||
numpy==2.0.0
|
||||
# via bottleneck
|
||||
# via numba
|
||||
# via numexpr
|
||||
@ -46,11 +46,11 @@ pandas==2.2.2
|
||||
# via csv-metadata-quality
|
||||
platformdirs==4.2.2
|
||||
# via requests-cache
|
||||
py3langid==0.2.2
|
||||
py3langid==0.3.0
|
||||
# via csv-metadata-quality
|
||||
pyarrow==16.1.0
|
||||
# via pandas
|
||||
pycountry==23.12.11
|
||||
pycountry==24.6.1
|
||||
# via csv-metadata-quality
|
||||
python-dateutil==2.9.0.post0
|
||||
# via pandas
|
||||
@ -61,7 +61,7 @@ pytz==2024.1
|
||||
requests==2.32.2
|
||||
# via csv-metadata-quality
|
||||
# via requests-cache
|
||||
requests-cache==1.2.0
|
||||
requests-cache==1.2.1
|
||||
# via csv-metadata-quality
|
||||
six==1.16.0
|
||||
# via python-dateutil
|
||||
|
Loading…
Reference in New Issue
Block a user