mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-11-10 00:05:51 +01:00
Compare commits
No commits in common. "92ff0ee51b44d666c200f4283a1e2d56f96bb7a8" and "7fca981b95e43734df14c43eca08ff37dc62c3f5" have entirely different histories.
92ff0ee51b
...
7fca981b95
@ -444,20 +444,6 @@ def normalize_dois(field):
|
|||||||
if match:
|
if match:
|
||||||
new_value = re.sub(pattern, "doi.org", new_value)
|
new_value = re.sub(pattern, "doi.org", new_value)
|
||||||
|
|
||||||
# Convert www.doi.org to doi.org
|
|
||||||
pattern = re.compile(r"www\.doi\.org")
|
|
||||||
match = re.findall(pattern, new_value)
|
|
||||||
|
|
||||||
if match:
|
|
||||||
new_value = re.sub(pattern, "doi.org", new_value)
|
|
||||||
|
|
||||||
# Convert erroneous %2f to /
|
|
||||||
pattern = re.compile("%2f")
|
|
||||||
match = re.findall(pattern, new_value)
|
|
||||||
|
|
||||||
if match:
|
|
||||||
new_value = re.sub(pattern, "/", new_value)
|
|
||||||
|
|
||||||
# Replace values like doi: 10.11648/j.jps.20140201.14
|
# Replace values like doi: 10.11648/j.jps.20140201.14
|
||||||
pattern = re.compile(r"^doi: 10\.")
|
pattern = re.compile(r"^doi: 10\.")
|
||||||
match = re.findall(pattern, new_value)
|
match = re.findall(pattern, new_value)
|
||||||
|
@ -40,4 +40,3 @@ Subregion field shouldn’t trigger region checks,2022-12-07,,,,,Kenya,,,,,,East
|
|||||||
DOI with HTTP and dx.doi.org,2024-04-23,,,,,,,,,,http://dx.doi.org/10.1016/j.envc.2023.100794,,
|
DOI with HTTP and dx.doi.org,2024-04-23,,,,,,,,,,http://dx.doi.org/10.1016/j.envc.2023.100794,,
|
||||||
DOI with colon,2024-04-23,,,,,,,,,,doi: 10.11648/j.jps.20140201.14,,
|
DOI with colon,2024-04-23,,,,,,,,,,doi: 10.11648/j.jps.20140201.14,,
|
||||||
Upper case bare DOI,2024-04-23,,,,,,,,,,10.19103/AS.2018.0043.16,,
|
Upper case bare DOI,2024-04-23,,,,,,,,,,10.19103/AS.2018.0043.16,,
|
||||||
DOI with %2f,2024-06-25,,,,,,,,,,https://doi.org/10.1016%2fj.envc.2023.100794,,
|
|
||||||
|
|
@ -8,14 +8,14 @@ authors = [
|
|||||||
license= { file = "LICENSE.txt" }
|
license= { file = "LICENSE.txt" }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"pandas[feather,performance]~=2.2",
|
"pandas[feather,performance]~=2.2",
|
||||||
"python-stdnum~=1.20",
|
"python-stdnum~=1.19",
|
||||||
"requests~=2.32",
|
"requests~=2.31",
|
||||||
"requests-cache~=1.2.1",
|
"requests-cache~=1.2",
|
||||||
"colorama~=0.4",
|
"colorama~=0.4",
|
||||||
"ftfy~=6.2.0",
|
"ftfy~=6.1",
|
||||||
"country-converter~=1.2",
|
"country-converter~=1.2",
|
||||||
"pycountry~=24.6.1",
|
"pycountry~=23.12",
|
||||||
"py3langid~=0.3",
|
"py3langid~=0.2",
|
||||||
]
|
]
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">= 3.9"
|
requires-python = ">= 3.9"
|
||||||
|
@ -70,7 +70,7 @@ leather==0.4.0
|
|||||||
# via agate
|
# via agate
|
||||||
libcst==1.4.0
|
libcst==1.4.0
|
||||||
# via fixit
|
# via fixit
|
||||||
llvmlite==0.43.0
|
llvmlite==0.42.0
|
||||||
# via numba
|
# via numba
|
||||||
markdown-it-py==3.0.0
|
markdown-it-py==3.0.0
|
||||||
# via rich
|
# via rich
|
||||||
@ -80,11 +80,11 @@ mdurl==0.1.2
|
|||||||
# via markdown-it-py
|
# via markdown-it-py
|
||||||
moreorless==0.4.0
|
moreorless==0.4.0
|
||||||
# via fixit
|
# via fixit
|
||||||
numba==0.60.0
|
numba==0.59.1
|
||||||
# via pandas
|
# via pandas
|
||||||
numexpr==2.10.0
|
numexpr==2.10.0
|
||||||
# via pandas
|
# via pandas
|
||||||
numpy==2.0.0
|
numpy==1.26.4
|
||||||
# via bottleneck
|
# via bottleneck
|
||||||
# via numba
|
# via numba
|
||||||
# via numexpr
|
# via numexpr
|
||||||
@ -122,11 +122,11 @@ ptyprocess==0.7.0
|
|||||||
# via pexpect
|
# via pexpect
|
||||||
pure-eval==0.2.2
|
pure-eval==0.2.2
|
||||||
# via stack-data
|
# via stack-data
|
||||||
py3langid==0.3.0
|
py3langid==0.2.2
|
||||||
# via csv-metadata-quality
|
# via csv-metadata-quality
|
||||||
pyarrow==16.1.0
|
pyarrow==16.1.0
|
||||||
# via pandas
|
# via pandas
|
||||||
pycountry==24.6.1
|
pycountry==23.12.11
|
||||||
# via csv-metadata-quality
|
# via csv-metadata-quality
|
||||||
pygments==2.18.0
|
pygments==2.18.0
|
||||||
# via ipython
|
# via ipython
|
||||||
@ -149,7 +149,7 @@ pyyaml==6.0.1
|
|||||||
requests==2.32.2
|
requests==2.32.2
|
||||||
# via csv-metadata-quality
|
# via csv-metadata-quality
|
||||||
# via requests-cache
|
# via requests-cache
|
||||||
requests-cache==1.2.1
|
requests-cache==1.2.0
|
||||||
# via csv-metadata-quality
|
# via csv-metadata-quality
|
||||||
rich==13.7.1
|
rich==13.7.1
|
||||||
# via pytest-clarity
|
# via pytest-clarity
|
||||||
|
@ -28,13 +28,13 @@ ftfy==6.2.0
|
|||||||
# via csv-metadata-quality
|
# via csv-metadata-quality
|
||||||
idna==3.7
|
idna==3.7
|
||||||
# via requests
|
# via requests
|
||||||
llvmlite==0.43.0
|
llvmlite==0.42.0
|
||||||
# via numba
|
# via numba
|
||||||
numba==0.60.0
|
numba==0.59.1
|
||||||
# via pandas
|
# via pandas
|
||||||
numexpr==2.10.0
|
numexpr==2.10.0
|
||||||
# via pandas
|
# via pandas
|
||||||
numpy==2.0.0
|
numpy==1.26.4
|
||||||
# via bottleneck
|
# via bottleneck
|
||||||
# via numba
|
# via numba
|
||||||
# via numexpr
|
# via numexpr
|
||||||
@ -46,11 +46,11 @@ pandas==2.2.2
|
|||||||
# via csv-metadata-quality
|
# via csv-metadata-quality
|
||||||
platformdirs==4.2.2
|
platformdirs==4.2.2
|
||||||
# via requests-cache
|
# via requests-cache
|
||||||
py3langid==0.3.0
|
py3langid==0.2.2
|
||||||
# via csv-metadata-quality
|
# via csv-metadata-quality
|
||||||
pyarrow==16.1.0
|
pyarrow==16.1.0
|
||||||
# via pandas
|
# via pandas
|
||||||
pycountry==24.6.1
|
pycountry==23.12.11
|
||||||
# via csv-metadata-quality
|
# via csv-metadata-quality
|
||||||
python-dateutil==2.9.0.post0
|
python-dateutil==2.9.0.post0
|
||||||
# via pandas
|
# via pandas
|
||||||
@ -61,7 +61,7 @@ pytz==2024.1
|
|||||||
requests==2.32.2
|
requests==2.32.2
|
||||||
# via csv-metadata-quality
|
# via csv-metadata-quality
|
||||||
# via requests-cache
|
# via requests-cache
|
||||||
requests-cache==1.2.1
|
requests-cache==1.2.0
|
||||||
# via csv-metadata-quality
|
# via csv-metadata-quality
|
||||||
six==1.16.0
|
six==1.16.0
|
||||||
# via python-dateutil
|
# via python-dateutil
|
||||||
|
Loading…
Reference in New Issue
Block a user