mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-12-22 12:12:18 +01:00
Use pycountry instead of iso-639 for languages
iso-639 is a fork that hasn't been updated since 2016, whereas the original (pycountry) still seems to be well maintained, with recent database updates as well as tests for Python 3.7. Also, pycountry supports ISO 3166-2 (administrative zones), which we could eventually use for subregions.
This commit is contained in:
parent
a85b410ab9
commit
3c798fb504
2
Pipfile
2
Pipfile
@ -12,9 +12,9 @@ flake8 = "*"
|
|||||||
pandas = "*"
|
pandas = "*"
|
||||||
python-stdnum = "*"
|
python-stdnum = "*"
|
||||||
xlrd = "*"
|
xlrd = "*"
|
||||||
iso-639 = "*"
|
|
||||||
requests = "*"
|
requests = "*"
|
||||||
requests-cache = "*"
|
requests-cache = "*"
|
||||||
|
pycountry = "*"
|
||||||
|
|
||||||
[requires]
|
[requires]
|
||||||
python_version = "3.7"
|
python_version = "3.7"
|
||||||
|
28
Pipfile.lock
generated
28
Pipfile.lock
generated
@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"_meta": {
|
"_meta": {
|
||||||
"hash": {
|
"hash": {
|
||||||
"sha256": "c5c86b4dae011bcbf6705514d97aa55e0a59dd8b7927c38e34103d77eca13cc7"
|
"sha256": "1c4130ed98fb55545244ba2926f2b4246dc86af7545cb892a45311426f934cae"
|
||||||
},
|
},
|
||||||
"pipfile-spec": 6,
|
"pipfile-spec": 6,
|
||||||
"requires": {
|
"requires": {
|
||||||
@ -37,13 +37,6 @@
|
|||||||
],
|
],
|
||||||
"version": "==2.8"
|
"version": "==2.8"
|
||||||
},
|
},
|
||||||
"iso-639": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:dc9cd4b880b898d774c47fe9775167404af8a85dd889d58f9008035109acce49"
|
|
||||||
],
|
|
||||||
"index": "pypi",
|
|
||||||
"version": "==0.4.5"
|
|
||||||
},
|
|
||||||
"numpy": {
|
"numpy": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:03e311b0a4c9f5755da7d52161280c6a78406c7be5c5cc7facfbcebb641efb7e",
|
"sha256:03e311b0a4c9f5755da7d52161280c6a78406c7be5c5cc7facfbcebb641efb7e",
|
||||||
@ -85,6 +78,13 @@
|
|||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"version": "==0.25.0"
|
"version": "==0.25.0"
|
||||||
},
|
},
|
||||||
|
"pycountry": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:68e58bfd3bedeea49ba9d4b38f2bd5e042f9753628eba9a819fb03f551d89096"
|
||||||
|
],
|
||||||
|
"index": "pypi",
|
||||||
|
"version": "==19.7.15"
|
||||||
|
},
|
||||||
"python-dateutil": {
|
"python-dateutil": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:7e6584c74aeed623791615e26efd690f29817a27c73085b78e4bad02493df2fb",
|
"sha256:7e6584c74aeed623791615e26efd690f29817a27c73085b78e4bad02493df2fb",
|
||||||
@ -235,10 +235,10 @@
|
|||||||
},
|
},
|
||||||
"packaging": {
|
"packaging": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:0c98a5d0be38ed775798ece1b9727178c4469d9c3b4ada66e8e6b7849f8732af",
|
"sha256:a7ac867b97fdc07ee80a8058fe4435ccd274ecc3b0ed61d852d7d53055528cf9",
|
||||||
"sha256:9e1cbf8c12b1f1ce0bb5344b8d7ecf66a6f8a6e91bcb0c84593ed6d3ab5c4ab3"
|
"sha256:c491ca87294da7cc01902edbe30a5bc6c4c28172b5138ab4e4aa1b9d7bfaeafe"
|
||||||
],
|
],
|
||||||
"version": "==19.0"
|
"version": "==19.1"
|
||||||
},
|
},
|
||||||
"parso": {
|
"parso": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
@ -314,10 +314,10 @@
|
|||||||
},
|
},
|
||||||
"pyparsing": {
|
"pyparsing": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:43c5486cefefa536c9aab528881c992328f020eefe4f6d06332449c365218580",
|
"sha256:6f98a7b9397e206d78cc01df10131398f1c8b8510a2f4d97d9abd82e1aacdd80",
|
||||||
"sha256:d6c5ffe9d0305b9b977f7a642d36b9370954d1da7ada4c62393382cbadad4265"
|
"sha256:d9338df12903bbf5d65a0e4e87c2161968b10d2e489652bb47001d82a9b028b4"
|
||||||
],
|
],
|
||||||
"version": "==2.4.1.1"
|
"version": "==2.4.2"
|
||||||
},
|
},
|
||||||
"pytest": {
|
"pytest": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
|
@ -157,7 +157,7 @@ def language(field):
|
|||||||
Prints the value if it is invalid.
|
Prints the value if it is invalid.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from iso639 import languages
|
from pycountry import languages
|
||||||
|
|
||||||
# Skip fields with missing values
|
# Skip fields with missing values
|
||||||
if pd.isna(field):
|
if pd.isna(field):
|
||||||
@ -169,19 +169,14 @@ def language(field):
|
|||||||
for value in field.split('||'):
|
for value in field.split('||'):
|
||||||
|
|
||||||
# After splitting, check if language value is 2 or 3 characters so we
|
# After splitting, check if language value is 2 or 3 characters so we
|
||||||
# can check it against ISO 639-2 or ISO 639-3 accordingly. In iso-639
|
# can check it against ISO 639-2 or ISO 639-3 accordingly.
|
||||||
# library ISO 639-2 is "part1" and ISO 639-3 is "part3".
|
|
||||||
if len(value) == 2:
|
if len(value) == 2:
|
||||||
try:
|
if not languages.get(alpha_2=value):
|
||||||
languages.get(part1=value)
|
|
||||||
except KeyError:
|
|
||||||
print(f'Invalid ISO 639-2 language: {value}')
|
print(f'Invalid ISO 639-2 language: {value}')
|
||||||
|
|
||||||
pass
|
pass
|
||||||
elif len(value) == 3:
|
elif len(value) == 3:
|
||||||
try:
|
if not languages.get(alpha_3=value):
|
||||||
languages.get(part3=value)
|
|
||||||
except KeyError:
|
|
||||||
print(f'Invalid ISO 639-3 language: {value}')
|
print(f'Invalid ISO 639-3 language: {value}')
|
||||||
|
|
||||||
pass
|
pass
|
||||||
|
Loading…
Reference in New Issue
Block a user