mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-11-17 11:37:03 +01:00
Use pycountry instead of iso-639 for languages
iso-639 is a fork that hasn't been updated since 2016, whereas the original, pycountry, still seems to be well maintained, with recent database updates as well as tests for Python 3.7. Also, pycountry supports ISO 3166-2 (administrative subdivisions of countries), which we could eventually use for subregions.
This commit is contained in:
parent
a85b410ab9
commit
3c798fb504
2
Pipfile
2
Pipfile
@ -12,9 +12,9 @@ flake8 = "*"
|
||||
pandas = "*"
|
||||
python-stdnum = "*"
|
||||
xlrd = "*"
|
||||
iso-639 = "*"
|
||||
requests = "*"
|
||||
requests-cache = "*"
|
||||
pycountry = "*"
|
||||
|
||||
[requires]
|
||||
python_version = "3.7"
|
||||
|
28
Pipfile.lock
generated
28
Pipfile.lock
generated
@ -1,7 +1,7 @@
|
||||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "c5c86b4dae011bcbf6705514d97aa55e0a59dd8b7927c38e34103d77eca13cc7"
|
||||
"sha256": "1c4130ed98fb55545244ba2926f2b4246dc86af7545cb892a45311426f934cae"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {
|
||||
@ -37,13 +37,6 @@
|
||||
],
|
||||
"version": "==2.8"
|
||||
},
|
||||
"iso-639": {
|
||||
"hashes": [
|
||||
"sha256:dc9cd4b880b898d774c47fe9775167404af8a85dd889d58f9008035109acce49"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==0.4.5"
|
||||
},
|
||||
"numpy": {
|
||||
"hashes": [
|
||||
"sha256:03e311b0a4c9f5755da7d52161280c6a78406c7be5c5cc7facfbcebb641efb7e",
|
||||
@ -85,6 +78,13 @@
|
||||
"index": "pypi",
|
||||
"version": "==0.25.0"
|
||||
},
|
||||
"pycountry": {
|
||||
"hashes": [
|
||||
"sha256:68e58bfd3bedeea49ba9d4b38f2bd5e042f9753628eba9a819fb03f551d89096"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==19.7.15"
|
||||
},
|
||||
"python-dateutil": {
|
||||
"hashes": [
|
||||
"sha256:7e6584c74aeed623791615e26efd690f29817a27c73085b78e4bad02493df2fb",
|
||||
@ -235,10 +235,10 @@
|
||||
},
|
||||
"packaging": {
|
||||
"hashes": [
|
||||
"sha256:0c98a5d0be38ed775798ece1b9727178c4469d9c3b4ada66e8e6b7849f8732af",
|
||||
"sha256:9e1cbf8c12b1f1ce0bb5344b8d7ecf66a6f8a6e91bcb0c84593ed6d3ab5c4ab3"
|
||||
"sha256:a7ac867b97fdc07ee80a8058fe4435ccd274ecc3b0ed61d852d7d53055528cf9",
|
||||
"sha256:c491ca87294da7cc01902edbe30a5bc6c4c28172b5138ab4e4aa1b9d7bfaeafe"
|
||||
],
|
||||
"version": "==19.0"
|
||||
"version": "==19.1"
|
||||
},
|
||||
"parso": {
|
||||
"hashes": [
|
||||
@ -314,10 +314,10 @@
|
||||
},
|
||||
"pyparsing": {
|
||||
"hashes": [
|
||||
"sha256:43c5486cefefa536c9aab528881c992328f020eefe4f6d06332449c365218580",
|
||||
"sha256:d6c5ffe9d0305b9b977f7a642d36b9370954d1da7ada4c62393382cbadad4265"
|
||||
"sha256:6f98a7b9397e206d78cc01df10131398f1c8b8510a2f4d97d9abd82e1aacdd80",
|
||||
"sha256:d9338df12903bbf5d65a0e4e87c2161968b10d2e489652bb47001d82a9b028b4"
|
||||
],
|
||||
"version": "==2.4.1.1"
|
||||
"version": "==2.4.2"
|
||||
},
|
||||
"pytest": {
|
||||
"hashes": [
|
||||
|
@ -157,7 +157,7 @@ def language(field):
|
||||
Prints the value if it is invalid.
|
||||
"""
|
||||
|
||||
from iso639 import languages
|
||||
from pycountry import languages
|
||||
|
||||
# Skip fields with missing values
|
||||
if pd.isna(field):
|
||||
@ -169,19 +169,14 @@ def language(field):
|
||||
for value in field.split('||'):
|
||||
|
||||
# After splitting, check if language value is 2 or 3 characters so we
|
||||
# can check it against ISO 639-2 or ISO 639-3 accordingly. In iso-639
|
||||
# library ISO 639-2 is "part1" and ISO 639-3 is "part3".
|
||||
# can check it against ISO 639-1 or ISO 639-3 accordingly.
|
||||
if len(value) == 2:
|
||||
try:
|
||||
languages.get(part1=value)
|
||||
except KeyError:
|
||||
if not languages.get(alpha_2=value):
|
||||
print(f'Invalid ISO 639-2 language: {value}')
|
||||
|
||||
pass
|
||||
elif len(value) == 3:
|
||||
try:
|
||||
languages.get(part3=value)
|
||||
except KeyError:
|
||||
if not languages.get(alpha_3=value):
|
||||
print(f'Invalid ISO 639-3 language: {value}')
|
||||
|
||||
pass
|
||||
|
Loading…
Reference in New Issue
Block a user