1
0
mirror of https://github.com/ilri/csv-metadata-quality.git synced 2024-12-22 12:12:18 +01:00

Use pycountry instead of iso-639 for languages

iso-639 is a fork that hasn't been updated since 2016, whereas the
original still seems to be well maintained, with recent database
updates as well as tests for Python 3.7.

Also, pycountry supports ISO 3166-2 (administrative zones), which
we could eventually use for subregions.
This commit is contained in:
Alan Orth 2019-07-30 16:39:26 +03:00
parent a85b410ab9
commit 3c798fb504
Signed by: alanorth
GPG Key ID: 0FB860CC9C45B1B9
3 changed files with 19 additions and 24 deletions

View File

@ -12,9 +12,9 @@ flake8 = "*"
pandas = "*" pandas = "*"
python-stdnum = "*" python-stdnum = "*"
xlrd = "*" xlrd = "*"
iso-639 = "*"
requests = "*" requests = "*"
requests-cache = "*" requests-cache = "*"
pycountry = "*"
[requires] [requires]
python_version = "3.7" python_version = "3.7"

28
Pipfile.lock generated
View File

@ -1,7 +1,7 @@
{ {
"_meta": { "_meta": {
"hash": { "hash": {
"sha256": "c5c86b4dae011bcbf6705514d97aa55e0a59dd8b7927c38e34103d77eca13cc7" "sha256": "1c4130ed98fb55545244ba2926f2b4246dc86af7545cb892a45311426f934cae"
}, },
"pipfile-spec": 6, "pipfile-spec": 6,
"requires": { "requires": {
@ -37,13 +37,6 @@
], ],
"version": "==2.8" "version": "==2.8"
}, },
"iso-639": {
"hashes": [
"sha256:dc9cd4b880b898d774c47fe9775167404af8a85dd889d58f9008035109acce49"
],
"index": "pypi",
"version": "==0.4.5"
},
"numpy": { "numpy": {
"hashes": [ "hashes": [
"sha256:03e311b0a4c9f5755da7d52161280c6a78406c7be5c5cc7facfbcebb641efb7e", "sha256:03e311b0a4c9f5755da7d52161280c6a78406c7be5c5cc7facfbcebb641efb7e",
@ -85,6 +78,13 @@
"index": "pypi", "index": "pypi",
"version": "==0.25.0" "version": "==0.25.0"
}, },
"pycountry": {
"hashes": [
"sha256:68e58bfd3bedeea49ba9d4b38f2bd5e042f9753628eba9a819fb03f551d89096"
],
"index": "pypi",
"version": "==19.7.15"
},
"python-dateutil": { "python-dateutil": {
"hashes": [ "hashes": [
"sha256:7e6584c74aeed623791615e26efd690f29817a27c73085b78e4bad02493df2fb", "sha256:7e6584c74aeed623791615e26efd690f29817a27c73085b78e4bad02493df2fb",
@ -235,10 +235,10 @@
}, },
"packaging": { "packaging": {
"hashes": [ "hashes": [
"sha256:0c98a5d0be38ed775798ece1b9727178c4469d9c3b4ada66e8e6b7849f8732af", "sha256:a7ac867b97fdc07ee80a8058fe4435ccd274ecc3b0ed61d852d7d53055528cf9",
"sha256:9e1cbf8c12b1f1ce0bb5344b8d7ecf66a6f8a6e91bcb0c84593ed6d3ab5c4ab3" "sha256:c491ca87294da7cc01902edbe30a5bc6c4c28172b5138ab4e4aa1b9d7bfaeafe"
], ],
"version": "==19.0" "version": "==19.1"
}, },
"parso": { "parso": {
"hashes": [ "hashes": [
@ -314,10 +314,10 @@
}, },
"pyparsing": { "pyparsing": {
"hashes": [ "hashes": [
"sha256:43c5486cefefa536c9aab528881c992328f020eefe4f6d06332449c365218580", "sha256:6f98a7b9397e206d78cc01df10131398f1c8b8510a2f4d97d9abd82e1aacdd80",
"sha256:d6c5ffe9d0305b9b977f7a642d36b9370954d1da7ada4c62393382cbadad4265" "sha256:d9338df12903bbf5d65a0e4e87c2161968b10d2e489652bb47001d82a9b028b4"
], ],
"version": "==2.4.1.1" "version": "==2.4.2"
}, },
"pytest": { "pytest": {
"hashes": [ "hashes": [

View File

@ -157,7 +157,7 @@ def language(field):
Prints the value if it is invalid. Prints the value if it is invalid.
""" """
from iso639 import languages from pycountry import languages
# Skip fields with missing values # Skip fields with missing values
if pd.isna(field): if pd.isna(field):
@ -169,19 +169,14 @@ def language(field):
for value in field.split('||'): for value in field.split('||'):
# After splitting, check if language value is 2 or 3 characters so we # After splitting, check if language value is 2 or 3 characters so we
# can check it against ISO 639-2 or ISO 639-3 accordingly. In iso-639 # can check it against ISO 639-2 or ISO 639-3 accordingly.
# library ISO 639-2 is "part1" and ISO 639-3 is "part3".
if len(value) == 2: if len(value) == 2:
try: if not languages.get(alpha_2=value):
languages.get(part1=value)
except KeyError:
print(f'Invalid ISO 639-2 language: {value}') print(f'Invalid ISO 639-2 language: {value}')
pass pass
elif len(value) == 3: elif len(value) == 3:
try: if not languages.get(alpha_3=value):
languages.get(part3=value)
except KeyError:
print(f'Invalid ISO 639-3 language: {value}') print(f'Invalid ISO 639-3 language: {value}')
pass pass