mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2024-12-21 11:42:20 +01:00
Use py3langid instead of langid
Faster and more modern code for Python 3 as a drop-in replacement. See: https://adrien.barbaresi.eu/blog/language-detection-langid-py-faster.html
This commit is contained in:
parent
fb341dd9fa
commit
a21ffb0fa8
@ -15,6 +15,7 @@ fields
|
||||
### Changed
|
||||
- Don't run newline fix on description fields
|
||||
- Install requests-cache in main run() function instead of check.agrovoc() function so we only incur the overhead once
|
||||
- Use py3langid instead of langid, see: [How to make language detection with langid.py faster](https://adrien.barbaresi.eu/blog/language-detection-langid-py-faster.html)
|
||||
|
||||
### Updated
|
||||
- Python dependencies, including Pandas 2.0.0 and [Arrow-backed dtypes](https://datapythonista.me/blog/pandas-20-and-the-arrow-revolution-part-i)
|
||||
|
@ -2,8 +2,8 @@
|
||||
|
||||
import re
|
||||
|
||||
import langid
|
||||
import pandas as pd
|
||||
import py3langid as langid
|
||||
from colorama import Fore
|
||||
from pycountry import languages
|
||||
|
||||
|
@ -16,11 +16,11 @@ pandas = {version = "^2.0.2", extras = ["feather", "performance"]}
|
||||
python-stdnum = "^1.18"
|
||||
requests = "^2.28.2"
|
||||
requests-cache = "^1.0.0"
|
||||
langid = "^1.1.6"
|
||||
colorama = "^0.4.6"
|
||||
ftfy = "^6.1.1"
|
||||
country-converter = "~1.1.0"
|
||||
pycountry = "^23.12.7"
|
||||
py3langid = "^0.2.2"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
pytest = "^7.2.1"
|
||||
|
Loading…
Reference in New Issue
Block a user