From d4aed378cf9715efb69c9693528fa6ac0ee322a8 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Wed, 22 Mar 2023 12:16:56 +0300 Subject: [PATCH] Switch to pandas 2.0.0rc1 Seems to work fine with the new PyArrow datatypes. --- csv_metadata_quality/app.py | 3 +-- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/csv_metadata_quality/app.py b/csv_metadata_quality/app.py index f5c7ceb..2e38308 100644 --- a/csv_metadata_quality/app.py +++ b/csv_metadata_quality/app.py @@ -73,8 +73,7 @@ def run(argv): # set the signal handler for SIGINT (^C) signal.signal(signal.SIGINT, signal_handler) - # Read all fields as strings so dates don't get converted from 1998 to 1998.0 - df = pd.read_csv(args.input_file, dtype=str) + df = pd.read_csv(args.input_file, dtype_backend="pyarrow") # Check if the user requested to skip any fields if args.exclude_fields: diff --git a/pyproject.toml b/pyproject.toml index b31742a..a791ad3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ csv-metadata-quality = 'csv_metadata_quality.__main__:main' [tool.poetry.dependencies] python = "^3.9" -pandas = "^1.5.2" +pandas = "2.0.0rc1" python-stdnum = "^1.18" requests = "^2.28.2" requests-cache = "^0.9.8"