Switch to pandas 2.0.0rc1

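Reading the CSV with dtype_backend="pyarrow" seems to work fine with the new
PyArrow datatypes, so the dtype=str workaround (which kept dates like 1998
from being converted to 1998.0) is removed.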
2025-09-12 06:37:02 +02:00 · 2023-03-22 12:16:56 +03:00
parent 20a2cce34b
commit d4aed378cf
2 changed files with 2 additions and 3 deletions
--- a/csv_metadata_quality/app.py
+++ b/csv_metadata_quality/app.py
@@ -73,8 +73,7 @@ def run(argv):
    # set the signal handler for SIGINT (^C)
    signal.signal(signal.SIGINT, signal_handler)

-    # Read all fields as strings so dates don't get converted from 1998 to 1998.0
-    df = pd.read_csv(args.input_file, dtype=str)
+    df = pd.read_csv(args.input_file, dtype_backend="pyarrow")

    # Check if the user requested to skip any fields
    if args.exclude_fields:
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,7 +12,7 @@ csv-metadata-quality = 'csv_metadata_quality.__main__:main'

 [tool.poetry.dependencies]
 python = "^3.9"
-pandas = "^1.5.2"
+pandas = "2.0.0rc1"
 python-stdnum = "^1.18"
 requests = "^2.28.2"
 requests-cache = "^0.9.8"