mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2025-05-09 14:46:00 +02:00
csv_metadata_quality/app.py: read fields as strings
I suspect this undermines the performance gains of the PyArrow backend introduced in Pandas 2.0.0, but we sometimes deal with messy data, so we must be able to rely on every field being read as a string.
This commit is contained in:
@@ -73,7 +73,8 @@ def run(argv):
|
||||
# set the signal handler for SIGINT (^C)
|
||||
signal.signal(signal.SIGINT, signal_handler)
|
||||
|
||||
df = pd.read_csv(args.input_file, dtype_backend="pyarrow")
|
||||
# Read all fields as strings so dates don't get converted from 1998 to 1998.0
|
||||
df = pd.read_csv(args.input_file, dtype_backend="pyarrow", dtype="str")
|
||||
|
||||
# Check if the user requested to skip any fields
|
||||
if args.exclude_fields:
|
||||
|
Reference in New Issue
Block a user