mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2025-02-18 07:46:21 +01:00
Compare commits
2 Commits
8eddb76aab
...
28f9026286
Author | SHA1 | Date | |
---|---|---|---|
28f9026286 | |||
cfe09f7126 |
@ -63,7 +63,7 @@ While it is *theoretically* possible for a single `|` character to be used legit
|
||||
This will also remove unnecessary trailing multi-value separators, for example `Kenya||Tanzania||`.
|
||||
|
||||
## Unsafe Fixes
|
||||
You can enable several "unsafe" fixes with the `--unsafe-fixes` option. Currently this will remove newlines and perform Unicode normalization.
|
||||
You can enable several "unsafe" fixes with the `--unsafe-fixes` option. Currently this will remove newlines, perform Unicode normalization, and attempt to fix "mojibake" characters.
|
||||
|
||||
### Newlines
|
||||
This is considered "unsafe" because some systems give special importance to vertical space and render it properly. DSpace does not support rendering newlines in its XMLUI and has, at times, suffered from parsing errors that cause the import process to fail if an input file had newlines. The `--unsafe-fixes` option strips Unix line feeds (U+000A).
|
||||
|
@ -1,3 +1,5 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-only
|
||||
|
||||
from sys import argv
|
||||
|
||||
from csv_metadata_quality import app
|
||||
|
@ -1,3 +1,5 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-only
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import signal
|
||||
|
@ -1,3 +1,5 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-only
|
||||
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime, timedelta
|
||||
|
@ -1,3 +1,5 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-only
|
||||
|
||||
import re
|
||||
|
||||
import langid
|
||||
|
@ -1,3 +1,5 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-only
|
||||
|
||||
import re
|
||||
from unicodedata import normalize
|
||||
|
||||
|
@ -1,3 +1,5 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-only
|
||||
|
||||
from ftfy.badness import sequence_weirdness
|
||||
|
||||
|
||||
|
@ -1 +1,3 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-only
|
||||
|
||||
VERSION = "0.4.8-dev"
|
||||
|
@ -1,3 +1,5 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-only
|
||||
|
||||
import pandas as pd
|
||||
from colorama import Fore
|
||||
|
||||
|
@ -1,3 +1,5 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-only
|
||||
|
||||
import csv_metadata_quality.fix as fix
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user