mirror of
https://github.com/ilri/csv-metadata-quality.git
synced 2025-02-22 09:46:22 +01:00
Compare commits
No commits in common. "a7fc5a246c6362e0bbe6d698970c0a8da63ca372" and "9f5d2c2c4ff2e228e8c16bd07db064b70decc53e" have entirely different histories.
a7fc5a246c
...
9f5d2c2c4f
@ -4,7 +4,6 @@ import signal
|
|||||||
import sys
|
import sys
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from colorama import Fore
|
|
||||||
|
|
||||||
import csv_metadata_quality.check as check
|
import csv_metadata_quality.check as check
|
||||||
import csv_metadata_quality.experimental as experimental
|
import csv_metadata_quality.experimental as experimental
|
||||||
@ -78,7 +77,7 @@ def run(argv):
|
|||||||
if column == exclude and skip is False:
|
if column == exclude and skip is False:
|
||||||
skip = True
|
skip = True
|
||||||
if skip:
|
if skip:
|
||||||
print(f"{Fore.YELLOW}Skipping {Fore.RESET}{column}")
|
print(f"Skipping {column}")
|
||||||
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -3,7 +3,6 @@ from datetime import datetime, timedelta
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
import requests
|
import requests
|
||||||
import requests_cache
|
import requests_cache
|
||||||
from colorama import Fore
|
|
||||||
from pycountry import languages
|
from pycountry import languages
|
||||||
|
|
||||||
|
|
||||||
@ -27,7 +26,7 @@ def issn(field):
|
|||||||
for value in field.split("||"):
|
for value in field.split("||"):
|
||||||
|
|
||||||
if not issn.is_valid(value):
|
if not issn.is_valid(value):
|
||||||
print(f"{Fore.RED}Invalid ISSN: {Fore.RESET}{value}")
|
print(f"Invalid ISSN: {value}")
|
||||||
|
|
||||||
return field
|
return field
|
||||||
|
|
||||||
@ -52,7 +51,7 @@ def isbn(field):
|
|||||||
for value in field.split("||"):
|
for value in field.split("||"):
|
||||||
|
|
||||||
if not isbn.is_valid(value):
|
if not isbn.is_valid(value):
|
||||||
print(f"{Fore.RED}Invalid ISBN: {Fore.RESET}{value}")
|
print(f"Invalid ISBN: {value}")
|
||||||
|
|
||||||
return field
|
return field
|
||||||
|
|
||||||
@ -77,9 +76,7 @@ def separators(field, field_name):
|
|||||||
for value in field.split("||"):
|
for value in field.split("||"):
|
||||||
# Check if the current value is blank
|
# Check if the current value is blank
|
||||||
if value == "":
|
if value == "":
|
||||||
print(
|
print(f"Unnecessary multi-value separator ({field_name}): {field}")
|
||||||
f"{Fore.RED}Unnecessary multi-value separator ({field_name}): {Fore.RESET}{field}"
|
|
||||||
)
|
|
||||||
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@ -88,9 +85,7 @@ def separators(field, field_name):
|
|||||||
|
|
||||||
# Check if there was a match
|
# Check if there was a match
|
||||||
if match:
|
if match:
|
||||||
print(
|
print(f"Invalid multi-value separator ({field_name}): {field}")
|
||||||
f"{Fore.RED}Invalid multi-value separator ({field_name}): {Fore.RESET}{field}"
|
|
||||||
)
|
|
||||||
|
|
||||||
return field
|
return field
|
||||||
|
|
||||||
@ -107,7 +102,7 @@ def date(field, field_name):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
if pd.isna(field):
|
if pd.isna(field):
|
||||||
print(f"{Fore.RED}Missing date ({field_name}).{Fore.RESET}")
|
print(f"Missing date ({field_name}).")
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
@ -116,9 +111,7 @@ def date(field, field_name):
|
|||||||
|
|
||||||
# We don't allow multi-value date fields
|
# We don't allow multi-value date fields
|
||||||
if len(multiple_dates) > 1:
|
if len(multiple_dates) > 1:
|
||||||
print(
|
print(f"Multiple dates not allowed ({field_name}): {field}")
|
||||||
f"{Fore.RED}Multiple dates not allowed ({field_name}): {Fore.RESET}{field}"
|
|
||||||
)
|
|
||||||
|
|
||||||
return field
|
return field
|
||||||
|
|
||||||
@ -152,7 +145,7 @@ def date(field, field_name):
|
|||||||
|
|
||||||
return field
|
return field
|
||||||
except ValueError:
|
except ValueError:
|
||||||
print(f"{Fore.RED}Invalid date ({field_name}): {Fore.RESET}{field}")
|
print(f"Invalid date ({field_name}): {field}")
|
||||||
|
|
||||||
return field
|
return field
|
||||||
|
|
||||||
@ -185,7 +178,9 @@ def suspicious_characters(field, field_name):
|
|||||||
# character and spanning enough of the rest to give a preview,
|
# character and spanning enough of the rest to give a preview,
|
||||||
# but not too much to cause the line to break in terminals with
|
# but not too much to cause the line to break in terminals with
|
||||||
# a default of 80 characters width.
|
# a default of 80 characters width.
|
||||||
suspicious_character_msg = f"{Fore.YELLOW}Suspicious character ({field_name}): {Fore.RESET}{field_subset}"
|
suspicious_character_msg = (
|
||||||
|
f"Suspicious character ({field_name}): {field_subset}"
|
||||||
|
)
|
||||||
print(f"{suspicious_character_msg:1.80}")
|
print(f"{suspicious_character_msg:1.80}")
|
||||||
|
|
||||||
return field
|
return field
|
||||||
@ -210,16 +205,16 @@ def language(field):
|
|||||||
# can check it against ISO 639-1 or ISO 639-3 accordingly.
|
# can check it against ISO 639-1 or ISO 639-3 accordingly.
|
||||||
if len(value) == 2:
|
if len(value) == 2:
|
||||||
if not languages.get(alpha_2=value):
|
if not languages.get(alpha_2=value):
|
||||||
print(f"{Fore.RED}Invalid ISO 639-1 language: {Fore.RESET}{value}")
|
print(f"Invalid ISO 639-1 language: {value}")
|
||||||
|
|
||||||
pass
|
pass
|
||||||
elif len(value) == 3:
|
elif len(value) == 3:
|
||||||
if not languages.get(alpha_3=value):
|
if not languages.get(alpha_3=value):
|
||||||
print(f"{Fore.RED}Invalid ISO 639-3 language: {Fore.RESET}{value}")
|
print(f"Invalid ISO 639-3 language: {value}")
|
||||||
|
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
print(f"{Fore.RED}Invalid language: {Fore.RESET}{value}")
|
print(f"Invalid language: {value}")
|
||||||
|
|
||||||
return field
|
return field
|
||||||
|
|
||||||
@ -261,7 +256,7 @@ def agrovoc(field, field_name):
|
|||||||
|
|
||||||
# check if there are any results
|
# check if there are any results
|
||||||
if len(data["results"]) == 0:
|
if len(data["results"]) == 0:
|
||||||
print(f"{Fore.RED}Invalid AGROVOC ({field_name}): {Fore.RESET}{value}")
|
print(f"Invalid AGROVOC ({field_name}): {value}")
|
||||||
|
|
||||||
return field
|
return field
|
||||||
|
|
||||||
@ -314,6 +309,6 @@ def filename_extension(field):
|
|||||||
break
|
break
|
||||||
|
|
||||||
if filename_extension_match is False:
|
if filename_extension_match is False:
|
||||||
print(f"{Fore.YELLOW}Filename with uncommon extension: {Fore.RESET}{value}")
|
print(f"Filename with uncommon extension: {value}")
|
||||||
|
|
||||||
return field
|
return field
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
from colorama import Fore
|
|
||||||
|
|
||||||
|
|
||||||
def correct_language(row):
|
def correct_language(row):
|
||||||
@ -11,10 +10,9 @@ def correct_language(row):
|
|||||||
language and returns the value in the language field if it does match.
|
language and returns the value in the language field if it does match.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
import langid
|
|
||||||
from pycountry import languages
|
from pycountry import languages
|
||||||
|
import langid
|
||||||
|
import re
|
||||||
|
|
||||||
# Initialize some variables at global scope so that we can set them in the
|
# Initialize some variables at global scope so that we can set them in the
|
||||||
# loop scope below and still be able to access them afterwards.
|
# loop scope below and still be able to access them afterwards.
|
||||||
@ -85,12 +83,12 @@ def correct_language(row):
|
|||||||
detected_language = languages.get(alpha_2=langid_classification[0])
|
detected_language = languages.get(alpha_2=langid_classification[0])
|
||||||
if len(language) == 2 and language != detected_language.alpha_2:
|
if len(language) == 2 and language != detected_language.alpha_2:
|
||||||
print(
|
print(
|
||||||
f"{Fore.YELLOW}Possibly incorrect language {language} (detected {detected_language.alpha_2}): {Fore.RESET}{title}"
|
f"Possibly incorrect language {language} (detected {detected_language.alpha_2}): {title}"
|
||||||
)
|
)
|
||||||
|
|
||||||
elif len(language) == 3 and language != detected_language.alpha_3:
|
elif len(language) == 3 and language != detected_language.alpha_3:
|
||||||
print(
|
print(
|
||||||
f"{Fore.YELLOW}Possibly incorrect language {language} (detected {detected_language.alpha_3}): {Fore.RESET}{title}"
|
f"Possibly incorrect language {language} (detected {detected_language.alpha_3}): {title}"
|
||||||
)
|
)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
@ -2,7 +2,6 @@ import re
|
|||||||
from unicodedata import normalize
|
from unicodedata import normalize
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from colorama import Fore
|
|
||||||
|
|
||||||
from csv_metadata_quality.util import is_nfc
|
from csv_metadata_quality.util import is_nfc
|
||||||
|
|
||||||
@ -30,9 +29,7 @@ def whitespace(field, field_name):
|
|||||||
match = re.findall(pattern, value)
|
match = re.findall(pattern, value)
|
||||||
|
|
||||||
if match:
|
if match:
|
||||||
print(
|
print(f"Removing excessive whitespace ({field_name}): {value}")
|
||||||
f"{Fore.GREEN}Removing excessive whitespace ({field_name}): {Fore.RESET}{value}"
|
|
||||||
)
|
|
||||||
value = re.sub(pattern, " ", value)
|
value = re.sub(pattern, " ", value)
|
||||||
|
|
||||||
# Save cleaned value
|
# Save cleaned value
|
||||||
@ -65,9 +62,7 @@ def separators(field, field_name):
|
|||||||
for value in field.split("||"):
|
for value in field.split("||"):
|
||||||
# Check if the value is blank and skip it
|
# Check if the value is blank and skip it
|
||||||
if value == "":
|
if value == "":
|
||||||
print(
|
print(f"Fixing unnecessary multi-value separator ({field_name}): {field}")
|
||||||
f"{Fore.GREEN}Fixing unnecessary multi-value separator ({field_name}): {Fore.RESET}{field}"
|
|
||||||
)
|
|
||||||
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@ -76,9 +71,7 @@ def separators(field, field_name):
|
|||||||
match = re.findall(pattern, value)
|
match = re.findall(pattern, value)
|
||||||
|
|
||||||
if match:
|
if match:
|
||||||
print(
|
print(f"Fixing invalid multi-value separator ({field_name}): {value}")
|
||||||
f"{Fore.RED}Fixing invalid multi-value separator ({field_name}): {Fore.RESET}{value}"
|
|
||||||
)
|
|
||||||
|
|
||||||
value = re.sub(pattern, "||", value)
|
value = re.sub(pattern, "||", value)
|
||||||
|
|
||||||
@ -114,7 +107,7 @@ def unnecessary_unicode(field):
|
|||||||
match = re.findall(pattern, field)
|
match = re.findall(pattern, field)
|
||||||
|
|
||||||
if match:
|
if match:
|
||||||
print(f"{Fore.GREEN}Removing unnecessary Unicode (U+200B): {Fore.RESET}{field}")
|
print(f"Removing unnecessary Unicode (U+200B): {field}")
|
||||||
field = re.sub(pattern, "", field)
|
field = re.sub(pattern, "", field)
|
||||||
|
|
||||||
# Check for replacement characters (U+FFFD)
|
# Check for replacement characters (U+FFFD)
|
||||||
@ -122,7 +115,7 @@ def unnecessary_unicode(field):
|
|||||||
match = re.findall(pattern, field)
|
match = re.findall(pattern, field)
|
||||||
|
|
||||||
if match:
|
if match:
|
||||||
print(f"{Fore.GREEN}Removing unnecessary Unicode (U+FFFD): {Fore.RESET}{field}")
|
print(f"Removing unnecessary Unicode (U+FFFD): {field}")
|
||||||
field = re.sub(pattern, "", field)
|
field = re.sub(pattern, "", field)
|
||||||
|
|
||||||
# Check for no-break spaces (U+00A0)
|
# Check for no-break spaces (U+00A0)
|
||||||
@ -130,9 +123,7 @@ def unnecessary_unicode(field):
|
|||||||
match = re.findall(pattern, field)
|
match = re.findall(pattern, field)
|
||||||
|
|
||||||
if match:
|
if match:
|
||||||
print(
|
print(f"Replacing unnecessary Unicode (U+00A0): {field}")
|
||||||
f"{Fore.GREEN}Replacing unnecessary Unicode (U+00A0): {Fore.RESET}{field}"
|
|
||||||
)
|
|
||||||
field = re.sub(pattern, " ", field)
|
field = re.sub(pattern, " ", field)
|
||||||
|
|
||||||
# Check for soft hyphens (U+00AD), sometimes preceeded with a normal hyphen
|
# Check for soft hyphens (U+00AD), sometimes preceeded with a normal hyphen
|
||||||
@ -140,9 +131,7 @@ def unnecessary_unicode(field):
|
|||||||
match = re.findall(pattern, field)
|
match = re.findall(pattern, field)
|
||||||
|
|
||||||
if match:
|
if match:
|
||||||
print(
|
print(f"Replacing unnecessary Unicode (U+00AD): {field}")
|
||||||
f"{Fore.GREEN}Replacing unnecessary Unicode (U+00AD): {Fore.RESET}{field}"
|
|
||||||
)
|
|
||||||
field = re.sub(pattern, "-", field)
|
field = re.sub(pattern, "-", field)
|
||||||
|
|
||||||
return field
|
return field
|
||||||
@ -167,9 +156,7 @@ def duplicates(field, field_name):
|
|||||||
if value not in new_values:
|
if value not in new_values:
|
||||||
new_values.append(value)
|
new_values.append(value)
|
||||||
else:
|
else:
|
||||||
print(
|
print(f"Removing duplicate value ({field_name}): {value}")
|
||||||
f"{Fore.GREEN}Removing duplicate value ({field_name}): {Fore.RESET}{value}"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Create a new field consisting of all values joined with "||"
|
# Create a new field consisting of all values joined with "||"
|
||||||
new_field = "||".join(new_values)
|
new_field = "||".join(new_values)
|
||||||
@ -202,7 +189,7 @@ def newlines(field):
|
|||||||
match = re.findall(r"\n", field)
|
match = re.findall(r"\n", field)
|
||||||
|
|
||||||
if match:
|
if match:
|
||||||
print(f"{Fore.GREEN}Removing newline: {Fore.RESET}{field}")
|
print(f"Removing newline: {field}")
|
||||||
field = field.replace("\n", "")
|
field = field.replace("\n", "")
|
||||||
|
|
||||||
return field
|
return field
|
||||||
@ -226,9 +213,7 @@ def comma_space(field, field_name):
|
|||||||
match = re.findall(r",\w", field)
|
match = re.findall(r",\w", field)
|
||||||
|
|
||||||
if match:
|
if match:
|
||||||
print(
|
print(f"Adding space after comma ({field_name}): {field}")
|
||||||
f"{Fore.GREEN}Adding space after comma ({field_name}): {Fore.RESET}{field}"
|
|
||||||
)
|
|
||||||
field = re.sub(r",(\w)", r", \1", field)
|
field = re.sub(r",(\w)", r", \1", field)
|
||||||
|
|
||||||
return field
|
return field
|
||||||
@ -249,7 +234,7 @@ def normalize_unicode(field, field_name):
|
|||||||
|
|
||||||
# Check if the current string is using normalized Unicode (NFC)
|
# Check if the current string is using normalized Unicode (NFC)
|
||||||
if not is_nfc(field):
|
if not is_nfc(field):
|
||||||
print(f"{Fore.GREEN}Normalizing Unicode ({field_name}): {Fore.RESET}{field}")
|
print(f"Normalizing Unicode ({field_name}): {field}")
|
||||||
field = normalize("NFC", field)
|
field = normalize("NFC", field)
|
||||||
|
|
||||||
return field
|
return field
|
||||||
|
4
poetry.lock
generated
4
poetry.lock
generated
@ -159,7 +159,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
|
|||||||
name = "colorama"
|
name = "colorama"
|
||||||
version = "0.4.4"
|
version = "0.4.4"
|
||||||
description = "Cross-platform colored terminal text."
|
description = "Cross-platform colored terminal text."
|
||||||
category = "main"
|
category = "dev"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
|
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
|
||||||
|
|
||||||
@ -765,7 +765,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "1.1"
|
lock-version = "1.1"
|
||||||
python-versions = "^3.8"
|
python-versions = "^3.8"
|
||||||
content-hash = "8c4ba410bbdc930d2d74f7864470a18827029a5697869833959708d7425460ae"
|
content-hash = "63f2c6ef09652c4f8407660ff7b4690c8a07e5501eb8fc8c477f485de5888fcf"
|
||||||
|
|
||||||
[metadata.files]
|
[metadata.files]
|
||||||
agate = [
|
agate = [
|
||||||
|
@ -16,7 +16,6 @@ requests = "^2.23.0"
|
|||||||
requests-cache = "^0.5.2"
|
requests-cache = "^0.5.2"
|
||||||
pycountry = "^19.8.18"
|
pycountry = "^19.8.18"
|
||||||
langid = "^1.1.6"
|
langid = "^1.1.6"
|
||||||
colorama = "^0.4.4"
|
|
||||||
|
|
||||||
[tool.poetry.dev-dependencies]
|
[tool.poetry.dev-dependencies]
|
||||||
pytest = "^6.1.1"
|
pytest = "^6.1.1"
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
from colorama import Fore
|
|
||||||
|
|
||||||
import csv_metadata_quality.check as check
|
import csv_metadata_quality.check as check
|
||||||
import csv_metadata_quality.experimental as experimental
|
import csv_metadata_quality.experimental as experimental
|
||||||
@ -13,7 +12,7 @@ def test_check_invalid_issn(capsys):
|
|||||||
check.issn(value)
|
check.issn(value)
|
||||||
|
|
||||||
captured = capsys.readouterr()
|
captured = capsys.readouterr()
|
||||||
assert captured.out == f"{Fore.RED}Invalid ISSN: {Fore.RESET}{value}\n"
|
assert captured.out == f"Invalid ISSN: {value}\n"
|
||||||
|
|
||||||
|
|
||||||
def test_check_valid_issn():
|
def test_check_valid_issn():
|
||||||
@ -34,7 +33,7 @@ def test_check_invalid_isbn(capsys):
|
|||||||
check.isbn(value)
|
check.isbn(value)
|
||||||
|
|
||||||
captured = capsys.readouterr()
|
captured = capsys.readouterr()
|
||||||
assert captured.out == f"{Fore.RED}Invalid ISBN: {Fore.RESET}{value}\n"
|
assert captured.out == f"Invalid ISBN: {value}\n"
|
||||||
|
|
||||||
|
|
||||||
def test_check_valid_isbn():
|
def test_check_valid_isbn():
|
||||||
@ -57,10 +56,7 @@ def test_check_invalid_separators(capsys):
|
|||||||
check.separators(value, field_name)
|
check.separators(value, field_name)
|
||||||
|
|
||||||
captured = capsys.readouterr()
|
captured = capsys.readouterr()
|
||||||
assert (
|
assert captured.out == f"Invalid multi-value separator ({field_name}): {value}\n"
|
||||||
captured.out
|
|
||||||
== f"{Fore.RED}Invalid multi-value separator ({field_name}): {Fore.RESET}{value}\n"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_check_unnecessary_separators(capsys):
|
def test_check_unnecessary_separators(capsys):
|
||||||
@ -74,8 +70,7 @@ def test_check_unnecessary_separators(capsys):
|
|||||||
|
|
||||||
captured = capsys.readouterr()
|
captured = capsys.readouterr()
|
||||||
assert (
|
assert (
|
||||||
captured.out
|
captured.out == f"Unnecessary multi-value separator ({field_name}): {field}\n"
|
||||||
== f"{Fore.RED}Unnecessary multi-value separator ({field_name}): {Fore.RESET}{field}\n"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -101,7 +96,7 @@ def test_check_missing_date(capsys):
|
|||||||
check.date(value, field_name)
|
check.date(value, field_name)
|
||||||
|
|
||||||
captured = capsys.readouterr()
|
captured = capsys.readouterr()
|
||||||
assert captured.out == f"{Fore.RED}Missing date ({field_name}).{Fore.RESET}\n"
|
assert captured.out == f"Missing date ({field_name}).\n"
|
||||||
|
|
||||||
|
|
||||||
def test_check_multiple_dates(capsys):
|
def test_check_multiple_dates(capsys):
|
||||||
@ -114,10 +109,7 @@ def test_check_multiple_dates(capsys):
|
|||||||
check.date(value, field_name)
|
check.date(value, field_name)
|
||||||
|
|
||||||
captured = capsys.readouterr()
|
captured = capsys.readouterr()
|
||||||
assert (
|
assert captured.out == f"Multiple dates not allowed ({field_name}): {value}\n"
|
||||||
captured.out
|
|
||||||
== f"{Fore.RED}Multiple dates not allowed ({field_name}): {Fore.RESET}{value}\n"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_check_invalid_date(capsys):
|
def test_check_invalid_date(capsys):
|
||||||
@ -130,9 +122,7 @@ def test_check_invalid_date(capsys):
|
|||||||
check.date(value, field_name)
|
check.date(value, field_name)
|
||||||
|
|
||||||
captured = capsys.readouterr()
|
captured = capsys.readouterr()
|
||||||
assert (
|
assert captured.out == f"Invalid date ({field_name}): {value}\n"
|
||||||
captured.out == f"{Fore.RED}Invalid date ({field_name}): {Fore.RESET}{value}\n"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_check_valid_date():
|
def test_check_valid_date():
|
||||||
@ -157,10 +147,7 @@ def test_check_suspicious_characters(capsys):
|
|||||||
check.suspicious_characters(value, field_name)
|
check.suspicious_characters(value, field_name)
|
||||||
|
|
||||||
captured = capsys.readouterr()
|
captured = capsys.readouterr()
|
||||||
assert (
|
assert captured.out == f"Suspicious character ({field_name}): ˆt\n"
|
||||||
captured.out
|
|
||||||
== f"{Fore.YELLOW}Suspicious character ({field_name}): {Fore.RESET}ˆt\n"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_check_valid_iso639_1_language():
|
def test_check_valid_iso639_1_language():
|
||||||
@ -191,9 +178,7 @@ def test_check_invalid_iso639_1_language(capsys):
|
|||||||
check.language(value)
|
check.language(value)
|
||||||
|
|
||||||
captured = capsys.readouterr()
|
captured = capsys.readouterr()
|
||||||
assert (
|
assert captured.out == f"Invalid ISO 639-1 language: {value}\n"
|
||||||
captured.out == f"{Fore.RED}Invalid ISO 639-1 language: {Fore.RESET}{value}\n"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_check_invalid_iso639_3_language(capsys):
|
def test_check_invalid_iso639_3_language(capsys):
|
||||||
@ -204,9 +189,7 @@ def test_check_invalid_iso639_3_language(capsys):
|
|||||||
check.language(value)
|
check.language(value)
|
||||||
|
|
||||||
captured = capsys.readouterr()
|
captured = capsys.readouterr()
|
||||||
assert (
|
assert captured.out == f"Invalid ISO 639-3 language: {value}\n"
|
||||||
captured.out == f"{Fore.RED}Invalid ISO 639-3 language: {Fore.RESET}{value}\n"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_check_invalid_language(capsys):
|
def test_check_invalid_language(capsys):
|
||||||
@ -217,7 +200,7 @@ def test_check_invalid_language(capsys):
|
|||||||
check.language(value)
|
check.language(value)
|
||||||
|
|
||||||
captured = capsys.readouterr()
|
captured = capsys.readouterr()
|
||||||
assert captured.out == f"{Fore.RED}Invalid language: {Fore.RESET}{value}\n"
|
assert captured.out == f"Invalid language: {value}\n"
|
||||||
|
|
||||||
|
|
||||||
def test_check_invalid_agrovoc(capsys):
|
def test_check_invalid_agrovoc(capsys):
|
||||||
@ -229,10 +212,7 @@ def test_check_invalid_agrovoc(capsys):
|
|||||||
check.agrovoc(value, field_name)
|
check.agrovoc(value, field_name)
|
||||||
|
|
||||||
captured = capsys.readouterr()
|
captured = capsys.readouterr()
|
||||||
assert (
|
assert captured.out == f"Invalid AGROVOC ({field_name}): {value}\n"
|
||||||
captured.out
|
|
||||||
== f"{Fore.RED}Invalid AGROVOC ({field_name}): {Fore.RESET}{value}\n"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_check_valid_agrovoc():
|
def test_check_valid_agrovoc():
|
||||||
@ -254,10 +234,7 @@ def test_check_uncommon_filename_extension(capsys):
|
|||||||
check.filename_extension(value)
|
check.filename_extension(value)
|
||||||
|
|
||||||
captured = capsys.readouterr()
|
captured = capsys.readouterr()
|
||||||
assert (
|
assert captured.out == f"Filename with uncommon extension: {value}\n"
|
||||||
captured.out
|
|
||||||
== f"{Fore.YELLOW}Filename with uncommon extension: {Fore.RESET}{value}\n"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_check_common_filename_extension():
|
def test_check_common_filename_extension():
|
||||||
@ -285,7 +262,7 @@ def test_check_incorrect_iso_639_1_language(capsys):
|
|||||||
captured = capsys.readouterr()
|
captured = capsys.readouterr()
|
||||||
assert (
|
assert (
|
||||||
captured.out
|
captured.out
|
||||||
== f"{Fore.YELLOW}Possibly incorrect language {language} (detected en): {Fore.RESET}{title}\n"
|
== f"Possibly incorrect language {language} (detected en): {title}\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -304,7 +281,7 @@ def test_check_incorrect_iso_639_3_language(capsys):
|
|||||||
captured = capsys.readouterr()
|
captured = capsys.readouterr()
|
||||||
assert (
|
assert (
|
||||||
captured.out
|
captured.out
|
||||||
== f"{Fore.YELLOW}Possibly incorrect language {language} (detected eng): {Fore.RESET}{title}\n"
|
== f"Possibly incorrect language {language} (detected eng): {title}\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user