mirror of
https://github.com/ilri/csv-metadata-quality-web.git
synced 2025-05-08 22:26:05 +02:00
Move csv_metadata_quality_web to a package
Eventually I will want to refactor so this will be necessary.
This commit is contained in:
0
csv_metadata_quality_web/__init__.py
Normal file
0
csv_metadata_quality_web/__init__.py
Normal file
133
csv_metadata_quality_web/main.py
Normal file
133
csv_metadata_quality_web/main.py
Normal file
@ -0,0 +1,133 @@
|
||||
import os
|
||||
import subprocess
|
||||
from base64 import b64decode, b64encode
|
||||
|
||||
from ansi2html import Ansi2HTMLConverter
|
||||
from csv_metadata_quality.version import VERSION as cli_version
|
||||
from flask import (
|
||||
Flask,
|
||||
abort,
|
||||
redirect,
|
||||
render_template,
|
||||
request,
|
||||
send_from_directory,
|
||||
url_for,
|
||||
)
|
||||
from werkzeug.utils import secure_filename
|
||||
|
||||
app = Flask(__name__)

# Application settings, applied in a single call:
#   MAX_CONTENT_LENGTH  - largest request body accepted (1024 * 1024 bytes)
#   UPLOAD_EXTENSIONS   - file extensions accepted by the upload handler
#   UPLOAD_PATH         - directory where uploads and results are written
app.config.update(
    MAX_CONTENT_LENGTH=1024 * 1024,
    UPLOAD_EXTENSIONS=[".csv"],
    # the only place we can write to on Google App Engine is /tmp
    # see: https://cloud.google.com/appengine/docs/standard/python3/using-temp-files
    UPLOAD_PATH="/tmp",
)
|
||||
|
||||
|
||||
@app.route("/")
def index():
    """Render the upload form.

    The CLI version is handed to the template so it can be displayed on the
    page.
    """
    template_context = {"cli_version": cli_version}
    return render_template("index.html", **template_context)
|
||||
|
||||
|
||||
def _cli_option_args(form):
    """Translate the submitted form fields into csv-metadata-quality options."""
    options = []

    # -x and -a take a value: only pass them when the checkbox is ticked and
    # the accompanying text field is not blank, so the CLI never receives an
    # empty option value.
    for checkbox, text_field, flag in (
        ("excludeCheckbox", "excludeText", "-x"),
        ("agrovocCheckbox", "agrovocText", "-a"),
    ):
        value = form.get(text_field, "").strip()
        if checkbox in form and value:
            options += [flag, value]

    if "unsafe" in form:
        options.append("-u")

    if "experimental" in form:
        options.append("-e")

    return options


@app.route("/", methods=["POST"])
def process():
    """Run csv-metadata-quality on an uploaded CSV and store the report.

    The uploaded file is saved to the configured upload path, the CLI is run
    on it with the options selected in the form, and the rendered results
    page is written to a file named after the base64 encoding of the
    sanitized file name. The client is then redirected to the ``results``
    view for that slug.

    Returns:
        A redirect to the saved results page, or the plain string
        ``"No file selected"`` when the upload has no usable file name.

    Raises:
        werkzeug.exceptions.HTTPException: 400 (via ``abort``) when the file
            extension is not listed in ``UPLOAD_EXTENSIONS``.
    """
    uploaded_file = request.files["file"]
    # the file name may be missing entirely; treat that like an empty name
    filename = secure_filename(uploaded_file.filename or "")

    if filename == "":
        return "No file selected"

    file_ext = os.path.splitext(filename)[1]
    if file_ext not in app.config["UPLOAD_EXTENSIONS"]:
        abort(400)

    upload_path = app.config["UPLOAD_PATH"]

    input_file = os.path.join(upload_path, filename)
    uploaded_file.save(input_file)

    # generate a base64 representation of the filename to use as a slug; keep
    # it as str so it can be used in templates, file paths and URLs alike
    base64name = b64encode(filename.encode("ascii")).decode("ascii")

    # write output file with the same name as the input file plus "-cleaned"
    output_file = os.path.join(
        upload_path, os.path.splitext(filename)[0] + "-cleaned.csv"
    )

    args = ["-i", input_file, "-o", output_file] + _cli_option_args(request.form)

    # Set cache dir to our upload path so we can tell csv-metadata-quality
    # to store its requests-cache database there instead of in the current
    # working directory (we can only write to /tmp on Google App Engine).
    # Also, make sure to keep our PATH!
    env = {
        "REQUESTS_CACHE_DIR": upload_path,
        "PATH": os.environ["PATH"],
    }

    # run subprocess and capture output as UTF-8 so we get a string instead of
    # bytes for ansi2html
    results = subprocess.run(
        ["csv-metadata-quality"] + args,
        capture_output=True,
        encoding="UTF-8",
        env=env,
    )
    # convert the output to HTML using ansi2html
    conv = Ansi2HTMLConverter()
    stdout_html = conv.convert(results.stdout)

    # render the results to HTML so we can save them for later, allowing the
    # user to share the results page without posting the file again
    results_html = render_template(
        "result.html",
        cli_version=cli_version,
        filename=filename,
        stdout=stdout_html,
        base64name=base64name,
    )
    # save results to a file so it's easy to have a saved results page when
    # we don't know the options a user used to POST the form.
    results_html_file = os.path.join(upload_path, base64name)
    with open(results_html_file, "w") as fh:
        fh.write(results_html)

    # pass the slug as text: handing bytes to url_for can end up as a
    # "b'...'" literal in the URL when the converter stringifies the value
    return redirect(url_for("results", base64slug=base64name))
|
||||
|
||||
|
||||
@app.route("/result/<base64slug>")
def results(base64slug):
    """Serve a previously saved results page.

    Args:
        base64slug: slug taken from the URL; the saved results file in the
            upload path carries this exact name.

    Returns:
        The saved HTML as a string.

    Raises:
        werkzeug.exceptions.HTTPException: 404 (via ``abort``) when the slug
            is not valid base64 of ASCII text or no readable file exists
            for it.
    """
    # Slugs are produced by base64-encoding an ASCII file name, so anything
    # that does not decode that way cannot be one of our results files. Both
    # binascii.Error and UnicodeDecodeError are ValueError subclasses.
    try:
        b64decode(base64slug, validate=True).decode("ascii")
    except ValueError:
        abort(404)

    results_html_file = os.path.join(app.config["UPLOAD_PATH"], base64slug)
    try:
        with open(results_html_file, "r") as fh:
            return fh.read()
    except OSError:
        # missing (or otherwise unreadable) results: report "not found"
        # instead of letting the error surface as a server error
        abort(404)
|
||||
|
||||
|
||||
@app.route("/result/<base64slug>/download")
def result_download(base64slug):
    """Send the cleaned CSV belonging to a results slug as an attachment.

    Args:
        base64slug: base64 encoding of the originally uploaded file name.

    Returns:
        The response built by ``send_from_directory`` for the
        ``<name>-cleaned.csv`` file in the upload path.

    Raises:
        werkzeug.exceptions.HTTPException: 404 (via ``abort``) when the slug
            is not valid base64 of ASCII text.
    """
    # binascii.Error and UnicodeDecodeError are both ValueError subclasses;
    # a malformed slug should be "not found", not a server error
    try:
        filename = b64decode(base64slug, validate=True).decode("ascii")
    except ValueError:
        abort(404)

    # rebuild the output name the same way the upload handler does and
    # sanitize it again because the slug comes from the URL
    filename = secure_filename(os.path.splitext(filename)[0] + "-cleaned.csv")

    return send_from_directory(app.config["UPLOAD_PATH"], filename, as_attachment=True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: serve the app on 127.0.0.1:8080 with Flask's
    # debug mode switched on.
    app.run(debug=True, port=8080, host="127.0.0.1")
|
6
csv_metadata_quality_web/static/css/style.min.css
vendored
Normal file
6
csv_metadata_quality_web/static/css/style.min.css
vendored
Normal file
File diff suppressed because one or more lines are too long
5
csv_metadata_quality_web/templates/footer.html
Normal file
5
csv_metadata_quality_web/templates/footer.html
Normal file
@ -0,0 +1,5 @@
|
||||
<footer class="footer mt-auto py-3">
|
||||
<div class="container">
|
||||
<span><a href="https://github.com/ilri/csv-metadata-quality">csv-metadata-quality</a> v{{ cli_version }}</span>
|
||||
</div>
|
||||
</footer>
|
8
csv_metadata_quality_web/templates/head.html
Normal file
8
csv_metadata_quality_web/templates/head.html
Normal file
@ -0,0 +1,8 @@
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<meta name="description" content="Simple web interface for the DSpace CSV Metadata Quality tool.">
|
||||
<meta name="author" content="Alan Orth">
|
||||
<title>DSpace CSV Metadata Quality Checker</title>
|
||||
<link href="{{ url_for('static', filename='css/style.min.css') }}" rel="stylesheet">
|
||||
</head>
|
7
csv_metadata_quality_web/templates/header.html
Normal file
7
csv_metadata_quality_web/templates/header.html
Normal file
@ -0,0 +1,7 @@
|
||||
<header>
|
||||
<div class="navbar navbar-dark bg-dark shadow-sm">
|
||||
<div class="container">
|
||||
<a href="/" class="navbar-brand text-white text-decoration-none">DSpace CSV Metadata Quality Checker</a>
|
||||
</div>
|
||||
</div>
|
||||
</header>
|
52
csv_metadata_quality_web/templates/index.html
Normal file
52
csv_metadata_quality_web/templates/index.html
Normal file
@ -0,0 +1,52 @@
|
||||
<!doctype html>
|
||||
<html lang="en" class="h-100">
|
||||
{% include 'head.html' %}
|
||||
<body class="d-flex flex-column h-100">
|
||||
{% include 'header.html' %}
|
||||
<main class="flex-shrink-0">
|
||||
<div class="container py-3">
|
||||
<p class="lead">The DSpace CSV Metadata Quality Checker is a collection of sanity checks and automated fixes for a number of common issues in metadata files.</p>
|
||||
<form method="POST" action="" enctype="multipart/form-data">
|
||||
<div class="mb-3">
|
||||
<label for="formFile" class="form-label">Select a CSV file to process (or try <a href="https://raw.githubusercontent.com/ilri/csv-metadata-quality/master/data/test.csv">test.csv</a>)</label>
|
||||
<input class="form-control" type="file" id="formFile" name="file" accept=".csv">
|
||||
</div>
|
||||
|
||||
<div class="input-group mb-3">
|
||||
<div class="input-group-text">
|
||||
Skip field(s)
|
||||
<input class="form-check-input" type="checkbox" id="excludeFieldsCheckbox" name="excludeCheckbox" aria-label="Checkbox for following text input">
|
||||
</div>
|
||||
<input type="text" class="form-control" placeholder="dcterms.subject" id="excludeFieldsText" name="excludeText" aria-label="Text input with checkbox">
|
||||
<div id="excludeHelp" class="form-text">Optionally indicate fields to skip during analysis. Separate multiple fields with a comma, for example: <code>dcterms.issued,dcterms.subject</code>.</div>
|
||||
</div>
|
||||
|
||||
<div class="input-group mb-3">
|
||||
<div class="input-group-text">
|
||||
Validate field(s) against AGROVOC
|
||||
<input class="form-check-input" type="checkbox" checked="true" id="agrovocFieldsCheckbox" name="agrovocCheckbox" aria-label="Checkbox for following text input">
|
||||
</div>
|
||||
<input type="text" class="form-control" value="dcterms.subject" id="agrovocFieldsText" name="agrovocText" aria-label="Text input with checkbox">
|
||||
<div id="agrovocHelp" class="form-text">Optionally indicate fields to validate against <a href="https://agrovoc.uniroma2.it/agrovoc/agrovoc/en/" title="AGROVOC Multilingual Thesaurus">AGROVOC</a>. Separate multiple fields with a comma, for example: <code>dcterms.subject,cg.coverage.country</code>. Note: this can take an extra minute or more depending on your data. If you have a problem please try again and it will generally be faster the second time.</div>
|
||||
</div>
|
||||
|
||||
<div class="mb-3 form-check form-switch">
|
||||
<input class="form-check-input" type="checkbox" checked="true" id="unsafeCheckbox" name="unsafe">
|
||||
<label class="form-check-label" for="unsafeCheckbox" aria-describedby="unsafeHelp">Enable unsafe fixes</label>
|
||||
<div id="unsafeHelp" class="form-text">This will remove newlines and perform <a href="https://withblue.ink/2019/03/11/why-you-need-to-normalize-unicode-strings.html" title='When "Zoë" !== "Zoë". Or why you need to normalize Unicode strings'>normalization of Unicode characters</a>. Read more about these <a href="https://github.com/ilri/csv-metadata-quality#unsafe-fixes">unsafe fixes</a>.</div>
|
||||
</div>
|
||||
|
||||
<div class="mb-3 form-check form-switch">
|
||||
<input class="form-check-input" type="checkbox" id="experimentalCheckbox" name="experimental">
|
||||
<label class="form-check-label" for="experimentalCheckbox" aria-describedby="experimentalHelp">Enable experimental checks</label>
|
||||
<div id="experimentalHelp" class="form-text">Attempt to validate whether the value of an item's <code>dc.language.iso</code> or <code>dcterms.language</code> field matches the <em>actual</em> language of text used in its title, abstract, and citation. Read more about these <a href="https://github.com/ilri/csv-metadata-quality#experimental-checks">experimental checks</a>.</div>
|
||||
</div>
|
||||
|
||||
<button type="submit" class="btn btn-primary">Submit</button>
|
||||
</form>
|
||||
</div>
|
||||
</main>
|
||||
|
||||
{% include 'footer.html' %}
|
||||
</body>
|
||||
</html>
|
21
csv_metadata_quality_web/templates/result.html
Normal file
21
csv_metadata_quality_web/templates/result.html
Normal file
@ -0,0 +1,21 @@
|
||||
<!doctype html>
|
||||
<html lang="en" class="h-100">
|
||||
{% include 'head.html' %}
|
||||
<body class="d-flex flex-column h-100">
|
||||
{% include 'header.html' %}
|
||||
<main class="flex-shrink-0">
|
||||
<div class="container py-3">
|
||||
<p class="lead">The DSpace CSV Metadata Quality Checker is a collection of sanity checks and automated fixes for a number of common issues in metadata files.</p>
|
||||
<h3>Results</h3>
|
||||
<p>Results for <code>{{ filename }}</code>. Download <a href="/result/{{ base64name }}/download" title="{{ filename | replace('.csv', '-cleaned.csv') }}">cleaned file</a>.</p>
|
||||
<h3>Log</h3>
|
||||
<p>The detailed log of the analysis is below. <span style="color: #00aa00">Green</span> indicates a fix was applied, <span style="color: #aa0000">red</span> indicates an error, and <span style="color: #aa5500">orange</span> indicates a warning.</p>
|
||||
<blockquote>
|
||||
{{- stdout | safe -}}
|
||||
</blockquote>
|
||||
</div>
|
||||
</main>
|
||||
|
||||
{% include 'footer.html' %}
|
||||
</body>
|
||||
</html>
|
Reference in New Issue
Block a user