2021-03-11 14:55:58 +01:00
|
|
|
import os
|
2021-03-12 18:14:49 +01:00
|
|
|
import subprocess
|
2021-03-12 20:22:03 +01:00
|
|
|
from base64 import b64decode, b64encode
|
2021-03-11 13:32:50 +01:00
|
|
|
|
2021-03-12 18:14:49 +01:00
|
|
|
from ansi2html import Ansi2HTMLConverter
|
2021-03-11 19:29:43 +01:00
|
|
|
from csv_metadata_quality.version import VERSION as cli_version
|
2021-03-12 22:00:48 +01:00
|
|
|
from flask import (
|
|
|
|
Flask,
|
|
|
|
abort,
|
|
|
|
redirect,
|
|
|
|
render_template,
|
|
|
|
request,
|
|
|
|
send_from_directory,
|
|
|
|
url_for,
|
|
|
|
)
|
2021-03-11 14:55:58 +01:00
|
|
|
from werkzeug.utils import secure_filename
|
2021-03-11 13:32:50 +01:00
|
|
|
|
|
|
|
app = Flask(__name__)
# All upload-related settings are applied in one call so they can be read
# together.
app.config.update(
    # 1 MiB, assigned to Flask's MAX_CONTENT_LENGTH setting
    MAX_CONTENT_LENGTH=1024 * 1024,
    # file extensions accepted by the upload handler
    UPLOAD_EXTENSIONS=[".csv"],
    # the only place we can write to on Google App Engine is /tmp
    # see: https://cloud.google.com/appengine/docs/standard/python3/using-temp-files
    UPLOAD_PATH="/tmp",
)
|
2021-03-11 13:32:50 +01:00
|
|
|
|
|
|
|
|
|
|
|
@app.route("/")
def index():
    """Render the landing page, passing the CLI version to the template."""
    template_context = {"cli_version": cli_version}
    return render_template("index.html", **template_context)
|
2021-03-11 14:55:58 +01:00
|
|
|
|
|
|
|
|
|
|
|
@app.route("/", methods=["POST"])
def process():
    """Run csv-metadata-quality on an uploaded CSV and redirect to the report.

    The uploaded file is saved into ``UPLOAD_PATH``, the CLI is run on it
    (writing ``<name>-cleaned.csv`` next to it), and the CLI's stdout is
    rendered into ``result.html``. The rendered page is stored in
    ``UPLOAD_PATH`` under a base64 slug of the file name so that it can be
    served again later without re-posting the file.

    Form fields read: ``file`` (the upload), ``excludeCheckbox`` and
    ``excludeText`` (passed as ``-x``), ``unsafe`` (``-u``) and
    ``experimental`` (``-e``).

    Returns:
        A redirect to the ``results`` view, or the plain string
        ``"No file selected"`` when the upload has no usable file name.

    Aborts with 400 when the file extension is not in
    ``UPLOAD_EXTENSIONS``.
    """
    uploaded_file = request.files["file"]
    # ``filename`` may be None/empty when no file was chosen; treat both alike.
    filename = secure_filename(uploaded_file.filename or "")

    if not filename:
        return "No file selected"

    stem, file_ext = os.path.splitext(filename)
    if file_ext not in app.config["UPLOAD_EXTENSIONS"]:
        abort(400)

    upload_dir = app.config["UPLOAD_PATH"]

    # filename already went through secure_filename above, so it is safe to
    # join onto the upload directory without sanitizing it again
    input_file = os.path.join(upload_dir, filename)
    uploaded_file.save(input_file)

    # write output file with the same name as the input file plus "-cleaned"
    output_file = os.path.join(upload_dir, stem + "-cleaned.csv")

    # generate a base64 representation of the filename to use as a slug.
    # Decode it to str once, here: it is used in the template, as a file
    # name and in the redirect URL. Handing bytes to url_for is fragile,
    # since a URL builder that stringifies its value would emit "b'...'".
    base64name = b64encode(filename.encode("ascii")).decode("ascii")

    args = ["-i", input_file, "-o", output_file]

    if "excludeCheckbox" in request.form and "excludeText" in request.form:
        args.extend(["-x", request.form["excludeText"]])

    if "unsafe" in request.form:
        args.append("-u")

    if "experimental" in request.form:
        args.append("-e")

    # run subprocess and capture output as UTF-8 so we get a string instead of
    # bytes for ansi2html
    results = subprocess.run(
        ["csv-metadata-quality"] + args,
        capture_output=True,
        encoding="UTF-8",
    )

    # convert the output to HTML using ansi2html
    conv = Ansi2HTMLConverter()
    stdout_html = conv.convert(results.stdout)

    # render the results to HTML so we can save them for later, allowing
    # the user to share the results page without posting the file again
    results_html = render_template(
        "result.html",
        cli_version=cli_version,
        filename=filename,
        stdout=stdout_html,
        base64name=base64name,
    )

    # save results to a file so it's easy to have a saved results page when
    # we don't know the options a user used to POST the form.
    results_html_file = os.path.join(upload_dir, base64name)
    with open(results_html_file, "w") as fh:
        fh.write(results_html)

    return redirect(url_for("results", base64slug=base64name))
|
|
|
|
|
|
|
|
|
2021-03-12 20:22:03 +01:00
|
|
|
@app.route("/result/<base64slug>")
def results(base64slug):
    """Return the saved results page stored under ``base64slug``.

    The page is read from a file named ``base64slug`` inside
    ``UPLOAD_PATH``.

    Aborts with 404 when the slug is not valid base64 or when no saved page
    exists for it (previously both cases surfaced as a 500).
    """
    # The slug comes straight from the URL. Only accept strings that are
    # valid base64 so that arbitrary names in the upload directory (for
    # example someone's uploaded "data.csv", or "..") cannot be requested.
    try:
        b64decode(base64slug, validate=True)
    except ValueError:
        # binascii.Error (bad alphabet/padding) is a ValueError subclass, as
        # is the error raised for non-ASCII input.
        abort(404)

    results_html_file = os.path.join(app.config["UPLOAD_PATH"], base64slug)

    try:
        with open(results_html_file, "r") as fh:
            results_html = fh.read()
    except (FileNotFoundError, IsADirectoryError):
        # Nothing was saved under this slug (or it is no longer there).
        abort(404)

    return results_html
|
2021-03-12 18:14:49 +01:00
|
|
|
|
2021-03-12 22:00:48 +01:00
|
|
|
|
|
|
|
@app.route("/result/<base64slug>/download")
def result_download(base64slug):
    """Send the cleaned CSV that belongs to ``base64slug`` as an attachment.

    The slug is decoded back to the original file name, and the cleaned file
    name is derived from it as ``<name>-cleaned.csv``. That name is passed
    through ``secure_filename`` before being served from ``UPLOAD_PATH``.

    Aborts with 404 when the slug is not valid base64-encoded ASCII
    (previously this surfaced as a 500).
    """
    try:
        original_name = b64decode(base64slug, validate=True).decode("ascii")
    except ValueError:
        # Covers binascii.Error (malformed base64) and UnicodeDecodeError
        # (decoded bytes are not ASCII); both are ValueError subclasses.
        abort(404)

    filename = secure_filename(os.path.splitext(original_name)[0] + "-cleaned.csv")

    return send_from_directory(app.config["UPLOAD_PATH"], filename, as_attachment=True)
|
2021-03-11 13:32:50 +01:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Only reached when this file is executed directly: start the app on
    # the loopback interface with debug mode switched on.
    run_options = {"host": "127.0.0.1", "port": 8080, "debug": True}
    app.run(**run_options)
|