From 198acdb1a7d46d2049f4db0dff110fb8f6cd36ab Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Sat, 13 Mar 2021 22:11:26 +0200 Subject: [PATCH] Major refactor Re-work upload and file processing so they are in the same Python function. Now I will start exposing other command line options in the form, like unsafe fixes, excluding fields, etc. Now I see that it is easier to save the POSTed file and process it in the same function so I don't have to pass around the other POSTed form values as URL query parameters. Now, as a result of changing the flow above, I also had to make a change to the way I show the results page. Instead of processing the file and returning the rendered results to the user directly, I process the file, save the rendered results to /tmp, and return a redirect to the user to the results page. --- main.py | 79 ++++++++++++++++++++++++++----------------- templates/index.html | 10 +++++- templates/result.html | 18 +++++----- 3 files changed, 65 insertions(+), 42 deletions(-) diff --git a/main.py b/main.py index ff6f38c..0bb2d00 100644 --- a/main.py +++ b/main.py @@ -29,7 +29,7 @@ def index(): @app.route("/", methods=["POST"]) -def upload_file(): +def process(): uploaded_file = request.files["file"] filename = secure_filename(uploaded_file.filename) @@ -43,42 +43,59 @@ def upload_file(): # generate a base64 representation of the filename to use as a slug base64name = b64encode(filename.encode("ascii")) - return redirect(url_for("process_file", base64slug=base64name)) + # do we need to use secure_filename again here? 
+ input_file = os.path.join(app.config["UPLOAD_PATH"], filename) + # write output file with the same name as the input file plus "-cleaned" + output_file = os.path.join( + app.config["UPLOAD_PATH"], os.path.splitext(filename)[0] + "-cleaned.csv" + ) + + args = ["-i", input_file, "-o", output_file] + + if "unsafe" in request.form: + args.append("-u") + + # run subprocess and capture output as UTF-8 so we get a string instead of + # bytes for ansi2html + results = subprocess.run( + ["csv-metadata-quality"] + args, + capture_output=True, + encoding="UTF-8", + ) + # convert the output to HTML using ansi2html + conv = Ansi2HTMLConverter() + stdout_html = conv.convert(results.stdout) + + # render the results to HTML so we can save them for later and allowing + # the user to share the results page without posting the file again. We + # decode base64name before sending it to convert it from bytes to str. + results_html = render_template( + "result.html", + cli_version=cli_version, + filename=filename, + stdout=stdout_html, + base64name=base64name.decode("ascii"), + ) + # save results to a file so it's easy to have a saved results page when + # we don't know the options a user used to POST the form. + results_html_file = os.path.join( + app.config["UPLOAD_PATH"], base64name.decode("ascii") + ) + with open(results_html_file, "w") as fh: + fh.write(results_html) + + return redirect(url_for("results", base64slug=base64name)) return "No file selected" @app.route("/result/") -def process_file(base64slug): - # get filename from base64-encoded slug - filename = b64decode(base64slug).decode("ascii") +def results(base64slug): + results_html_file = os.path.join(app.config["UPLOAD_PATH"], base64slug) + with open(results_html_file, "r") as fh: + results_html = fh.read() - # do we need to use secure_filename again here? 
- input_file = os.path.join(app.config["UPLOAD_PATH"], filename) - # write output file with the same name as the input file plus "-cleaned" - output_file = os.path.join( - app.config["UPLOAD_PATH"], os.path.splitext(filename)[0] + "-cleaned.csv" - ) - - args = ["-i", input_file, "-o", output_file] - - # run subprocess and capture output as UTF-8 so we get a string instead of - # bytes for ansi2html - results = subprocess.run( - ["csv-metadata-quality"] + args, - capture_output=True, - encoding="UTF-8", - ) - # convert the output to HTML using ansi2html - conv = Ansi2HTMLConverter() - html = conv.convert(results.stdout) - return render_template( - "result.html", - cli_version=cli_version, - filename=filename, - stdout=html, - base64name=base64slug, - ) + return results_html @app.route("/result//download") diff --git a/templates/index.html b/templates/index.html index 65ea1af..a33ea6b 100644 --- a/templates/index.html +++ b/templates/index.html @@ -5,13 +5,21 @@ {% include 'header.html' %}
-

The DSpace CSV Metadata Quality Checker is a pipeline of sanity checks and automated fixes for a number of common issues in metadata files.

+

The DSpace CSV Metadata Quality Checker is a collection of sanity checks and automated fixes for a number of common issues in metadata files.

+

Options

+ +
+ + +
This will remove newlines and perform normalization of Unicode characters. Read more about these unsafe fixes.
+
+
diff --git a/templates/result.html b/templates/result.html index e1dcf69..bb344ff 100644 --- a/templates/result.html +++ b/templates/result.html @@ -4,19 +4,17 @@ {% include 'header.html' %}
-
-

Results

-

Processing {{ filename }}. Download cleaned file.

-
-
- -
-
+
+

The DSpace CSV Metadata Quality Checker is a collection of sanity checks and automated fixes for a number of common issues in metadata files.

+

Results

+

Results for {{ filename }}. Download cleaned file.

+

Log

+

The detailed log of the analysis is below. Green indicates a fix was applied, red indicates an error, and orange indicates a warning.

- {{ stdout | safe }} +{{- stdout | safe -}}
-
+
{% include 'footer.html' %}