import traceback import sys, os import html import re import polars as pl sys.path.insert( 0, os.path.abspath( os.path.join(os.path.dirname(os.path.abspath(__file__)), "../smithy/src") ), ) from smithy import smith_set_from_rcv print("Content-Type: text/html\n") message = "" content_type = os.environ.get("CONTENT_TYPE", "") content_length = int(os.environ.get("CONTENT_LENGTH", 0) or 0) boundary = content_type.split("boundary=")[-1].encode() body = sys.stdin.buffer.read(content_length) parts = body.split(b"--" + boundary) spreadsheet = None for part in parts: if b"Content-Disposition" in part and b'name="spreadsheet"' in part: header, _, data = part.partition(b"\r\n\r\n") filename_match = re.search(rb'filename="([^"]+)"', header) if filename_match: filename = filename_match.group(1).decode() filedata = data.rstrip(b"\r\n--") spreadsheet = (filename, filedata) break if spreadsheet is not None: filename, filedata = spreadsheet if filename and filedata: filepath = os.path.join("/tmp", filename) with open(filepath, "wb") as f: f.write(filedata) try: if filename.endswith(".csv"): df = pl.read_csv(filepath) elif filename.endswith((".xlsx", ".xls")): df = pl.read_excel(filepath) else: message = """
File extension is not valid. Use CSV (.csv) or Excel (.xlsx, .xls).
""" if df is not None: # Normalize df = df.with_columns( [ pl.col(c) .cast(pl.Utf8) .str.strip_chars() .cast(pl.Int64, strict=False) .fill_null(0) for c in df.columns ] ) smiths = smith_set_from_rcv(df) # Solve! message = f"""DataFrame was empty.
""" except Exception as e: message = f"""Internal Error Encountered: {e}
""" traceback.print_exc() else: message = """Filename or File Data not found/valid in form submission.
""" else: message = """No file field found in the form.
""" print("""