Source code for app.save_formatting

"""
app/save_formatting.py
-----------------------------------------------------------------------------
Formatted Markdown builders and folder-name generator for the save system.

This module produces the human-readable Markdown files and filesystem-safe
folder names that make up a save package.  Each function is a pure formatter:
it takes scalar values and returns a string.  No I/O, no network, no
``app.*`` dependencies — only ``datetime`` from the standard library.

Exports
-------
save_folder_name(timestamp, input_hash) -> str
    Produce a unique folder name for a save operation.

build_output_md(text, model, temperature, max_tokens, seed, timestamp,
                input_hash, *, system_prompt_hash, ipc_id) -> str
    Format the generated LLM output as a Markdown document with a provenance
    header.

build_baseline_md(text, folder_name) -> str
    Format the stored baseline text as a Markdown document.

build_game_log_md(entries, model, temperature, max_tokens, seed, timestamp) -> str
    Format the in-game chat log as a Markdown table with a provenance header.
    Used by ``POST /api/save_chat`` to write ``game_log.md``.

build_system_prompt_md(prompt_text, folder_name) -> str
    Format the system prompt as a Markdown document with a fenced code block.

Design notes
------------
These functions were extracted from ``main.py`` so that:

1. The save route handler can remain a thin orchestrator.
2. Formatting logic can be unit-tested without the HTTP layer.
3. ``build_output_md`` accepts scalar parameters (not ``SaveRequest``)
   to avoid tight coupling between formatting and the request schema.
"""

from __future__ import annotations

from datetime import datetime

# -----------------------------------------------------------------------------
# Folder name
# -----------------------------------------------------------------------------


def save_folder_name(timestamp: datetime, input_hash: str) -> str:
    """
    Build the directory name under which one save operation is stored.

    Layout: ``YYYYMMDD_HHMMSS_<first 8 characters of input_hash>``

    Example: ``20260218_143022_d845cdcf``

    The hash prefix keeps names distinct when two saves with different
    payloads land in the same second.  For a hex-digest ``input_hash`` the
    result contains only digits, underscores and lowercase hex characters.

    Parameters
    ----------
    timestamp  : Datetime of the save; formatted as given (no timezone
                 conversion is applied here).
    input_hash : Hex digest of the payload; only its first 8 characters
                 are used.

    Returns
    -------
    str : ``<date>_<time>_<hash prefix>`` folder name.
    """
    # The format-spec form delegates to ``timestamp.strftime`` internally.
    stamp = f"{timestamp:%Y%m%d_%H%M%S}"
    short_hash = input_hash[:8]
    return "_".join((stamp, short_hash))
# -----------------------------------------------------------------------------
# Markdown builders
# -----------------------------------------------------------------------------
def build_output_md(
    text: str,
    model: str,
    temperature: float,
    max_tokens: int,
    seed: int,
    timestamp: datetime,
    input_hash: str,
    *,
    system_prompt_hash: str | None = None,
    ipc_id: str | None = None,
) -> str:
    """
    Render generated text as a Markdown document with a provenance header.

    The header is a run of HTML comments (save time, model, temperature,
    token budget, seed, truncated input hash) placed under a ``# Output``
    heading.  Two further comment lines, for ``system_prompt_hash`` and
    ``ipc_id``, are emitted only when those values are truthy.  Every hash
    and identifier is shortened to its first 16 characters followed by
    ``...``.

    Parameters
    ----------
    text               : Generated text, inserted verbatim after the header.
    model              : Model identifier recorded in the header.
    temperature        : Sampling temperature recorded in the header.
    max_tokens         : Token budget recorded in the header.
    seed               : Seed value recorded in the header.
    timestamp          : Datetime of the save, written via ``isoformat()``.
    input_hash         : Hash of the payload (first 16 characters shown).
    system_prompt_hash : Optional hash of the system prompt.
    ipc_id             : Optional provenance-chain identifier.

    Returns
    -------
    str : Markdown text ending with a single trailing newline.
    """
    document = [
        "# Output",
        "",
        "<!-- Axis Descriptor Lab – generated output -->",
        f"<!-- saved: {timestamp.isoformat()} -->",
        f"<!-- model: {model} | temp: {temperature} | max_tokens: {max_tokens} -->",
        f"<!-- seed: {seed} | input_hash: {input_hash[:16]}... -->",
    ]

    # Optional provenance lines: build each candidate (or None when the
    # value is missing/empty) and keep only the ones that exist.
    optional_comments = (
        f"<!-- system_prompt_hash: {system_prompt_hash[:16]}... -->"
        if system_prompt_hash
        else None,
        f"<!-- ipc_id: {ipc_id[:16]}... -->" if ipc_id else None,
    )
    document.extend(
        comment for comment in optional_comments if comment is not None
    )

    document.extend(("", text, ""))
    return "\n".join(document)
def build_baseline_md(text: str, folder_name: str) -> str:
    """
    Render the baseline text as a Markdown document.

    The document consists of a ``# Baseline (A)`` heading, an HTML comment
    naming the save folder, and then the text itself, inserted verbatim.

    Parameters
    ----------
    text        : Baseline text to embed.
    folder_name : Save folder name quoted in the provenance comment.

    Returns
    -------
    str : Markdown text ending with a single trailing newline.
    """
    heading = "# Baseline (A)"
    provenance = f"<!-- Axis Descriptor Lab – baseline text for save {folder_name} -->"
    # The empty strings become blank separator lines; the final one yields
    # the trailing newline.
    return "\n".join((heading, "", provenance, "", text, ""))
def _md_table_cell(text: str) -> str:
    """Return *text* made safe for use inside a single Markdown table cell."""
    # Normalise "\r\n" and bare "\r" first so each line break maps to
    # exactly one "<br>".
    unified = text.replace("\r\n", "\n").replace("\r", "\n")
    # A raw "|" would start a new column; a raw newline would end the row.
    return unified.replace("|", "\\|").replace("\n", "<br>")


def _parse_iso_or_none(value: str | None) -> datetime | None:
    """Parse an ISO timestamp string; return ``None`` if missing or invalid."""
    if not value:
        return None
    try:
        return datetime.fromisoformat(value)
    except (ValueError, TypeError):
        return None


def build_game_log_md(
    entries: list[dict],
    model: str,
    temperature: float,
    max_tokens: int,
    seed: int,
    timestamp: datetime,
) -> str:
    """
    Format the in-game chat log as a Markdown document.

    Produces a Markdown table with one row per entry and an HTML-comment
    provenance header recording model and generation settings.  Inside the
    OOC and IC Text cells, pipe characters are backslash-escaped and line
    breaks are replaced with ``<br>`` so that multi-line messages cannot
    break the table structure.

    Table columns
    -------------
    ``#``        — 1-based row index.
    ``Char``     — ``entry["ch"]`` uppercased.
    ``OOC``      — ``ooc_message`` (empty cell if absent or ``None``).
    ``Channel``  — ``entry["channel"]`` as given.
    ``IC Text``  — ``ic_text``; for non-success entries with no ``ic_text``
                   the ``error_detail`` is shown instead.
    ``Status``   — ``ok`` for success, otherwise the status with any
                   ``fallback.`` substring removed.
    ``Duration`` — ``duration_ms`` in seconds with one decimal (e.g.
                   ``0.8s``), blank if ``None``/absent.
    ``Sent``     — ``HH:MM:SS`` from ``sent_at``, blank if missing or not
                   parseable as an ISO timestamp.
    ``Gap``      — Seconds between this entry's ``sent_at`` and that of the
                   immediately preceding entry; blank for row 1 or when
                   either timestamp is missing, unparseable, or the two
                   cannot be subtracted (naive vs. timezone-aware).

    Parameters
    ----------
    entries     : Chat log entry dicts.  ``ch`` and ``channel`` are
                  required keys; ``ooc_message``, ``ic_text``, ``status``,
                  ``error_detail``, ``sent_at`` and ``duration_ms`` are
                  optional.  A missing/empty ``status`` is treated as
                  ``"success"``.
    model       : Model tag recorded in the header.
    temperature : Sampling temperature recorded in the header.
    max_tokens  : Token budget recorded in the header.
    seed        : Seed value recorded in the header.
    timestamp   : Datetime of the save, written via ``isoformat()``.

    Returns
    -------
    str : Markdown string ending with a single trailing newline.

    Raises
    ------
    KeyError : If an entry lacks the ``ch`` or ``channel`` key.
    """
    lines = [
        "# In-Game Log",
        "",
        "<!-- Axis Descriptor Lab – chat translation log -->",
        f"<!-- saved: {timestamp.isoformat()} -->",
        f"<!-- model: {model} | temp: {temperature} | max_tokens: {max_tokens} | seed: {seed} -->",
        "",
        "| # | Char | OOC | Channel | IC Text | Status | Duration | Sent | Gap |",
        "| --- | --- | --- | --- | --- | --- | --- | --- | --- |",
    ]

    # Parsed ``sent_at`` of the previous row (None if missing/unparseable).
    prev_sent: datetime | None = None

    for i, entry in enumerate(entries, start=1):
        status = entry.get("status") or "success"

        ic_raw = entry.get("ic_text") or ""
        # For failed entries, show the error detail in the IC Text column.
        if status != "success" and not ic_raw:
            ic_raw = entry.get("error_detail") or ""

        ooc_cell = _md_table_cell(entry.get("ooc_message") or "")
        ic_cell = _md_table_cell(ic_raw)

        # Status column: short label.
        status_label = "ok" if status == "success" else status.replace("fallback.", "")

        # Duration column: seconds with 1 decimal.
        duration_ms = entry.get("duration_ms")
        duration_str = f"{duration_ms / 1000:.1f}s" if duration_ms is not None else ""

        # Sent column: parse once and reuse the result for the Gap column.
        sent = _parse_iso_or_none(entry.get("sent_at"))
        sent_str = sent.strftime("%H:%M:%S") if sent is not None else ""

        # Gap column: seconds since the previous entry's sent_at.
        gap_str = ""
        if sent is not None and prev_sent is not None:
            try:
                gap_str = f"{(sent - prev_sent).total_seconds():.1f}s"
            except TypeError:
                # Naive and timezone-aware datetimes cannot be subtracted.
                gap_str = ""
        prev_sent = sent

        lines.append(
            f"| {i} | {entry['ch'].upper()} | {ooc_cell} | {entry['channel']}"
            f" | {ic_cell} | {status_label} | {duration_str} | {sent_str} | {gap_str} |"
        )

    lines.append("")
    return "\n".join(lines)
def build_system_prompt_md(prompt_text: str, folder_name: str) -> str:
    """
    Format the system prompt as a Markdown document with a fenced code block.

    Wrapping the prompt in a fenced code block preserves its whitespace when
    the file is opened in a Markdown viewer.  The fence is normally three
    backticks; if the prompt itself contains a run of three or more
    backticks, the fence is lengthened to one more than the longest run so
    the prompt cannot close the block early.

    Parameters
    ----------
    prompt_text : The system prompt string (may be multi-line), inserted
                  verbatim between the fences.
    folder_name : Save folder name quoted in the provenance comment.

    Returns
    -------
    str : Markdown string ending with a single trailing newline.
    """
    # Find the longest consecutive run of backticks in the prompt.
    longest_run = 0
    current_run = 0
    for char in prompt_text:
        current_run = current_run + 1 if char == "`" else 0
        if current_run > longest_run:
            longest_run = current_run

    # A closing fence must be at least as long as the opening one, so a
    # fence strictly longer than any run in the content is never matched
    # by the content itself.
    fence = "`" * max(3, longest_run + 1)

    lines = [
        "# System Prompt",
        "",
        f"<!-- Axis Descriptor Lab – system prompt for save {folder_name} -->",
        "",
        f"{fence}text",
        prompt_text,
        fence,
        "",
    ]
    return "\n".join(lines)