Source code for app.save_formatting

"""
app/save_formatting.py
-----------------------------------------------------------------------------
Formatted Markdown builders and folder-name generator for the save system.

This module produces the human-readable Markdown files and filesystem-safe
folder names that make up a save package.  Each function is a pure formatter:
it takes scalar values and returns a string.  No I/O, no network, no
``app.*`` dependencies — only ``datetime`` from the standard library.

Exports
-------
save_folder_name(timestamp, input_hash) -> str
    Produce a unique folder name for a save operation.

build_output_md(text, model, temperature, max_tokens, seed, timestamp,
                input_hash, *, system_prompt_hash, ipc_id) -> str
    Format the generated LLM output as a Markdown document with a provenance
    header.

build_baseline_md(text, folder_name) -> str
    Format the stored baseline text as a Markdown document.

build_game_log_md(entries, model, temperature, max_tokens, seed, timestamp) -> str
    Format the in-game chat log as a Markdown table with a provenance header.
    Used by ``POST /api/save_chat`` to write ``game_log.md``.

build_system_prompt_md(prompt_text, folder_name) -> str
    Format the system prompt as a Markdown document with a fenced code block.

Design notes
------------
These functions were extracted from ``main.py`` so that:

1. The save route handler can remain a thin orchestrator.
2. Formatting logic can be unit-tested without the HTTP layer.
3. ``build_output_md`` accepts scalar parameters (not ``SaveRequest``)
   to avoid tight coupling between formatting and the request schema.
"""

from __future__ import annotations

from datetime import datetime

# -----------------------------------------------------------------------------
# Folder name
# -----------------------------------------------------------------------------



[docs]
def save_folder_name(timestamp: datetime, input_hash: str) -> str:
    """
    Build the folder name used for one save operation.

    Layout: ``YYYYMMDD_HHMMSS_<hash-prefix>``, where the prefix is the first
    eight characters of ``input_hash``.

    Example: ``20260218_143022_d845cdcf``

    Two saves in the same second still get different names as long as their
    hashes differ in the first eight characters.  The hash is not validated
    here: the result is only as filesystem-safe as the characters the caller
    passes in (a hex digest yields digits, underscores and hex letters only).
    A hash shorter than eight characters is used as-is.

    Parameters
    ----------
    timestamp  : Datetime of the save; formatted with
                 ``strftime("%Y%m%d_%H%M%S")`` without any timezone
                 conversion.
    input_hash : Hash string identifying the payload (a 64-char SHA-256 hex
                 digest is expected by the surrounding save system).

    Returns
    -------
    str : ``<date>_<time>_<hash prefix>``.
    """
    stamp = timestamp.strftime("%Y%m%d_%H%M%S")
    return "_".join((stamp, input_hash[:8]))



# -----------------------------------------------------------------------------
# Markdown builders
# -----------------------------------------------------------------------------



[docs]
def build_output_md(
    text: str,
    model: str,
    temperature: float,
    max_tokens: int,
    seed: int,
    timestamp: datetime,
    input_hash: str,
    *,
    system_prompt_hash: str | None = None,
    ipc_id: str | None = None,
) -> str:
    """
    Render generated text as a Markdown document with a provenance header.

    The document starts with an ``# Output`` heading, followed by a run of
    HTML comments recording the save time, the generation settings and a
    16-character prefix of each hash, then a blank line, the text itself and
    a trailing newline.  HTML comments are used so the metadata stays in the
    file without being shown by a Markdown renderer.

    Parameters
    ----------
    text               : The generated text, inserted verbatim.
    model              : Model identifier used for the generation.
    temperature        : Sampling temperature used.
    max_tokens         : Token budget used.
    seed               : Seed value recorded in the header.
    timestamp          : Datetime of the save; written via ``isoformat()``.
    input_hash         : Payload hash; only the first 16 characters are
                         written, always followed by ``...``.
    system_prompt_hash : Optional system-prompt hash; a comment line is added
                         only when the value is truthy.
    ipc_id             : Optional provenance-chain identifier; a comment line
                         is added only when the value is truthy.

    Returns
    -------
    str : Markdown string ready to write to disk.
    """
    document = [
        "# Output",
        "",
        "<!-- Axis Descriptor Lab – generated output -->",
        f"<!-- saved: {timestamp.isoformat()} -->",
        f"<!-- model: {model} | temp: {temperature} | max_tokens: {max_tokens} -->",
        f"<!-- seed: {seed} | input_hash: {input_hash[:16]}... -->",
    ]

    # Optional provenance values, in the order they appear in the header.
    # ``None`` and empty strings are skipped alike.
    optional_fields = (
        ("system_prompt_hash", system_prompt_hash),
        ("ipc_id", ipc_id),
    )
    for label, value in optional_fields:
        if value:
            document.append(f"<!-- {label}: {value[:16]}... -->")

    # Blank separator, the body, and an empty last item so the joined
    # string ends with a newline.
    document.extend(("", text, ""))
    return "\n".join(document)




[docs]
def build_baseline_md(text: str, folder_name: str) -> str:
    """
    Render the baseline text as a Markdown document.

    The document consists of a ``# Baseline (A)`` heading, an HTML comment
    naming the save folder, the text inserted verbatim, and a trailing
    newline, each separated by a blank line.

    Parameters
    ----------
    text        : The baseline text to store.
    folder_name : Save folder name, embedded in the provenance comment.

    Returns
    -------
    str : Markdown string ready to write to disk.
    """
    provenance = f"<!-- Axis Descriptor Lab – baseline text for save {folder_name} -->"
    heading_block = "# Baseline (A)\n\n" + provenance
    # Blank line between the comment and the body; single newline at EOF.
    return heading_block + "\n\n" + text + "\n"




[docs]
def _md_table_cell(text: str) -> str:
    """Return *text* made safe for use inside one Markdown table cell."""
    # A literal newline ends the table row, so every newline flavour is
    # collapsed to an inline ``<br>``.  "\r\n" is handled first so that it
    # produces a single break rather than two.
    single_line = (
        text.replace("\r\n", "\n").replace("\r", "\n").replace("\n", "<br>")
    )
    # An unescaped pipe would start a new column.
    return single_line.replace("|", "\\|")


def _parse_sent_at(value: object) -> datetime | None:
    """Parse an ISO-8601 ``sent_at`` value; ``None`` if absent or malformed."""
    if not value or not isinstance(value, str):
        return None
    candidates = [value]
    # ``datetime.fromisoformat`` only accepts a trailing "Z" from Python 3.11
    # on; retry with the equivalent explicit UTC offset for older versions.
    if value.endswith("Z"):
        candidates.append(value[:-1] + "+00:00")
    for candidate in candidates:
        try:
            return datetime.fromisoformat(candidate)
        except ValueError:
            continue
    return None


def build_game_log_md(
    entries: list[dict],
    model: str,
    temperature: float,
    max_tokens: int,
    seed: int,
    timestamp: datetime,
) -> str:
    """
    Format the in-game chat log as a Markdown document.

    Produces a Markdown table with one row per entry and an HTML-comment
    provenance header recording model and generation settings.  In the OOC
    and IC text cells, pipe characters are backslash-escaped and newlines are
    replaced with ``<br>`` so that multi-line messages cannot break the table
    structure.

    Table columns
    -------------
    ``#``        — 1-based row index.
    ``Char``     — ``entry["ch"]`` uppercased.
    ``OOC``      — ``ooc_message`` (empty cell if absent or ``None``).
    ``Channel``  — ``entry["channel"]``, written as-is.
    ``IC Text``  — ``ic_text``; for a non-success entry with no ``ic_text``
                   the ``error_detail`` is shown instead.
    ``Status``   — ``ok`` for success, otherwise the status with any
                   ``fallback.`` substring removed.
    ``Duration`` — ``duration_ms`` in seconds with one decimal (e.g.
                   ``0.8s``), blank if ``None`` or absent.
    ``Sent``     — ``HH:MM:SS`` from ``sent_at``, blank if absent or
                   unparseable.
    ``Gap``      — Seconds between this entry's ``sent_at`` and that of the
                   immediately preceding entry; blank for row 1, when either
                   timestamp is missing or unparseable, or when one is
                   timezone-aware and the other is naive.

    Parameters
    ----------
    entries     : Chat-log entry dicts.  ``ch`` and ``channel`` are required;
                  ``ooc_message``, ``ic_text``, ``status``, ``error_detail``,
                  ``sent_at`` and ``duration_ms`` are optional.  A missing or
                  falsy ``status`` is treated as ``"success"``.
    model       : Model tag recorded in the provenance header.
    temperature : Sampling temperature recorded in the header.
    max_tokens  : Token budget recorded in the header.
    seed        : Seed value recorded in the header.
    timestamp   : Datetime of the save; written via ``isoformat()``.

    Returns
    -------
    str : Markdown string ready to write to disk.

    Raises
    ------
    KeyError : If an entry lacks the ``ch`` or ``channel`` key.
    """
    lines = [
        "# In-Game Log",
        "",
        "<!-- Axis Descriptor Lab – chat translation log -->",
        f"<!-- saved: {timestamp.isoformat()} -->",
        f"<!-- model: {model} | temp: {temperature} | max_tokens: {max_tokens} | seed: {seed} -->",
        "",
        "| # | Char | OOC | Channel | IC Text | Status | Duration | Sent | Gap |",
        "| --- | --- | --- | --- | --- | --- | --- | --- | --- |",
    ]

    # Parsed ``sent_at`` of the previous row; ``None`` when that row had no
    # usable timestamp, which leaves the following row's Gap cell blank.
    prev_sent: datetime | None = None
    for i, entry in enumerate(entries, start=1):
        ooc_cell = _md_table_cell(entry.get("ooc_message") or "")

        status = entry.get("status") or "success"
        ic_raw = entry.get("ic_text") or ""
        # For failed entries, show the error detail in the IC Text column.
        if status != "success" and not ic_raw:
            ic_raw = entry.get("error_detail") or ""
        ic_cell = _md_table_cell(ic_raw)

        # Status column: short label.
        status_label = "ok" if status == "success" else status.replace("fallback.", "")

        # Duration column: seconds with 1 decimal.
        duration_ms = entry.get("duration_ms")
        duration_str = f"{duration_ms / 1000:.1f}s" if duration_ms is not None else ""

        # Sent column: parse once, reuse the result for the Gap column.
        sent = _parse_sent_at(entry.get("sent_at"))
        sent_str = sent.strftime("%H:%M:%S") if sent is not None else ""

        # Gap column: seconds since the previous entry's sent_at.
        gap_str = ""
        if sent is not None and prev_sent is not None:
            try:
                gap_str = f"{(sent - prev_sent).total_seconds():.1f}s"
            except TypeError:
                # Naive and timezone-aware datetimes cannot be subtracted.
                gap_str = ""
        prev_sent = sent

        lines.append(
            f"| {i} | {entry['ch'].upper()} | {ooc_cell} | {entry['channel']}"
            f" | {ic_cell} | {status_label} | {duration_str} | {sent_str} | {gap_str} |"
        )
    lines.append("")
    return "\n".join(lines)




[docs]
def build_system_prompt_md(prompt_text: str, folder_name: str) -> str:
    """
    Format the system prompt as a Markdown document with a fenced code block.

    Wrapping in a fenced code block preserves all whitespace and makes the
    prompt clearly machine-readable when opened in a Markdown viewer.

    The fence is normally three backticks.  If the prompt itself contains a
    run of three or more backticks, the fence is lengthened to one backtick
    more than the longest such run, so the prompt's own fences cannot close
    the block early.

    Parameters
    ----------
    prompt_text : The system prompt string (may be multi-line); inserted
                  verbatim between the fences.
    folder_name : Save folder name (for the provenance comment).

    Returns
    -------
    str : Markdown string ready to write to disk.
    """
    # Find the longest consecutive run of backticks in the prompt.
    longest_run = 0
    current_run = 0
    for char in prompt_text:
        current_run = current_run + 1 if char == "`" else 0
        if current_run > longest_run:
            longest_run = current_run

    # A closing fence needs at least as many backticks as the opening one, so
    # a fence strictly longer than any run in the body is never matched by it.
    fence = "`" * max(3, longest_run + 1)

    lines = [
        "# System Prompt",
        "",
        f"<!-- Axis Descriptor Lab – system prompt for save {folder_name} -->",
        "",
        f"{fence}text",
        prompt_text,
        fence,
        "",
    ]
    return "\n".join(lines)