feat(mcp-image-gen): add name and count params to generate_image

- Add name (str) param: filename prefix saved as {name}_{timestamp}_{seed}.png
- Add count (int, 1-10) param: generate N images in one call
- Extract _sanitize_name() helper: strips special chars, collapses underscores, caps at 64 chars
- Extract _build_filename() helper: pure function for testable filename construction
- Extract _generate_single() coroutine: clean loop body for batch generation
- Fixed seed batches increment seed per image (seed+i-1) for deterministic variation
- random seed (-1) batches give independent random seeds per image
- Partial batch failures continue (error TextContent in slot, remaining images proceed)
- Returns flat interleaved [Text1, Image1, Text2, Image2, ...] list
- 34/34 tests passing (was 19, added 15 new tests)
This commit is contained in:
Patrick Plate
2026-04-06 07:45:37 +02:00
parent 79a2e1d10a
commit 79f1e6d65f
3 changed files with 794 additions and 45 deletions
+156 -44
View File
@@ -6,6 +6,7 @@ import copy
import json
import os
import random
import re
import time
from datetime import datetime
from pathlib import Path
@@ -22,6 +23,9 @@ COMFYUI_URL = os.environ.get("COMFYUI_URL", "http://localhost:8188").rstrip("/")
IMAGE_OUTPUT_DIR = os.environ.get("IMAGE_OUTPUT_DIR", "~/Pictures/mcp-generated")
COMFYUI_TIMEOUT = int(os.environ.get("COMFYUI_TIMEOUT", "120"))
# Maximum number of images allowed in a single batch call
MAX_COUNT = 10
# Path to the bundled FLUX.1-schnell workflow template
_WORKFLOW_PATH = Path(__file__).parent / "workflows" / "flux_schnell.json"
@@ -126,46 +130,59 @@ def build_flux_workflow(
# ---------------------------------------------------------------------------
# Tools
# Helpers
# ---------------------------------------------------------------------------
@mcp.tool()
async def generate_image(
prompt: str,
width: int = 1024,
height: int = 1024,
steps: int = 4,
model: str = "flux1-schnell.safetensors",
seed: int = -1,
negative_prompt: str = "",
output_dir: str = "",
) -> list:
"""Generate an image from a text prompt using ComfyUI.
def _sanitize_name(name: str) -> str:
"""Sanitize a user-provided name for safe use in filenames.
Returns both a file path (for persistence) and an inline base64 image
(for display in Claude / Roo Code chat).
Replaces whitespace with underscores, strips any characters that are not
alphanumeric, underscores, or hyphens, and collapses consecutive
underscores/hyphens. Returns empty string if nothing usable remains.
"""
name = name.strip()
name = re.sub(r"\s+", "_", name) # spaces → underscores
name = re.sub(r"[^\w\-]", "", name) # strip non-alphanum/underscore/hyphen
name = re.sub(r"[_\-]{2,}", "_", name) # collapse runs
name = name.strip("_-") # trim leading/trailing separators
return name[:64] # cap at 64 chars
def _build_filename(name: str, timestamp: str, actual_seed: int) -> str:
"""Build an output filename from optional name, timestamp and seed."""
sanitized = _sanitize_name(name)
if sanitized:
return f"{sanitized}_{timestamp}_{actual_seed}.png"
return f"{timestamp}_{actual_seed}.png"
async def _generate_single(
client: ComfyUIClient,
prompt: str,
negative_prompt: str,
width: int,
height: int,
steps: int,
seed: int,
model: str,
resolved_output_dir: Path,
name: str,
label: str,
) -> list:
"""Generate a single image and return [TextContent, ImageContent] or [TextContent] on error.
Args:
prompt: Text description of the image to generate.
width: Image width in pixels (default: 1024).
height: Image height in pixels (default: 1024).
steps: Number of inference steps. FLUX.1-schnell works well at 4.
model: ComfyUI model filename (default: flux1-schnell.safetensors).
seed: Random seed for reproducibility. -1 = random.
negative_prompt: Things to exclude from the image (optional).
output_dir: Override output directory. Defaults to IMAGE_OUTPUT_DIR env var
or ~/Pictures/mcp-generated.
Returns:
[TextContent(path + metadata), ImageContent(base64 PNG)]
client: ComfyUIClient instance.
prompt: Positive text prompt.
negative_prompt: Negative text prompt.
width / height: Image dimensions.
steps: Inference steps.
seed: Seed value (-1 = random).
model: ComfyUI model filename.
resolved_output_dir: Resolved output directory Path.
name: User-supplied name prefix (unsanitized).
label: Human-readable label for TextContent prefix (e.g. "[lumen 1/3]").
"""
# Resolve output directory
resolved_output_dir = Path(
output_dir or IMAGE_OUTPUT_DIR
).expanduser().resolve()
client = ComfyUIClient(COMFYUI_URL)
# Build and submit workflow
try:
workflow = build_flux_workflow(
@@ -178,14 +195,13 @@ async def generate_image(
model=model,
)
actual_seed = workflow["_meta"]["actual_seed"]
prompt_id = await client.queue_prompt(workflow)
except httpx.ConnectError:
return [
TextContent(
type="text",
text=(
f"ComfyUI not reachable at {COMFYUI_URL}. "
f"{label} ComfyUI not reachable at {COMFYUI_URL}. "
"Start it with: python main.py --listen"
),
)
@@ -194,7 +210,7 @@ async def generate_image(
return [
TextContent(
type="text",
text=f"ComfyUI returned an error: {e.response.status_code}{e.response.text}",
text=f"{label} ComfyUI returned an error: {e.response.status_code}{e.response.text}",
)
]
@@ -207,7 +223,7 @@ async def generate_image(
TextContent(
type="text",
text=(
f"Generation timed out after {COMFYUI_TIMEOUT}s. "
f"{label} Generation timed out after {COMFYUI_TIMEOUT}s. "
f"prompt_id={prompt_id} — use get_generation_status to check"
),
)
@@ -236,7 +252,7 @@ async def generate_image(
return [
TextContent(
type="text",
text=f"Failed to retrieve generation history: {e}",
text=f"{label} Failed to retrieve generation history: {e}",
)
]
@@ -255,7 +271,7 @@ async def generate_image(
return [
TextContent(
type="text",
text=f"No output image found in history for prompt_id={prompt_id}",
text=f"{label} No output image found in history for prompt_id={prompt_id}",
)
]
@@ -270,7 +286,7 @@ async def generate_image(
return [
TextContent(
type="text",
text=f"Failed to download generated image: {e}",
text=f"{label} Failed to download generated image: {e}",
)
]
@@ -278,14 +294,14 @@ async def generate_image(
try:
resolved_output_dir.mkdir(parents=True, exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"{timestamp}_{actual_seed}.png"
filename = _build_filename(name, timestamp, actual_seed)
out_path = resolved_output_dir / filename
out_path.write_bytes(image_bytes)
except OSError as e:
return [
TextContent(
type="text",
text=f"Cannot write to output directory: {resolved_output_dir}{e}",
text=f"{label} Cannot write to output directory: {resolved_output_dir}{e}",
)
]
@@ -296,7 +312,7 @@ async def generate_image(
TextContent(
type="text",
text=(
f"Generated: {out_path}\n"
f"{label} Generated: {out_path}\n"
f"Seed: {actual_seed}\n"
f"Elapsed: {elapsed:.1f}s\n"
f"Size: {width}x{height}, Steps: {steps}, Model: {model}"
@@ -310,6 +326,102 @@ async def generate_image(
]
# ---------------------------------------------------------------------------
# Tools
# ---------------------------------------------------------------------------
@mcp.tool()
async def generate_image(
prompt: str,
width: int = 1024,
height: int = 1024,
steps: int = 4,
model: str = "flux1-schnell.safetensors",
seed: int = -1,
negative_prompt: str = "",
output_dir: str = "",
name: str = "",
count: int = 1,
) -> list:
"""Generate an image from a text prompt using ComfyUI.
Returns both a file path (for persistence) and an inline base64 image
(for display in Claude / Roo Code chat).
Args:
prompt: Text description of the image to generate.
width: Image width in pixels (default: 1024).
height: Image height in pixels (default: 1024).
steps: Number of inference steps. FLUX.1-schnell works well at 4.
model: ComfyUI model filename (default: flux1-schnell.safetensors).
seed: Random seed for reproducibility. -1 = random.
When count > 1 and seed != -1, seeds are incremented per image
(seed, seed+1, seed+2, ...) to produce deterministic variation.
negative_prompt: Things to exclude from the image (optional).
output_dir: Override output directory. Defaults to IMAGE_OUTPUT_DIR env var
or ~/Pictures/mcp-generated.
name: Optional filename prefix. Saved as {name}_{timestamp}_{seed}.png.
Useful to avoid confusion with auto-generated timestamp filenames.
count: Number of images to generate (110). Each image is generated
sequentially. Partial failures are returned inline — the batch
continues even if one image fails.
Returns:
Flat interleaved list: [TextContent1, ImageContent1, TextContent2, ImageContent2, ...]
On error for any single image, that slot contains only [TextContent(error)].
"""
# Validate count
if count < 1:
return [
TextContent(
type="text",
text=f"count must be at least 1 (got {count}).",
)
]
if count > MAX_COUNT:
return [
TextContent(
type="text",
text=f"count must be at most {MAX_COUNT} (got {count}). Use multiple calls for larger batches.",
)
]
# Resolve output directory once
resolved_output_dir = Path(
output_dir or IMAGE_OUTPUT_DIR
).expanduser().resolve()
client = ComfyUIClient(COMFYUI_URL)
results = []
for i in range(1, count + 1):
# Compute seed for this image:
# - seed=-1 → each image gets an independent random seed
# - fixed seed → increment by i-1 for deterministic variation across the batch
image_seed = seed if seed == -1 else seed + (i - 1)
label = f"[{_sanitize_name(name) or 'image'} {i}/{count}]" if count > 1 else (
f"[{_sanitize_name(name)}]" if _sanitize_name(name) else ""
)
single_result = await _generate_single(
client=client,
prompt=prompt,
negative_prompt=negative_prompt,
width=width,
height=height,
steps=steps,
seed=image_seed,
model=model,
resolved_output_dir=resolved_output_dir,
name=name,
label=label,
)
results.extend(single_result)
return results
@mcp.tool()
async def list_available_models() -> list[str]:
"""List all checkpoint models available in ComfyUI.