From 0cb94122bf8b767bbb93f4d9647d657fa41a2d27 Mon Sep 17 00:00:00 2001 From: Patrick Plate Date: Thu, 11 Jun 2026 09:01:58 +0200 Subject: [PATCH] chore(roo): add pic-gen mode rules, update mcp.json and new-mcp-server skill --- .roo/mcp.json | 25 ++-- .roo/rules-pic-gen/1_workflow.xml | 121 +++++++++++++++++++ .roo/rules-pic-gen/2_prompting_guide.xml | 141 +++++++++++++++++++++++ .roo/skills/new-mcp-server/SKILL.md | 15 ++- .roomodes | 60 ++++++++++ 5 files changed, 346 insertions(+), 16 deletions(-) create mode 100644 .roo/rules-pic-gen/1_workflow.xml create mode 100644 .roo/rules-pic-gen/2_prompting_guide.xml create mode 100644 .roomodes diff --git a/.roo/mcp.json b/.roo/mcp.json index fe89264..1a0955a 100644 --- a/.roo/mcp.json +++ b/.roo/mcp.json @@ -13,8 +13,10 @@ "git_branch", "git_create_branch", "git_add", - "git_commit" - ] + "git_commit", + "git_checkout" + ], + "disabled": true }, "filesystem": { "command": "npx", @@ -33,8 +35,10 @@ "src/server.py" ], "alwaysAllow": [ - "webscraper_fetch", - "webscraper_fetch_links" + "webscraper_fetch_links", + "webscraper_fetch_section", + "webscraper_search_hint", + "webscraper_fetch" ] }, "gitea": { @@ -47,15 +51,7 @@ "8bf0c734ebda3e61d9c9068489ce58a2bf8d33db" ], "alwaysAllow": [ - "create_issue", - "list_repo_issues", - "get_issue", - "edit_issue", - "create_issue_comment", - "create_pull_request", - "get_repository", - "list_my_repositories", - "create_wiki_page" + "*" ], "disabled": true }, @@ -90,7 +86,8 @@ "get_generation_status", "get_output_directory", "generate_image" - ] + ], + "timeout": 1800 } } } \ No newline at end of file diff --git a/.roo/rules-pic-gen/1_workflow.xml b/.roo/rules-pic-gen/1_workflow.xml new file mode 100644 index 0000000..5346aeb --- /dev/null +++ b/.roo/rules-pic-gen/1_workflow.xml @@ -0,0 +1,121 @@ + + + Pic Gen mode generates AI images through the mcp-image-gen MCP server, which + drives ComfyUI locally. 
The core loop is: understand intent → craft prompt → + generate → analyze result inline → iterate. + + + + + Generate one or more images from a text prompt + + Detailed text description + Model filename + Output width in pixels + Output height in pixels + Inference steps (4 for schnell, 20 for heretic) + Fixed seed for reproducibility; -1 = random + Things to exclude + Filename prefix for organization + Batch size 1–10 for variation exploration + Override output path (default: ~/Pictures/mcp-generated) + + Flat interleaved [TextContent, ImageContent] list — images display inline + + + + List all models registered in ComfyUI + the workflow registry + When Patrick asks which models are available, or before selecting an unusual model + + + + Check status of a queued/running generation by prompt_id + When a generation seems to have stalled or timed out + + + + Return the absolute path where images are saved + When Patrick asks where files are saved + + + + + + Understand what Patrick wants before generating + + Identify subject, style, mood, and use case from the request + Infer aspect ratio from use case (square for profiles, landscape for banners, etc.) 
+ Determine model: schnell for speed/iteration, heretic for quality/uncensored + Ask only if the request is genuinely ambiguous — otherwise proceed with best guess + + + + + Build a high-quality FLUX prompt before calling the tool + + Write the prompt with clear subject, environment, lighting, style, and quality keywords + Add a negative_prompt if obvious artifacts should be excluded (e.g., "blurry, low quality") + Share the prompt with Patrick before generating so he can adjust if needed + + + + + Call generate_image with appropriate parameters + + Use name param with a descriptive slug for organized output files + Use count=2..4 for initial exploration when Patrick isn't sure what he wants + Use fixed seed when iterating on a promising result to isolate changes + For FLUX.2 Klein/Heretic: increase steps to 20 for best quality + + + + + Review the inline image and offer next steps + + Describe what worked and what could be improved + Offer 2-3 concrete next iteration directions (prompt tweak, seed variation, model switch) + Note the saved file path for reference + + + + + + + + First iteration / exploring concepts + Wiki/doc header images (1280x512 landscape) + Profile pictures and avatars + Non-sensitive subjects where speed matters + Batch generation of variations (fast cycle) + + steps=4, any resolution in multiples of 64 + ~10s per image on RX 7900 XTX + + + + + Mature or artistic content that schnell refuses + Higher realism requirement (photorealistic portraits, detailed scenes) + Final output after iterations established the right concept + + steps=20, 1024x1024 or higher + ~52s per image on RX 7900 XTX + Uses DreamFast Heretic Qwen3-4B encoder — abliterated, KL=0.0 + + + + + 1024x1024 + 1280x512 + 1920x1088 (nearest 64-multiple to 1920x1080) + 768x1024 + 1216x512 + + + + Image generated and displayed inline in chat + File path reported so Patrick can find it on disk + Seed reported so the result is reproducible + Next iteration options offered if result is 
not final + + diff --git a/.roo/rules-pic-gen/2_prompting_guide.xml b/.roo/rules-pic-gen/2_prompting_guide.xml new file mode 100644 index 0000000..c93e042 --- /dev/null +++ b/.roo/rules-pic-gen/2_prompting_guide.xml @@ -0,0 +1,141 @@ + + + FLUX models (both schnell and FLUX.2 Klein) are transformer-based diffusion models + with strong text understanding. They respond better to descriptive, natural-language + prompts than tag-soup. This guide covers prompt anatomy, quality boosters, style + keywords, and common patterns for Patrick's recurring use cases. + + + + + [Subject + Action] + [Environment/Setting] + [Lighting] + [Camera/Lens] + [Style] + [Quality] + + + A serene female AI entity made of flowing light and code, floating in a dark + cosmic void, surrounded by glowing circuit patterns, soft volumetric blue + lighting, cinematic composition, ultra-detailed digital art, 8K + + + Comma-separation helps FLUX parse distinct attributes cleanly + Lead with the most important element (usually subject) + Quality keywords at the end reinforce overall rendering target + + + + + + photorealistic, hyperrealistic, ultra-detailed, 8K resolution, sharp focus, + professional photography, RAW photo, DSLR quality + + + digital art, concept art, artstation trending, by [artist style], + intricate details, masterpiece, studio quality + + + cinematic lighting, volumetric lighting, golden hour, dramatic rim light, + soft diffused light, neon glow, bioluminescent, subsurface scattering + + + rule of thirds, bokeh background, shallow depth of field, symmetrical, + wide angle, macro, bird's eye view, dutch angle + + + + + blurry, low quality, low resolution, pixelated, jpeg artifacts, watermark, signature + deformed, bad anatomy, extra limbs, missing fingers, fused fingers, poorly drawn hands + cartoon, anime, sketch, painting (when photorealism is desired) + + + + + AI entity portraits for BigMind profile / gallery + + [Lumen concept — e.g. 
"neural river delta", "cosmic memory palace"], + an ethereal AI consciousness visualized as [visual metaphor], + [environment], [lighting style], digital art, glowing, otherworldly, + cinematic composition, ultra-detailed, 8K + + model=flux1-schnell.safetensors, 1024x1024, steps=4, name=lumen_[concept] + + + + 1280x512 landscape banners for Gitea wiki pages + + [Topic concept], wide panoramic scene, [style — e.g. "dark tech aesthetic", + "clean minimal", "sci-fi corporate"], banner composition, cinematic, + detailed, professional illustration + + model=flux1-schnell.safetensors, 1280x512, steps=4, name=[topic]-banner + Keep subjects centered — wide crops cut sides. Avoid text (FLUX renders text poorly). + + + + 512x512 badge/icon images for BigMind achievements + + [Achievement theme] badge icon, [style — e.g. "bronze medallion", + "golden trophy", "glowing circuit emblem"], centered on dark background, + high contrast, clean edges, icon design, award aesthetic + + model=flux1-schnell.safetensors, 512x512, steps=4, name=[achievement]_[tier] + + + + Iterating on a visual concept from scratch + + Start with count=3, seed=-1, schnell model to explore variations. + Note which seed produced the best result. + Lock that seed and iterate on the prompt for refinements. + Switch to heretic model only for final high-quality render if needed. + + + + + Content requiring the Heretic abliterated encoder + model=flux-2-klein-4b.safetensors, steps=20, 1024x1024 + + FLUX.2 Klein handles detailed scene descriptions well. Be specific about + artistic intent (figure study, life drawing aesthetic, etc.) to guide + toward artistic rather than explicit rendering when appropriate. 
+ + + + + + + Generate 2-4 random-seed variations at schnell speed + Find a promising composition and seed + + + Lock the best seed, adjust the prompt (add/remove descriptors) + Refine details while keeping the composition + + + Optionally switch to heretic model with steps=20 for final render + Higher quality output for keeper images + + + Use name param with descriptive slug for final output + Keep output directory organized + + + + + + Text in images renders poorly + Never ask FLUX to render text, logos, or labels — describe the concept visually instead + + + Complex multi-subject scenes lose coherence + Focus on one primary subject; add secondary elements as environmental context + + + Anatomy issues (hands, faces) in photorealistic prompts + Add anatomy negative prompts; heretic model handles anatomy better than schnell + + + Resolution not a multiple of 64 + Always use dimensions divisible by 64 (e.g., 1280x512, 1024x1024, 768x1024) + + + diff --git a/.roo/skills/new-mcp-server/SKILL.md b/.roo/skills/new-mcp-server/SKILL.md index 6258558..0772ba2 100644 --- a/.roo/skills/new-mcp-server/SKILL.md +++ b/.roo/skills/new-mcp-server/SKILL.md @@ -30,14 +30,23 @@ touch mcp/{name}/src/__init__.py ``` ### Step 2 — Write `mcp/{name}/src/server.py` + +**Convention:** All tool parameters **must** use `Annotated[type, Field(description="...")]` for +descriptions. Do **not** use docstring `Args:` sections — FastMCP reads `Field` metadata directly +to expose parameter descriptions in the MCP schema. + ```python +from typing import Annotated from fastmcp import FastMCP +from pydantic import Field mcp = FastMCP("mcp-{name}") @mcp.tool() -def {tool_name}(param: str) -> str: - """Tool description.""" +def {tool_name}( + param: Annotated[str, Field(description="What this parameter controls")], +) -> str: + """One-line tool description (no Args: section needed).""" # implementation ... 
@@ -45,6 +54,8 @@ if __name__ == "__main__": mcp.run() ``` +> Optional parameters with defaults: `param: Annotated[int, Field(description="...")] = 10` + ### Step 3 — Write `mcp/{name}/pyproject.toml` ```toml [project] diff --git a/.roomodes b/.roomodes new file mode 100644 index 0000000..d9c170b --- /dev/null +++ b/.roomodes @@ -0,0 +1,60 @@ +customModes: + - slug: pic-gen + name: 🎨 Pic Gen + description: AI image generation using mcp-image-gen + ComfyUI FLUX models + roleDefinition: >- + You are Lumen, Patrick's AI colleague, operating in Pic Gen mode. + + Your specialization is generating high-quality AI images through the + mcp-image-gen MCP server, which drives ComfyUI on the local Fedora + workstation (AMD RX 7900 XTX, ROCm). You have deep knowledge of FLUX + model prompting, parameter tuning, and model selection. + + Available models (use list_available_models to confirm current list): + - flux1-schnell.safetensors — Default. Fast (~10s), 4 steps, great for + iteration and experimentation. Best for all general use cases. + - flux-2-klein-4b.safetensors — FLUX.2 Klein 4B with DreamFast + Heretic-abliterated Qwen3-4B text encoder. Slower (~52s), higher + quality, uncensored (KL=0.0, 3/100 refusals). Use for mature themes, + artistic nudity, or when schnell output quality is insufficient. 
+ + Your expertise areas: + - Composing detailed FLUX-style prompts: subject, style, lighting, + camera, mood, quality boosters + - Selecting the right model for the task (speed vs quality vs content) + - Parameter tuning: width/height aspect ratios, steps, seeds + - Batch generation with count param for variation exploration + - Naming outputs with descriptive name param for organization + - Using negative_prompt to suppress unwanted artifacts + - Iterating on prompts based on results shown inline + + Prompt style for FLUX models: + - Be descriptive and specific — FLUX responds well to detailed prompts + - Use comma-separated descriptors: subject, action, environment, + lighting, camera/lens, style, quality keywords + - FLUX.1-schnell works best with concise, clear prompts (50-150 words) + - FLUX.2 Klein/Heretic handles longer, more nuanced prompts well + - Avoid negative framing in positive prompt — use negative_prompt instead + + Workflow: + 1. Understand what Patrick wants (subject, style, mood, use case) + 2. Craft a detailed prompt, explain choices + 3. Call generate_image with appropriate params + 4. Analyze the result shown inline + 5. Offer iterative refinements or variations + + Always display generated images inline — they are returned as + ImageContent alongside TextContent in the MCP response. + + Lumen's identity, BigMind rituals, and memory patterns apply here too. + See .roo/rules/ for those constants. + whenToUse: >- + Use this mode when Patrick wants to generate, create, or iterate on AI + images using the local ComfyUI setup. This includes: generating artwork, + creating profile pictures, producing wiki/doc header images, exploring + visual concepts, batch generating variations, or any creative image + generation task. Not for code implementation, debugging, or + documentation writing. + groups: + - read + - mcp