#!/usr/bin/env python3 """Advanced Skill Initializer Creates a new skill folder with a more complete SKILL.md skeleton that covers the full skill lifecycle: scope → design → validation → testing → packaging → distribution → iteration. Usage: ./init_skill_advanced.py --path Examples: ./init_skill_advanced.py notion-project-setup --path ./skills/public ./init_skill_advanced.py linear-sprint-planner --path /tmp/skills """ from __future__ import annotations import re import sys from datetime import date, timedelta from pathlib import Path SKILL_TEMPLATE_ADV = """--- name: {skill_name} description: "TODO: Write this as a decision boundary: what it does, when to use it, when not to use it, and what successful output looks like. Include real trigger phrases/file types. Keep under 1024 chars." version: {skill_version} metadata: author: "TODO" --- # {skill_title} [TODO: Write a short opening summary in declarative sentences. Explain what this skill is for, what it does, and what it does not do. Do not force this summary into imperative voice.] ## Single responsibility - Primary job: [TODO: the one main thing this skill owns] - Not this skill's job: [TODO] - Split / handoff rule: [TODO: if the request also needs X/Y, hand off or compose with another skill] [TODO: Who this skill acts as, for whom, and from what professional perspective.] Use when: - [TODO] Do not use when: - [TODO] Inputs: - [TODO] Successful output: - [TODO] ## Primary use cases (2-3) 1) **[Use case name]** - Trigger examples: "[user phrasing]", "[paraphrase]" - Required inputs: [TODO] - Expected result: [what done looks like] 2) **[Use case name]** - Trigger examples: ... - Required inputs: ... - Expected result: ... 3) **[Use case name]** (optional) ## Communication notes - User vocabulary: [TODO: terms the user already uses] - Avoid jargon: [TODO: terms to translate or avoid] - Least-surprise rule: [TODO: what users will reasonably expect this skill to do] ## Routing boundaries - Neighboring skills / workflows: [TODO] - Negative triggers: [TODO: what this skill should NOT own] - Handoff rule: [TODO: when another skill should take over] ## Language coverage - Primary language(s): [TODO] - Mixed-language trigger phrases: [TODO] - Locale-specific wording risks: [TODO] ## Host / portability targets - Primary host(s): [TODO: Claude Code / Codex / OpenClaw / OpenAI API / Copilot / VS Code / other] - Secondary host(s): [TODO] - Unsupported host(s): [TODO] - Core portable surface: [TODO: skill pack only / skill + scripts / skill + MCP / skill + OpenAPI] - Host adapters / wrappers needed: [TODO: plugin manifest / marketplace entry / config files] - State / persistence path: [TODO: where mutable data lives; do not store caches/auth/install artifacts inside the skill folder] Quantitative: - Trigger accuracy: [e.g., 90% of relevant queries] - Tool calls: [target range] - Failures: [e.g., 0 failed API calls per workflow] Qualitative: - Minimal user steering - Repeatable output structure - Works for a new user on first try Step 0: Confirm inputs - Action: Read the existing conversation/files first; ask follow-up questions only when a wrong assumption would materially change the outcome. - Input: [TODO: what must be provided by the user before starting] - Output: [TODO: confirmed scope / missing-info list / stop condition] - Validation: [TODO] Step 1: [First major step] - Action: [TODO: imperative instruction] - Input: [TODO] - Output: [TODO] - Validation: [TODO] Step 2: [Next major step] - Action: [TODO: imperative instruction] - Input: [TODO] - Output: [TODO] - Validation: [TODO] Step N: Finalization and QA - Action: Run `python scripts/format_check.py .` (or from the skill-creator toolchain) - Action: Run `python scripts/audit_structure.py . --json` to verify semantic sections and workflow step I/O/validation - Action: Run `python scripts/audit_workflow_contract.py . --json` to verify stop conditions, follow-through policy, and QA pass - Action: Run `python scripts/audit_lifecycle_state.py . --json` to verify `skill_lifecycle.yaml` - Action: Run `python scripts/audit_skill_references.py .` and `python scripts/audit_unreferenced_files.py .` to verify local paths and catch orphaned files - Action: Update `references/readiness_report.md` with pass/fail status and evidence from the checks you ran - Action: Use `references/checklist_template.md` only for manual review notes that cannot be mechanically gated - Output: [TODO: final artifact + QA summary] Return exactly these sections or fields in this order: 1. [TODO] 2. [TODO] 3. [TODO] Formatting rules: - [TODO: Markdown / JSON / SQL / table / etc.] - [TODO: length limits] - [TODO: whether extra sections are allowed] - [TODO: what to do when information is missing] - [TODO: when to call which tool] - [TODO: if a tool has side effects, state the approval rule] - [TODO: if this skill is cross-host, define the minimal shared contract first; prefer conservative JSON Schema + MCP, then add OpenAPI only when needed] - [TODO: if tool/function schema exists, make names, parameter descriptions, enum values, and required fields explicit] - [TODO: put timeout / retry / idempotency / approval rules at the tool gateway layer instead of scattering them across host-specific prompts] - [TODO: keep the active tool set as small as practical] - Directly do: [TODO: low-risk, reversible, no external side effects] - Ask first: [TODO: write operations, destructive actions, external side effects] - Stop and report: [TODO: missing prerequisites, unsafe state, policy conflict] Example 1 Input: - [TODO] Output: [TODO] Example 2 (optional) Input: - [TODO] Output: [TODO] - GPT-style models: [TODO: what should be explicit step-by-step] - Reasoning models: [TODO: goals, constraints, reasoning effort, what NOT to overspecify] - Multi-turn split: [TODO: when to break analysis / execution / drafting / QA into separate turns] ## Testing plan ### Triggering tests - Golden trigger set: - Direct: - [TODO: obvious should-trigger prompt] - Indirect: - [TODO: paraphrase / near-miss that should still trigger] - Negative: - [TODO: similar prompt that should not trigger] - Should trigger: - [TODO] - Should NOT trigger: - [TODO] - Near-miss / confusing cases: - [TODO] - Should ask before acting: - [TODO] ### Functional tests - Test case: [TODO] - Given: - When: - Then: ### Performance comparison (optional) - Baseline (no skill): - With skill: ### ROI guardrail - Quality gain must justify extra: - Time: - Tokens: - Maintenance burden: ### Regression gates - Minimum pass-rate delta: - Maximum allowed time increase: - Maximum allowed token increase: - Maximum under-trigger failures: - Maximum over-trigger failures: ### Feedback loop - Common failure signals: - [TODO] - Likely fix: - [TODO: description / workflow / resources] ### Model / routing checks - GPT-style prompt pass: - [TODO] - Reasoning-model pass: - [TODO] - Neighbor-skill confusion: - [TODO] ### Host compatibility checks - Primary host smoke tests: - [TODO] - Wrapper / manifest / config drift review: - [TODO] - Auth / approval / persistence checks: - [TODO] - Known unsupported hosts: - [TODO] ## Eval workflow - Save approved prompts to `assets/evals/evals.json` - Define release thresholds in `assets/evals/regression_gates.json` - Prepare paired runs with `python scripts/prepare_eval_workspace.py ` - If the environment supports subagents or parallel workers, launch with-skill and baseline runs in the same batch - After runs complete, aggregate results and generate a review viewer - Validate release thresholds with `python scripts/check_regression_gates.py --config assets/evals/regression_gates.json` ## Distribution notes - Packaging: `python scripts/package_skill.py ` - Keep the core skill folder as the single source of truth; host-specific wrappers should stay thin - Document supported hosts, auth requirements, approval boundaries, and persistence expectations outside the skill folder - Repo-level README belongs *outside* this skill folder. ## Troubleshooting - Symptom: - Cause: - Fix: ## Resources This template includes optional resource directories: - `scripts/` for deterministic helpers - `references/` for long docs loaded on demand - `references/readiness_report.md` is mandatory release evidence; keep it versioned and current - `references/checklist_template.md` is a reusable manual review template, not a release gate - `references/migration-governance.md` records rename/deprecate/merge/split compatibility rules - `skill_lifecycle.yaml` records lifecycle state, ownership, review cadence, support matrix, risk, and dependencies - `schemas/` records machine-readable contracts - `policies/` records policy-as-code release, portability, and retirement rules - `examples/` records example-as-test fixtures - `assets/` for templates/fonts/icons used in output and reusable eval fixtures """ EXAMPLE_READINESS_REPORT = """# Readiness report This file is release evidence for the current skill version. It records mechanical gate results and must be updated whenever `SKILL.md`, scripts, references, or eval assets change. ## Final gate - Current version reviewed: [TODO] - Overall status: [PASS / FAIL / BLOCKED] - Blocking issues: - [TODO] - Evidence / commands run: - [TODO] - Audit date: [TODO: YYYY-MM-DD] - Git commit: [TODO: commit hash or local-only] - Audit runner: [TODO: local / CI / host wrapper] ## Format checks - [ ] Folder name is kebab-case - [ ] `SKILL.md` exists (case-sensitive) - [ ] YAML frontmatter starts/ends with `---` - [ ] Frontmatter has `name` + `description` - [ ] No `<` or `>` in frontmatter - [ ] `references/readiness_report.md` is present and updated for this review - [ ] `scripts/`, `references/`, and `assets/` have no unexplained unreferenced files - [ ] No `README.md` inside the skill folder ## Structure checks - [ ] `` exists as a real semantic block - [ ] `` exists as a real semantic block - [ ] `` exists as a real semantic block - [ ] Every workflow step has Action / Input / Output / Validation - [ ] `` exists as a real semantic block - [ ] `` exists as a real semantic block - [ ] At least one worked example exists and is not just a placeholder ## Eval and lifecycle checks - [ ] `assets/evals/evals.json` exists - [ ] `assets/evals/regression_gates.json` exists - [ ] Trigger eval coverage includes should-trigger / should-not-trigger / near-miss - [ ] Trigger eval coverage includes zh / en / mixed language cases - [ ] Functional eval coverage includes happy path / edge case / failure mode - [ ] Benchmark metadata requirements include skill version, git commit, host, model, timestamp, and grader version - [ ] Version and audit date are not stale ## Manual review notes - [ ] Triggers on obvious queries - [ ] Triggers on paraphrases - [ ] Does NOT trigger on unrelated queries - [ ] Does NOT steal queries from neighboring skills - [ ] Works on expected language variants - [ ] If cross-tool, supported / unsupported hosts are explicitly documented - [ ] Description clearly says when to use and when NOT to use the skill - [ ] Skill has one clear primary job - [ ] Instructions use imperative steps with input/output/validation - [ ] Opening summary / Purpose / Scope paragraphs stay descriptive; only actionable instructions use imperative voice - [ ] Core workflow works end-to-end - [ ] Errors handled with actionable guidance - [ ] Output matches required structure - [ ] Output contract is explicit - [ ] Default follow-through policy is explicit - [ ] Examples exist when style/format quality matters - [ ] Tool rules are explicit if the skill uses tools - [ ] If cross-tool, the core skill pack is kept separate from host wrappers / manifests - [ ] If cross-tool, auth / approval / persistence expectations are explicit - [ ] Mutable state / cache / auth artifacts are NOT stored inside the skill folder ## Common error checks - [ ] No missing local paths referenced from `SKILL.md` or `references/*.md` - [ ] No unexplained orphan files remain in `scripts/`, `references/`, or `assets/` - [ ] No contradictory rules between `SKILL.md`, `references/`, and `scripts/` - [ ] No release-blocking `[TODO]` placeholders remain in user-facing instructions - [ ] No hidden side effects bypass the stated follow-through policy - [ ] Neighbor-skill overlap / negative triggers were reviewed after the latest changes - [ ] Host wrappers do NOT fork or silently rewrite the core workflow ## Maintenance - [ ] Version bumped in top-level version - [ ] Changes documented (outside the skill folder, e.g., repo release notes) - [ ] Evals saved to assets/evals/evals.json (if benchmarking this skill) - [ ] Regression gates defined (if benchmarking this skill) - [ ] ROI review completed - [ ] Long workflows are split into stages or multi-turn steps when appropriate - [ ] Model-specific notes added if GPT-style and reasoning models need different guidance """ EXAMPLE_CHECKLIST_TEMPLATE = """# Manual review notes template Use this file for reviewer judgment that is not yet safely mechanical. Do not treat this template as release evidence; copy completed findings into `references/readiness_report.md`. ## Boundary review - Does the skill own one intuitive primary job? - Are neighboring skills and handoffs clear? - Would a new user understand when not to use this skill? ## Trigger review - Are trigger examples realistic? - Are negative triggers specific enough? - Are zh / en / mixed-language phrases covered where relevant? ## Output quality review - Do worked examples teach the intended output quality? - Is the output contract strict enough for repeatable results? - Is the default follow-through policy clear about external side effects? ## Maintenance review - Should this skill be renamed, merged, split, deprecated, or retired? - Are wrappers thin enough to avoid host drift? - Are release notes and public descriptions consistent with the skill folder? """ EXAMPLE_MIGRATION_GOVERNANCE = """# Migration governance This document defines evidence required before renaming, deprecating, merging, splitting, or retiring this skill. ## Rename - Record old name, new name, reason, routing compatibility plan, package path impact, registry impact, and references checked. ## Deprecate - Record deprecation reason, replacement skill or fallback workflow, effective date, removal date, user-facing notice, rollback condition, and eval impact. ## Merge - Record source skills, target skill, boundary rationale, trigger conflict resolution, follow-through policy conflict resolution, eval migration, and wrapper updates. ## Split - Record original skill, new target skills, routing boundary, handoff rules, eval redistribution plan, and compatibility aliases. ## Compatibility - Check package paths, skill names, aliases, catalog entries, README links, registry entries, wrappers, local references, and benchmark workspaces. ## Migration Evidence - Keep migration_type, from, to, effective_date, compatibility_policy, references_checked, evals_updated, wrappers_updated, and release_gate_result. """ EXAMPLE_EVALS_JSON = """{{ "skill_name": "{skill_name}", "benchmark_metadata_required": [ "skill_version", "git_commit", "host", "model", "temperature", "run_timestamp", "grader_version" ], "evals": [ {{ "id": 1, "name": "happy-path", "type": "functional", "language": "mixed", "trigger_class": "direct", "coverage_tags": ["happy-path", "should-trigger"], "prompt": "[TODO: realistic user request]", "expected_output": "[TODO: what success looks like]", "files": [], "expectations": [ "[TODO: verifiable expectation]" ] }} ] }} """ EXAMPLE_REGRESSION_GATES_JSON = """{{ "min_pass_rate_delta": 0.0, "max_time_increase_seconds": 30.0, "max_token_increase": 5000, "require_non_negative_pass_rate": true, "required_eval_tags": [ "should-trigger", "should-not-trigger", "near-miss", "happy-path", "edge-case", "failure-mode" ], "required_eval_languages": ["zh", "en", "mixed"] }} """ EXAMPLE_LIFECYCLE_YAML = """name: {skill_name} status: draft owner: "[TODO]" archetype: ops primary_structure_pattern: pipeline lifecycle: created_at: {today_iso} last_validated_at: null last_released_at: null review_interval_days: 60 next_review_due: {next_review_due} support: primary_hosts: - agent-skills secondary_hosts: [] unsupported_hosts: [] risk: external_side_effects: false requires_secrets: false data_sensitivity: low dependencies: scripts: - scripts/example.py references: - references/readiness_report.md stewardship: reviewers: [] stale_after_days: 120 escalation: stale: needs-maintenance no_owner: orphaned portfolio: overlaps_with: [] hands_off_to: [] depends_on: [] supersedes: [] deprecated_by: null shares_tool_contract_with: [] """ EXAMPLE_RELEASE_POLICY_YAML = """release_policy: required_checks: - format - structure - workflow_contract - lifecycle - lifecycle_state - eval_coverage block_if: - has_todo - missing_owner - stale_audit """ EXAMPLE_PORTABILITY_POLICY_YAML = """portability_policy: core_is_single_source_of_truth: true wrapper_must_be_thin: true wrapper_metadata_required: true mutable_state_inside_skill_folder: false """ EXAMPLE_RETIREMENT_POLICY_YAML = """retirement_policy: minimum_deprecation_days: 90 required_before_retirement: - replacement_released - migration_guide_exists - no_internal_references """ EXAMPLE_SCHEMA_JSON = """{{ "$schema": "https://json-schema.org/draft/2020-12/schema", "title": "{title}", "type": "object" }} """ EXAMPLE_INPUT_MD = """[TODO: Paste a realistic user request for this skill.] """ EXAMPLE_EXPECTED_PROPERTIES_JSON = """{{ "must_include_sections": [ "[TODO: section]" ], "must_not_include": [ "TODO", "待補" ], "minimum_use_cases": 0 }} """ EXAMPLE_SCRIPT_PLACEHOLDER = """#!/usr/bin/env python3 \"\"\"Example helper script for {skill_name}\n\nReplace or delete this file.\n\"\"\"\n\nfrom __future__ import annotations\n\n\ndef main() -> None:\n print(\"TODO: implement helper script\")\n\n\nif __name__ == \"__main__\":\n main()\n""" def title_case_skill_name(skill_name: str) -> str: return " ".join(word.capitalize() for word in skill_name.split("-")) def is_kebab_case(name: str) -> bool: return re.fullmatch(r"[a-z0-9]+(?:-[a-z0-9]+)*", name) is not None def today_version(today: date | None = None) -> str: """Return the date-based skill version without depending on repo-level scripts.""" current = today or date.today() return f"{current.year}.{current.month}.{current.day}" def init_skill(skill_name: str, path: str) -> Path: if not is_kebab_case(skill_name) or len(skill_name) > 64: raise ValueError("skill-name must be kebab-case, <= 64 chars") skill_dir = Path(path).resolve() / skill_name if skill_dir.exists(): raise FileExistsError(f"Skill directory already exists: {skill_dir}") skill_dir.mkdir(parents=True, exist_ok=False) current_date = date.today() # SKILL.md skill_title = title_case_skill_name(skill_name) (skill_dir / "SKILL.md").write_text( SKILL_TEMPLATE_ADV.format( skill_name=skill_name, skill_title=skill_title, skill_version=today_version(), ), encoding="utf-8", ) # scripts/ scripts_dir = skill_dir / "scripts" scripts_dir.mkdir(exist_ok=True) ex = scripts_dir / "example.py" ex.write_text(EXAMPLE_SCRIPT_PLACEHOLDER.format(skill_name=skill_name), encoding="utf-8") ex.chmod(0o755) # references/ refs_dir = skill_dir / "references" refs_dir.mkdir(exist_ok=True) (refs_dir / "readiness_report.md").write_text(EXAMPLE_READINESS_REPORT, encoding="utf-8") (refs_dir / "checklist_template.md").write_text(EXAMPLE_CHECKLIST_TEMPLATE, encoding="utf-8") (refs_dir / "migration-governance.md").write_text(EXAMPLE_MIGRATION_GOVERNANCE, encoding="utf-8") (refs_dir / "fusion-playbook.md").write_text("# Fusion playbook\n\n[TODO]\n", encoding="utf-8") (refs_dir / "retirement-playbook.md").write_text("# Retirement playbook\n\n[TODO]\n", encoding="utf-8") (refs_dir / "telemetry-playbook.md").write_text("# Telemetry playbook\n\n[TODO]\n", encoding="utf-8") (refs_dir / "migration-template.md").write_text("# Migration Guide\n\n[TODO]\n", encoding="utf-8") # lifecycle / schemas / policies / examples (skill_dir / "skill_lifecycle.yaml").write_text( EXAMPLE_LIFECYCLE_YAML.format( skill_name=skill_name, today_iso=current_date.isoformat(), next_review_due=(current_date + timedelta(days=60)).isoformat(), ), encoding="utf-8", ) schemas_dir = skill_dir / "schemas" schemas_dir.mkdir(exist_ok=True) for schema_name, title in { "skill_spec.schema.json": "Skill specification", "lifecycle.schema.json": "Skill lifecycle manifest", "run_trace.schema.json": "Skill runtime trace", "release_evidence.schema.json": "Skill release evidence", }.items(): (schemas_dir / schema_name).write_text(EXAMPLE_SCHEMA_JSON.format(title=title), encoding="utf-8") policies_dir = skill_dir / "policies" policies_dir.mkdir(exist_ok=True) (policies_dir / "release_policy.yaml").write_text(EXAMPLE_RELEASE_POLICY_YAML, encoding="utf-8") (policies_dir / "portability_policy.yaml").write_text(EXAMPLE_PORTABILITY_POLICY_YAML, encoding="utf-8") (policies_dir / "retirement_policy.yaml").write_text(EXAMPLE_RETIREMENT_POLICY_YAML, encoding="utf-8") examples_dir = skill_dir / "examples" / "starter" examples_dir.mkdir(parents=True, exist_ok=True) (examples_dir / "input.md").write_text(EXAMPLE_INPUT_MD, encoding="utf-8") (examples_dir / "expected_properties.json").write_text(EXAMPLE_EXPECTED_PROPERTIES_JSON, encoding="utf-8") # assets/ assets_dir = skill_dir / "assets" assets_dir.mkdir(exist_ok=True) evals_dir = assets_dir / "evals" evals_dir.mkdir(exist_ok=True) (evals_dir / "evals.json").write_text( EXAMPLE_EVALS_JSON.format(skill_name=skill_name), encoding="utf-8", ) (evals_dir / "regression_gates.json").write_text( EXAMPLE_REGRESSION_GATES_JSON, encoding="utf-8", ) return skill_dir def main() -> int: if len(sys.argv) < 4 or sys.argv[2] != "--path": print("Usage: init_skill_advanced.py --path ") return 1 skill_name = sys.argv[1] path = sys.argv[3] try: skill_dir = init_skill(skill_name, path) except Exception as e: print(f"ERROR: {e}") return 1 print(f"Created: {skill_dir}") print("Next steps:") print("1) Edit SKILL.md (decision boundary + workflow + output contract + follow-through policy)") print("2) Decide primary hosts/tool contract early if this skill needs cross-tool portability") print("3) Fill out skill_lifecycle.yaml with owner/status/support/risk/dependencies") print("4) Fill out references/readiness_report.md with mechanical gate evidence") print("5) Add scripts/references/assets/schemas/policies/examples as needed") print("6) Use references/checklist_template.md only for manual review notes") print("7) Run release gate checks (format_check.py + audit_structure.py + audit_lifecycle_state.py + release_gate.py)") print("8) Package with package_skill.py") return 0 if __name__ == "__main__": raise SystemExit(main())