# SQL Fundamentals — Interactive Workshop Scenarios
# Uses SQLite. Setup creates DB files; student writes queries in queries.sql.

scenarios:
  - id: basic-select
    title: "SELECT with WHERE"
    difficulty: beginner
    xp: 20
    setup:
      - "mkdir -p basic-select && cd basic-select"
      - "sqlite3 workshop.db \"CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT, email TEXT, role TEXT); INSERT INTO users VALUES (1, 'Alice', 'alice@example.com', 'admin'); INSERT INTO users VALUES (2, 'Bob', 'bob@example.com', 'user'); INSERT INTO users VALUES (3, 'Carol', 'carol@example.com', 'user'); INSERT INTO users VALUES (4, 'Dave', 'dave@example.com', 'guest'); INSERT INTO users VALUES (5, 'Eve', 'eve@example.com', 'admin');\""
    files:
      - name: queries.sql
        content: |
          -- Write a SELECT query that returns users with role = 'admin'
          -- Run with: sqlite3 workshop.db < queries.sql
        language: sql
        readonly: false
    task: "Write a SELECT query that returns only users whose role is 'admin'. Run it with sqlite3 workshop.db < queries.sql. You should see Alice and Eve."
    validations:
      - label: "Query returns admin users"
        check: output-contains
        pattern: "Alice|Eve"
    hints:
      - "Use SELECT * FROM users WHERE role = 'admin';"
      - "Remember the semicolon at the end of your query."
      - "You can also SELECT specific columns: SELECT name, email FROM users WHERE role = 'admin';"
    agent_prompts:
      on_start: "You have five users. How would you pick only those with role 'admin'? What part of SQL filters rows?"

  - id: joins
    title: "JOIN Queries"
    difficulty: beginner
    xp: 25
    setup:
      - "mkdir -p joins && cd joins"
      - "sqlite3 workshop.db \"CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT); CREATE TABLE orders (id INTEGER PRIMARY KEY, user_id INTEGER, product TEXT, amount REAL); INSERT INTO users VALUES (1, 'Alice'); INSERT INTO users VALUES (2, 'Bob'); INSERT INTO users VALUES (3, 'Carol'); INSERT INTO orders VALUES (1, 1, 'Widget', 29.99); INSERT INTO orders VALUES (2, 1, 'Gadget', 49.99); INSERT INTO orders VALUES (3, 2, 'Widget', 29.99); INSERT INTO orders VALUES (4, 3, 'Thing', 19.99);\""
    files:
      - name: queries.sql
        content: |
          -- Write a JOIN query to show each order with the customer's name
          -- Run with: sqlite3 workshop.db < queries.sql
        language: sql
        readonly: false
    task: "Write an INNER JOIN query that returns each order's id, product, amount, and the user's name. The orders table has user_id linking to users.id. Run with sqlite3 workshop.db < queries.sql."
    validations:
      - label: "JOIN returns order and user data"
        check: output-contains
        pattern: "Alice|Bob|Carol"
    hints:
      - "INNER JOIN connects rows from two tables: SELECT orders.id, orders.product, orders.amount, users.name FROM orders JOIN users ON orders.user_id = users.id;"
      - "Alias tables for brevity: FROM orders o JOIN users u ON o.user_id = u.id"
      - "You should see 4 rows — one per order, each with a customer name."
    agent_prompts:
      on_start: "Orders have user_id but not the user's name. How can you combine data from two tables in one query?"

  - id: group-by
    title: "GROUP BY and Aggregation"
    difficulty: intermediate
    xp: 30
    setup:
      - "mkdir -p group-by && cd group-by"
      - "sqlite3 workshop.db \"CREATE TABLE sales (id INTEGER PRIMARY KEY, region TEXT, product TEXT, amount REAL); INSERT INTO sales VALUES (1, 'North', 'A', 100); INSERT INTO sales VALUES (2, 'North', 'A', 150); INSERT INTO sales VALUES (3, 'North', 'B', 80); INSERT INTO sales VALUES (4, 'South', 'A', 120); INSERT INTO sales VALUES (5, 'South', 'B', 90); INSERT INTO sales VALUES (6, 'South', 'B', 110);\""
    files:
      - name: queries.sql
        content: |
          -- Write a query that groups by region and product, sums amount,
          -- and only includes groups where the sum is >= 200
          -- Run with: sqlite3 workshop.db < queries.sql
        language: sql
        readonly: false
    task: "Write a GROUP BY query that shows total sales amount per region and product. Add a HAVING clause to show only groups where the total is 200 or more. Run with sqlite3 workshop.db < queries.sql."
    validations:
      - label: "GROUP BY with HAVING returns aggregated results"
        check: output-contains
        pattern: "North|250|South"
    hints:
      - "GROUP BY region, product with SUM(amount)."
      - "HAVING filters after aggregation: HAVING SUM(amount) >= 200"
      - "North,A = 250; South,B = 200. Both should appear."
    agent_prompts:
      on_start: "You want totals per region and product. What does GROUP BY do? And when would you use HAVING instead of WHERE?"

  - id: subqueries
    title: "Correlated Subqueries"
    difficulty: intermediate
    xp: 35
    setup:
      - "mkdir -p subqueries && cd subqueries"
      - "sqlite3 workshop.db \"CREATE TABLE categories (id INTEGER PRIMARY KEY, name TEXT); CREATE TABLE products (id INTEGER PRIMARY KEY, category_id INTEGER, name TEXT, price REAL); INSERT INTO categories VALUES (1, 'Electronics'); INSERT INTO categories VALUES (2, 'Books'); INSERT INTO products VALUES (1, 1, 'Laptop', 999); INSERT INTO products VALUES (2, 1, 'Phone', 599); INSERT INTO products VALUES (3, 2, 'SQL Book', 39); INSERT INTO products VALUES (4, 2, 'React Book', 49);\""
    files:
      - name: queries.sql
        content: |
          -- Write a correlated subquery to find products priced above
          -- their category's average
          -- Run with: sqlite3 workshop.db < queries.sql
        language: sql
        readonly: false
    task: "Write a correlated subquery that returns products whose price is above the average price for their category. Electronics avg = 799; Books avg = 44. You should get Laptop (999) and React Book (49). Run with sqlite3 workshop.db < queries.sql."
    validations:
      - label: "Correlated subquery returns correct products"
        check: output-contains
        pattern: "Laptop|React"
    hints:
      - "A correlated subquery references the outer query: WHERE price > (SELECT AVG(price) FROM products p2 WHERE p2.category_id = products.category_id)"
      - "The subquery runs once per row in the outer query."
      - "Laptop (999) > 799; React Book (49) > 44. Both qualify."
    agent_prompts:
      on_start: "How would you find the average price per category? Now, how would you compare each product's price to that average?"
