|
| 1 | +#!/usr/bin/env python3 |
| 2 | +# begin prompt_pipeline/entrypoint.py |
| 3 | +# |
| 4 | +# Generate exercise.py from student's prompt.txt via LLM. |
| 5 | +# Called from the Docker container during the prompt pipeline workflow step. |
| 6 | +# |
| 7 | +# Environment variables (set by classroom-prompt-reusable.yml): |
| 8 | +# INPUT_PROMPT-FILE Path to student's prompt.txt inside the container |
| 9 | +# CONTAINER_OUTPUT Directory where exercise.py will be written |
| 10 | +# INPUT_MODEL LLM model name (optional) |
| 11 | +# INPUT_CLAUDE_API_KEY, INPUT_GEMINI-API-KEY, INPUT_GROK-API-KEY, |
| 12 | +# INPUT_NVIDIA-API-KEY, INPUT_PERPLEXITY-API-KEY (at least one required) |
| 13 | + |
| 14 | +import logging |
| 15 | +import os |
| 16 | +import pathlib |
| 17 | +import re |
| 18 | +import sys |
| 19 | + |
logging.basicConfig(level=logging.INFO)

# The LLM helper modules sit in ai_tutor/, a sibling of this file's directory
# inside the container:
#   /app/ai_tutor/         <- llm_client.py, llm_configs.py, entrypoint.py
#   /app/prompt_pipeline/  <- this file
# That directory is placed first on sys.path so the imports that follow
# resolve against it.
_this_dir = pathlib.Path(__file__).parent
_ai_tutor = _this_dir.parent / 'ai_tutor'
sys.path.insert(0, str(_ai_tutor))
| 27 | + |
| 28 | +from llm_client import LLMAPIClient # noqa: E402 |
| 29 | +from entrypoint import get_model_key_from_env, get_config_class # noqa: E402 |
| 30 | + |
| 31 | + |
# Heuristic detector for Python source pasted into a prompt.
# A prompt is rejected when any of its lines begins (after optional
# whitespace) with a definition, import, compound-statement header,
# print( call, or a simple "name =" assignment.  The goal is to make
# students describe the task in words rather than submit code.
_CODE_PATTERNS = re.compile(
    r'^\s*(def |class |import |from .+ import|if .+:|for .+:|while .+:|print\(|[a-zA-Z_]\w*\s*=\s*)',
    flags=re.MULTILINE,
)


def contains_python_code(text: str) -> bool:
    """Report whether any line of *text* looks like a Python statement.

    The check is purely pattern based (see ``_CODE_PATTERNS``): it returns
    True as soon as one line matches and False when no line does.
    """
    first_hit = _CODE_PATTERNS.search(text)
    return first_hit is not None
| 43 | + |
| 44 | + |
# Fixed instruction text sent ahead of the student's requirements.
# It is assembled from an introductory paragraph (sentences joined by a
# single space) followed by one "- rule" line per output constraint.
_SYSTEM_INSTRUCTION = (
    " ".join((
        "You are a Python code generator for a university programming assignment.",
        "The student has described what Python code they need.",
        "Generate clean, correct Python code that satisfies their requirements.",
        "Rules:",
    ))
    + "\n"
    + "".join(
        f"- {rule}\n"
        for rule in (
            "Output ONLY a single ```python code block.",
            "Do not include any explanations, prose, or text outside the code block.",
            "All executable code must be inside functions.",
            "Do not import modules that are not needed.",
            "Do not add a main guard or standalone executable code at module level.",
        )
    )
)
| 56 | + |
| 57 | + |
def extract_python_code(response: str) -> str:
    """Return Python source from the first Python-tagged fenced block in *response*.

    Lookup order:

    1. The first fence tagged ``python``, ``python3`` or ``py`` (any letter
       case).  The tag must be a whole word, so the ``3`` of ``python3`` is
       never left at the start of the returned code.
    2. The first fence of any kind; a language tag on the opening line
       (e.g. ``text``) is skipped rather than returned as code.
    3. The raw response, when it contains no complete fenced block.

    The result is always stripped of leading and trailing whitespace.
    """
    fence_patterns = (
        # Python-tagged fence; \b stops "python" from matching inside
        # "python3"/"pythonic" and leaking the remainder into the code.
        r'```(?:python3?|py)\b\s*(.*?)```',
        # Any fence; the optional group consumes an info-string line such as
        # "py\n" or "\n".  When the text after the fence is not followed by a
        # newline (inline "```code```"), the group is skipped and the text is
        # captured as code.
        r'```(?:[\w+-]*[ \t]*\n)?(.*?)```',
    )
    for pattern in fence_patterns:
        match = re.search(pattern, response, re.DOTALL | re.IGNORECASE)
        if match:
            return match.group(1).strip()
    return response.strip()
| 68 | + |
| 69 | + |
def build_question(student_prompt: str) -> str:
    """Compose the text sent to the LLM.

    The result is ``_SYSTEM_INSTRUCTION``, a blank line, the heading
    ``Student requirements:``, and then *student_prompt* on the next line.
    """
    template = "{}\n\nStudent requirements:\n{}"
    return template.format(_SYSTEM_INSTRUCTION, student_prompt)
| 72 | + |
| 73 | + |
def main() -> None:
    """Generate ``exercise.py`` from the student's prompt file via the LLM.

    Reads the prompt path from ``INPUT_PROMPT-FILE`` and the output directory
    from ``CONTAINER_OUTPUT``, rejects missing, unreadable, empty or
    code-like prompts, sends the prompt to the configured model, and writes
    the extracted code to ``<CONTAINER_OUTPUT>/exercise.py``.

    Raises:
        SystemExit: with status 1, after logging the reason, when a required
            environment variable is absent, the prompt file is missing,
            unreadable or empty, the prompt looks like code, the LLM returns
            nothing, or no code can be extracted from the response.
    """
    # Report absent variables explicitly instead of surfacing a bare KeyError.
    required_vars = ('INPUT_PROMPT-FILE', 'CONTAINER_OUTPUT')
    missing_vars = [name for name in required_vars if name not in os.environ]
    if missing_vars:
        logging.error(
            "Missing required environment variable(s): %s",
            ', '.join(missing_vars),
        )
        sys.exit(1)

    prompt_path = pathlib.Path(os.environ['INPUT_PROMPT-FILE'])
    output_dir = pathlib.Path(os.environ['CONTAINER_OUTPUT'])

    # is_file() rather than exists(): a directory at this path cannot be read
    # as a prompt and would otherwise fail later with a traceback.
    if not prompt_path.is_file():
        logging.error("Prompt file not found: %s", prompt_path)
        sys.exit(1)

    try:
        student_prompt = prompt_path.read_text(encoding='utf-8').strip()
    except (OSError, UnicodeDecodeError) as exc:
        # Permission problems or a non-UTF-8 file: same log-and-exit path as
        # every other validation failure.
        logging.error("Cannot read prompt file %s: %s", prompt_path, exc)
        sys.exit(1)

    if not student_prompt:
        logging.error("Prompt file is empty: %s", prompt_path)
        sys.exit(1)

    if contains_python_code(student_prompt):
        logging.error(
            "Prompt contains Python code constructs. "
            "Write a natural language description, not code."
        )
        sys.exit(1)

    logging.info("Prompt length: %d chars", len(student_prompt))

    model, api_key = get_model_key_from_env()
    config_class = get_config_class(model)
    config_args = {'api_key': api_key}
    # Only pass a model when one was provided; otherwise the config class is
    # constructed without a 'model' argument.
    if model:
        config_args['model'] = model
    config = config_class(**config_args)
    client = LLMAPIClient(config)

    question = build_question(student_prompt)
    logging.info("Calling %s for code generation...", model or "default model")
    response = client.call_api(question)

    if not response:
        logging.error("No response from LLM — check API key and model name")
        sys.exit(1)

    code = extract_python_code(response)
    if not code:
        logging.error("LLM response contained no Python code block")
        logging.error("Raw response (first 500 chars): %s", response[:500])
        sys.exit(1)

    output_dir.mkdir(parents=True, exist_ok=True)
    output_file = output_dir / 'exercise.py'
    output_file.write_text(code, encoding='utf-8')
    logging.info("exercise.py written to %s (%d chars)", output_file, len(code))


if __name__ == '__main__':
    main()
| 126 | + |
| 127 | +# end prompt_pipeline/entrypoint.py |
0 commit comments