1616import uuid
1717import time
1818import os
19+ import re
1920import boto3
2021
2122dynamodb = boto3 .resource ("dynamodb" )
# Python versions a build request may ask for. handler() answers 400 when the
# request's "python_version" (default "3.13") is not in this list.
VALID_PYTHON_VERSIONS = ["3.10", "3.11", "3.12", "3.13", "3.14"]
# NOTE(review): presumably the CPU architectures a build may target; the check
# that consumes this list is not visible in this excerpt -- confirm in handler().
VALID_ARCHITECTURES = ["x86_64", "arm64"]
# Upper bound on the submitted requirements text; the handler's error message
# reports this limit in characters.
MAX_REQUIREMENTS_LENGTH = 10000  # 10KB max
# Cap on queued + in-flight builds: handler() answers 429 once the estimated
# number of active builds reaches this value. Read from the MAX_ACTIVE_BUILDS
# environment variable (default "10"); a non-integer value raises ValueError
# when the module is imported.
MAX_ACTIVE_BUILDS = int(os.environ.get("MAX_ACTIVE_BUILDS", "10"))
3133
3234
3335def handler (event , context ):
@@ -47,6 +49,10 @@ def handler(event, context):
4749 "error" : f"requirements too large (max { MAX_REQUIREMENTS_LENGTH } chars)"
4850 })
4951
52+ req_error = _validate_requirements (requirements )
53+ if req_error :
54+ return _response (400 , {"error" : req_error })
55+
5056 python_version = body .get ("python_version" , "3.13" )
5157 if python_version not in VALID_PYTHON_VERSIONS :
5258 return _response (400 , {
@@ -67,6 +73,35 @@ def handler(event, context):
6773 if not isinstance (single_file , bool ):
6874 return _response (400 , {"error" : "single_file must be a boolean" })
6975
76+ # --- Enforce concurrent-build cap ---
77+ # Sum approximate queued + in-flight messages to estimate active builds.
78+ # This prevents a single caller from queueing unbounded EC2 Spot launches.
79+ try :
80+ attrs = sqs .get_queue_attributes (
81+ QueueUrl = QUEUE_URL ,
82+ AttributeNames = [
83+ "ApproximateNumberOfMessages" ,
84+ "ApproximateNumberOfMessagesNotVisible" ,
85+ ],
86+ )["Attributes" ]
87+ active = (
88+ int (attrs .get ("ApproximateNumberOfMessages" , 0 ))
89+ + int (attrs .get ("ApproximateNumberOfMessagesNotVisible" , 0 ))
90+ )
91+ if active >= MAX_ACTIVE_BUILDS :
92+ return _response (
93+ 429 ,
94+ {
95+ "error" : (
96+ f"Build queue is at capacity ({ MAX_ACTIVE_BUILDS } concurrent builds). "
97+ "Please retry later."
98+ )
99+ },
100+ )
101+ except Exception as e :
102+ print (f"WARNING: could not check queue depth: { e } " )
103+ # Fail open — allow the build rather than blocking on a transient error.
104+
70105 # --- Create build record ---
71106 build_id = str (uuid .uuid4 ())
72107 now = int (time .time ())
@@ -113,6 +148,47 @@ def handler(event, context):
113148 })
114149
115150
151+ def _validate_requirements (requirements : str ):
152+ """
153+ Validate requirements.txt content.
154+
155+ Returns an error string if invalid, or None if the content is acceptable.
156+
157+ Rules:
158+ - URL-based installs (git+, http://, https://, file://, vcs+...) are rejected.
159+ They bypass PyPI and allow arbitrary code to be pulled from any host.
160+ - Recursive includes (-r / --requirement) and constraint files
161+ (-c / --constraint) are rejected to prevent file-system reads on the
162+ builder instance.
163+ - Lines with obvious shell metacharacters (;, |, &, $, `) are rejected
164+ as a defence-in-depth measure against injection in downstream tools.
165+ """
166+ URL_PREFIXES = ("git+" , "http://" , "https://" , "file://" , "svn+" , "hg+" , "bzr+" )
167+ BLOCKED_FLAGS = ("-r " , "-r\t " , "--requirement" , "-c " , "-c\t " , "--constraint" )
168+ SHELL_META = re .compile (r'[;|&$`]' )
169+
170+ for lineno , raw_line in enumerate (requirements .splitlines (), 1 ):
171+ line = raw_line .strip ()
172+ if not line or line .startswith ("#" ):
173+ continue
174+ if any (line .startswith (p ) for p in URL_PREFIXES ):
175+ return (
176+ f"Line { lineno } : URL-based requirements are not allowed. "
177+ "Specify packages by name and version (e.g., requests==2.32.4)."
178+ )
179+ if any (line .startswith (f ) for f in BLOCKED_FLAGS ):
180+ return (
181+ f"Line { lineno } : recursive includes (-r) and constraint files (-c) "
182+ "are not allowed."
183+ )
184+ if SHELL_META .search (line ):
185+ return (
186+ f"Line { lineno } : shell metacharacters (;, |, &, $, `) are not "
187+ "allowed in requirements."
188+ )
189+ return None
190+
191+
116192def _response (status_code , body ):
117193 """Create API Gateway response with CORS headers."""
118194 return {
0 commit comments