Skip to content

Commit f4de72c

Browse files
committed
feat: add requirements validation and enforce concurrent build limits
1 parent d51b8a0 commit f4de72c

3 files changed

Lines changed: 84 additions & 6 deletions

File tree

infrastructure/lambdas/check_status/index.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ def handler(event, context):
110110
"filename": filename,
111111
"download_url": download_url,
112112
"architecture": architecture,
113-
"s3_key": s3_key,
113+
# s3_key intentionally omitted — callers only need the presigned URL
114114
})
115115
except ClientError as e:
116116
print(f"Failed to generate presigned URL for {s3_key}: {e}")

infrastructure/lambdas/process_build/index.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,11 @@ def _update_status(build_id, status, error=None):
174174

175175
def _generate_user_data(build_id, arch, python_version, requirements, single_file, log_group_name):
176176
"""Generate the EC2 user-data bash script for a single-architecture build."""
177-
req_escaped = requirements.replace("\\", "\\\\").replace("'", "'\\''")
177+
# Base64-encode requirements to prevent heredoc/shell-injection.
178+
# Embedding raw user content in a bash heredoc is unsafe: a line matching
179+
# the sentinel (REQUIREMENTS_EOF) would break out of the heredoc and allow
180+
# arbitrary command execution on the EC2 instance.
181+
req_b64 = base64.b64encode(requirements.encode("utf-8")).decode("ascii")
178182
if arch == "arm64":
179183
arch_label = "arm64"
180184
platform = "linux/arm64"
@@ -268,11 +272,9 @@ def _generate_user_data(build_id, arch, python_version, requirements, single_fil
268272
&& echo "$(date): CloudWatch streaming active \u2192 {log_group_name}/{build_id}" \\
269273
|| echo "$(date): WARNING: CloudWatch agent failed to start"
270274
271-
# --- Create requirements file ---
275+
# --- Create requirements file (base64-encoded to prevent injection) ---
272276
mkdir -p /build/input /build/output
273-
cat > /build/input/requirements.txt << 'REQUIREMENTS_EOF'
274-
{requirements}
275-
REQUIREMENTS_EOF
277+
echo '{req_b64}' | base64 -d > /build/input/requirements.txt
276278
277279
echo "$(date): Requirements:"
278280
cat /build/input/requirements.txt

infrastructure/lambdas/submit_build/index.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import uuid
1717
import time
1818
import os
19+
import re
1920
import boto3
2021

2122
dynamodb = boto3.resource("dynamodb")
@@ -28,6 +29,7 @@
2829
VALID_PYTHON_VERSIONS = ["3.10", "3.11", "3.12", "3.13", "3.14"]
2930
VALID_ARCHITECTURES = ["x86_64", "arm64"]
3031
MAX_REQUIREMENTS_LENGTH = 10000 # 10KB max
32+
MAX_ACTIVE_BUILDS = int(os.environ.get("MAX_ACTIVE_BUILDS", "10"))
3133

3234

3335
def handler(event, context):
@@ -47,6 +49,10 @@ def handler(event, context):
4749
"error": f"requirements too large (max {MAX_REQUIREMENTS_LENGTH} chars)"
4850
})
4951

52+
req_error = _validate_requirements(requirements)
53+
if req_error:
54+
return _response(400, {"error": req_error})
55+
5056
python_version = body.get("python_version", "3.13")
5157
if python_version not in VALID_PYTHON_VERSIONS:
5258
return _response(400, {
@@ -67,6 +73,35 @@ def handler(event, context):
6773
if not isinstance(single_file, bool):
6874
return _response(400, {"error": "single_file must be a boolean"})
6975

76+
# --- Enforce concurrent-build cap ---
77+
# Sum approximate queued + in-flight messages to estimate active builds.
78+
# This is a global cap (not per-caller): it bounds the total number of queued EC2 Spot launches.
79+
try:
80+
attrs = sqs.get_queue_attributes(
81+
QueueUrl=QUEUE_URL,
82+
AttributeNames=[
83+
"ApproximateNumberOfMessages",
84+
"ApproximateNumberOfMessagesNotVisible",
85+
],
86+
)["Attributes"]
87+
active = (
88+
int(attrs.get("ApproximateNumberOfMessages", 0))
89+
+ int(attrs.get("ApproximateNumberOfMessagesNotVisible", 0))
90+
)
91+
if active >= MAX_ACTIVE_BUILDS:
92+
return _response(
93+
429,
94+
{
95+
"error": (
96+
f"Build queue is at capacity ({MAX_ACTIVE_BUILDS} concurrent builds). "
97+
"Please retry later."
98+
)
99+
},
100+
)
101+
except Exception as e:
102+
print(f"WARNING: could not check queue depth: {e}")
103+
# Fail open — allow the build rather than blocking on a transient error.
104+
70105
# --- Create build record ---
71106
build_id = str(uuid.uuid4())
72107
now = int(time.time())
@@ -113,6 +148,47 @@ def handler(event, context):
113148
})
114149

115150

151+
def _validate_requirements(requirements: str):
152+
"""
153+
Validate requirements.txt content.
154+
155+
Returns an error string if invalid, or None if the content is acceptable.
156+
157+
Rules:
158+
- URL-based installs (git+, http://, https://, file://, vcs+...) are rejected.
159+
They bypass PyPI and allow arbitrary code to be pulled from any host.
160+
- Recursive includes (-r / --requirement) and constraint files
161+
(-c / --constraint) are rejected to prevent file-system reads on the
162+
builder instance.
163+
- Lines with obvious shell metacharacters (;, |, &, $, `) are rejected
164+
as a defence-in-depth measure against injection in downstream tools.
165+
"""
166+
URL_PREFIXES = ("git+", "http://", "https://", "file://", "svn+", "hg+", "bzr+")
167+
BLOCKED_FLAGS = ("-r ", "-r\t", "--requirement", "-c ", "-c\t", "--constraint")
168+
SHELL_META = re.compile(r'[;|&$`]')
169+
170+
for lineno, raw_line in enumerate(requirements.splitlines(), 1):
171+
line = raw_line.strip()
172+
if not line or line.startswith("#"):
173+
continue
174+
if any(line.startswith(p) for p in URL_PREFIXES):
175+
return (
176+
f"Line {lineno}: URL-based requirements are not allowed. "
177+
"Specify packages by name and version (e.g., requests==2.32.4)."
178+
)
179+
if any(line.startswith(f) for f in BLOCKED_FLAGS):
180+
return (
181+
f"Line {lineno}: recursive includes (-r) and constraint files (-c) "
182+
"are not allowed."
183+
)
184+
if SHELL_META.search(line):
185+
return (
186+
f"Line {lineno}: shell metacharacters (;, |, &, $, `) are not "
187+
"allowed in requirements."
188+
)
189+
return None
190+
191+
116192
def _response(status_code, body):
117193
"""Create API Gateway response with CORS headers."""
118194
return {

0 commit comments

Comments
 (0)