Skip to content

Commit 5283da2

Browse files
committed
Change upload endpoint to work with new s3 bucket
1 parent 157e3cc commit 5283da2

4 files changed

Lines changed: 266 additions & 112 deletions

File tree

api/handler.py

Lines changed: 110 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
log = logging.getLogger()
2828
log.setLevel(logging.INFO)
2929

30+
images_table = dynamodb_r.Table(os.environ['IMAGES_TABLE'])
3031
info_images_table = dynamodb_r.Table(os.getenv('INFO_IMAGES_TABLE'))
3132
recent_uploads_table = dynamodb_r.Table(os.getenv('UPLOADS_LOG_TABLE_NAME'))
3233
s3 = boto3.client('s3', REGION, config=Config(signature_version='s3v4'))
@@ -45,116 +46,115 @@ def default(event, context):
4546
log.info(json.dumps(event, indent=2))
4647
return http_response(HTTPStatus.OK, "New photon ranch API")
4748

48-
49-
def upload(event, context):
    """Generates a presigned URL to upload files at AWS.

    A request for a presigned post URL requires the name of the object.
    This is sent in a single string under the key 'object_name' in the
    json-string body of the request.

    Args:
        event.body.object_name (str): Name of the file to upload.
        event.body.s3_directory (str, optional): Directory (key prefix)
            inside the main s3 bucket. Defaults to 'data'.
        event.body.info_channel (int, optional): Channel to use for an
            info image; must be 1, 2, or 3. Defaults to 1.
        event.body.metadata (str, optional): JSON string. It is parsed and
            re-serialized here but not stored yet (see the TODO below).

    Returns:
        200 status code with the presigned post data if successful.
        400 status code if object_name is missing or metadata is not
            valid JSON.
        403 status code if incorrect s3 directory or info channel supplied.

    Example request body:
    '{"object_name":"a_file.txt", "s3_directory": "data"}'
    This request will save an image into the main s3 bucket as:
    MAIN_BUCKET_NAME/data/a_file.txt

    * * *

    Another example Python program using the presigned URL to upload a file:

    with open(object_name, 'rb') as f:
        files = {'file': (object_name, f)}
        http_response = requests.post(
            response['url'], data=response['fields'], files=files)
    # If successful, returns HTTP status code 204
    log.info(f'File upload HTTP status code: {http_response.status_code}')

    * * *

    If the upload URL is to be used with an info image, then
    the request must include 'info_channel' with a value of 1, 2, or 3.
    This will prompt an update to the info-images table, where it will
    store the provided base_filename in the row with pk=={site}#metadata,
    under the attribute channel{n}.

    For example, the request body:
    {
        "object_name": "tst-inst-20211231-00000001-EX10.jpg",
        "s3_directory": "info-images",
        "info_channel": 2
    }
    will result in the info-images table being updated with:
    {
        "pk": "tst#metadata",
        "channel2": "tst-inst-20211231-00000001",
        ...
    }
    The URL returned by this endpoint will allow a POST request
    to s3 with the actual file. The code that processes new s3 objects
    will see that it is an info image, then query the info-images table
    to find which channel to use, and finally update the info-images
    table with an entry like:
    {
        "pk": "tst#2",
        "jpg_10_exists": true,
        ...
    }
    This is the object that is queried to find the info image at
    site 'tst', channel 2.
    """

    log.info(json.dumps(event, indent=2))
    body = _get_body(event)

    # Retrieve and validate the s3_directory
    s3_directory = body.get('s3_directory', 'data')
    filename = body.get('object_name')
    if s3_directory not in ['data', 'info-images', 'allsky', 'test']:
        error_msg = "s3_directory must be either 'data', 'info-images', or 'allsky'."
        log.warning(error_msg)
        return http_response(HTTPStatus.FORBIDDEN, error_msg)

    # Without a filename there is no s3 key to sign; reject the request
    # instead of failing later on filename.split / body['object_name'].
    if not isinstance(filename, str) or not filename:
        error_msg = "object_name must be a non-empty string."
        log.warning(error_msg)
        return http_response(HTTPStatus.BAD_REQUEST, error_msg)

    # If info image: get the channel number to use
    if s3_directory == 'info-images':

        site = filename.split('-')[0]
        base_filename = get_base_filename_from_full_filename(filename)

        # A non-numeric channel is reported the same way as an out-of-range
        # one rather than surfacing as an unhandled ValueError.
        raw_channel = body.get('info_channel', 1)
        try:
            channel = int(raw_channel)
        except (TypeError, ValueError):
            channel = raw_channel
        if channel not in [1, 2, 3]:
            error_msg = f"Value for info_channel must be either 1, 2, or 3. Received {channel} instead."
            log.warning(error_msg)
            return http_response(HTTPStatus.FORBIDDEN, error_msg)

        # Create an entry to track metadata for the next info image that will be uploaded
        info_images_table.update_item(
            Key={'pk': f'{site}#metadata'},
            UpdateExpression=f"set channel{channel}=:basefilename",
            ExpressionAttributeValues={':basefilename': base_filename}
        )

    # Get upload metadata
    metadata = body.get('metadata', None)
    if metadata is not None:
        try:
            metadata = json.dumps(json.loads(metadata), cls=DecimalEncoder)
        except (TypeError, ValueError):
            error_msg = "metadata must be a valid JSON string."
            log.warning(error_msg)
            return http_response(HTTPStatus.BAD_REQUEST, error_msg)

    # TODO: if applicable, add metadata to database

    key = f"{s3_directory}/{filename}"
    url = s3.generate_presigned_post(
        Bucket=BUCKET_NAME,
        Key=key,
        ExpiresIn=S3_PUT_TTL
    )
    log.info(f"Presigned upload url: {url}")
    return http_response(HTTPStatus.OK, url)
49+
# def upload(event, context):
50+
# """Generates a presigned URL to upload files at AWS.
51+
52+
# A request for a presigned post URL requires the name of the object.
53+
# This is sent in a single string under the key 'object_name' in the
54+
# json-string body of the request.
55+
56+
# Args:
57+
# event.body.s3_directory (str): Name of the s3 bucket to use.
58+
# event.body.filename (str): Name of the file to upload.
59+
60+
# Returns:
61+
# 204 status code with presigned upload URL string if successful.
62+
# 403 status code if incorrect s3 bucket or info channel supplied.
63+
64+
# Example request body:
65+
# '{"object_name":"a_file.txt", "s3_directory": "data"}'
66+
# This request will save an image into the main s3 bucket as:
67+
# MAIN_BUCKET_NAME/data/a_file.txt
68+
69+
# * * *
70+
71+
# Another example Python program using the presigned URL to upload a file:
72+
73+
# with open(object_name, 'rb') as f:
74+
# files = {'file': (object_name, f)}
75+
# http_response = requests.post(
76+
# response['url'], data=response['fields'], files=files)
77+
# # If successful, returns HTTP status code 204
78+
# log.info(f'File upload HTTP status code: {http_response.status_code}')
79+
80+
# * * *
81+
82+
# If the upload URL is to be used with an info image, then
83+
# the request must include 'info_channel' with a value of 1, 2, or 3.
84+
# This will prompt an update to the info-images table, where it will
85+
# store the provided base_filename in the row with pk=={site}#metadata,
86+
# under the attribute channel{n}.
87+
88+
# For example, the request body:
89+
# {
90+
# "object_name": "tst-inst-20211231-00000001-EX10.jpg",
91+
# "s3_directory": "info-images",
92+
# "info_channel": 2
93+
# }
94+
# will result in the info-images table being updated with:
95+
# {
96+
# "pk": "tst#metadata",
97+
# "channel2": "tst-inst-20211231-00000001",
98+
# ...
99+
# }
100+
# The URL returned by this endpoint will allow a POST request
101+
# to s3 with the actual file. The code that processes new s3 objects
102+
# will see that it is an info image, then query the info-images table
103+
# to find which channel to use, and finally update the info-images
104+
# table with an entry like:
105+
# {
106+
# "pk": "tst#2",
107+
# "jpg_10_exists": true,
108+
# ...
109+
# }
110+
# This is the object that is queried to find the info image at
111+
# site 'tst', channel 2.
112+
# """
113+
114+
# log.info(json.dumps(event, indent=2))
115+
# body = _get_body(event)
116+
117+
# # Retrieve and validate the s3_directory
118+
# s3_directory = body.get('s3_directory', 'data')
119+
# filename = body.get('object_name')
120+
# if s3_directory not in ['data', 'info-images', 'allsky', 'test']:
121+
# error_msg = "s3_directory must be either 'data', 'info-images', or 'allsky'."
122+
# log.warning(error_msg)
123+
# return http_response(HTTPStatus.FORBIDDEN, error_msg)
124+
125+
# # If info image: get the channel number to use
126+
# if s3_directory == 'info-images':
127+
128+
# site = filename.split('-')[0]
129+
# base_filename = get_base_filename_from_full_filename(filename)
130+
# channel = int(body.get('info_channel', 1))
131+
# if channel not in [1,2,3]:
132+
# error_msg = f"Value for info_channel must be either 1, 2, or 3. Received {channel} instead."
133+
# log.warning(error_msg)
134+
# return http_response(HTTPStatus.FORBIDDEN, error_msg)
135+
136+
# # Create an entry to track metadata for the next info image that will be uploaded
137+
# info_images_table.update_item(
138+
# Key={ 'pk': f'{site}#metadata' },
139+
# UpdateExpression=f"set channel{channel}=:basefilename",
140+
# ExpressionAttributeValues={':basefilename': base_filename}
141+
# )
142+
143+
# # Get upload metadata
144+
# metadata = body.get('metadata', None)
145+
# if metadata is not None:
146+
# metadata = json.dumps(json.loads(metadata), cls=DecimalEncoder)
147+
148+
# # TODO: if applicable, add metadata to database
149+
150+
# key = f"{s3_directory}/{body['object_name']}"
151+
# url = s3.generate_presigned_post(
152+
# Bucket = BUCKET_NAME,
153+
# Key = key,
154+
# ExpiresIn = S3_PUT_TTL
155+
# )
156+
# log.info(f"Presigned upload url: {url}")
157+
# return http_response(HTTPStatus.OK, url)
158158

159159

160160
def download(event, context):

api/tests/unit/test_handler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from http import HTTPStatus
44

55
from api.handler import info_images_table
6-
from api.handler import upload
6+
from api.upload import upload
77
from api.handler import get_recent_uploads
88

99
from api.helpers import get_base_filename_from_full_filename

api/upload.py

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
# api/upload.py (upload endpoint, moved out of api/handler.py)
2+
import json
3+
import os
4+
import boto3
5+
import time
6+
from http import HTTPStatus
7+
8+
from api.helpers import BUCKET_NAME, REGION, S3_PUT_TTL, get_base_filename_from_full_filename, http_response, _get_body, DecimalEncoder
9+
10+
# Initialize AWS clients
11+
s3 = boto3.client('s3', REGION)
12+
dynamodb = boto3.resource('dynamodb', REGION)
13+
14+
# Get DynamoDB table references
15+
images_table = dynamodb.Table(os.environ['IMAGES_TABLE'])
16+
info_images_table = dynamodb.Table(os.environ['INFO_IMAGES_TABLE'])
17+
18+
def upload(event, context):
    """Generates a presigned URL to upload files at AWS.

    For JPGs sent to the 'data' directory, this version also writes an
    item to the images table when the upload URL is requested, rather than
    waiting for the file to arrive in S3. That write is best-effort: if it
    fails, the error is printed and the upload URL is still returned.

    Args:
        event.body.object_name (str): Name of the file to upload.
        event.body.s3_directory (str, optional): Directory (key prefix)
            inside the bucket. Defaults to 'data'.
        event.body.header_data (dict, optional): Metadata for the image.
        event.body.info_channel (int, optional): Channel for info images;
            must be 1, 2, or 3. Defaults to 1.

    Returns:
        200 status code with presigned upload URL if successful.
        400 status code if object_name is missing.
        403 status code if incorrect s3 directory or info channel supplied.
    """

    # Parse the request body
    body = _get_body(event)

    # Retrieve and validate the s3_directory
    s3_directory = body.get('s3_directory', 'data')
    filename = body.get('object_name')

    if s3_directory not in ['data', 'info-images', 'allsky', 'test']:
        error_msg = "s3_directory must be either 'data', 'info-images', or 'allsky'."
        return http_response(HTTPStatus.FORBIDDEN, error_msg)

    # Without a filename the key below would become "<dir>/None" (or the
    # branches below would crash on filename.split), so reject up front.
    if not isinstance(filename, str) or not filename:
        error_msg = "object_name must be a non-empty string."
        return http_response(HTTPStatus.BAD_REQUEST, error_msg)

    # Handle info images
    if s3_directory == 'info-images':
        site = filename.split('-')[0]
        base_filename = get_base_filename_from_full_filename(filename)

        # A non-numeric channel is reported the same way as an out-of-range
        # one rather than surfacing as an unhandled ValueError.
        raw_channel = body.get('info_channel', 1)
        try:
            channel = int(raw_channel)
        except (TypeError, ValueError):
            channel = raw_channel

        if channel not in [1, 2, 3]:
            error_msg = f"Value for info_channel must be either 1, 2, or 3. Received {channel} instead."
            return http_response(HTTPStatus.FORBIDDEN, error_msg)

        # Create an entry to track metadata for the next info image
        info_images_table.update_item(
            Key={'pk': f'{site}#metadata'},
            UpdateExpression=f"set channel{channel}=:basefilename",
            ExpressionAttributeValues={':basefilename': base_filename}
        )

    # Handle regular images
    elif s3_directory == 'data' and filename.lower().endswith('.jpg'):
        base_filename = get_base_filename_from_full_filename(filename)
        site = base_filename.split('-')[0]

        # Get header metadata from the request; an explicit null or a
        # non-dict value is treated the same as "no header data".
        header_data = body.get('header_data')
        if not isinstance(header_data, dict):
            header_data = {}

        # Convert strings to appropriate types if needed
        header_data = sanitize_header_data(header_data)

        # Get the capture timestamp from the header data
        capture_date = get_capture_timestamp(header_data)

        # Store the metadata in DynamoDB
        try:
            images_table.put_item(
                Item={
                    'site': site,
                    'sort_date': capture_date,
                    'base_filename': base_filename,
                    'capture_date': capture_date,
                    'username': header_data.get('USERNAME', ''),
                    'user_id': header_data.get('USERID', ''),
                    'header_data': header_data,
                    'processed': False,
                    'last_updated': int(time.time() * 1000)
                }
            )
        except Exception as e:
            # Deliberately broad: a failed metadata write must not block
            # the caller from receiving the upload URL.
            print(f"Error storing metadata in DynamoDB: {str(e)}")

    # Generate the presigned upload URL
    # NOTE(review): api/handler.py created its s3 client with
    # Config(signature_version='s3v4'); the module-level client in this
    # file does not -- confirm presigned POSTs are still accepted.
    key = f"{s3_directory}/{filename}"
    url = s3.generate_presigned_post(
        Bucket=BUCKET_NAME,
        Key=key,
        ExpiresIn=S3_PUT_TTL
    )

    return http_response(HTTPStatus.OK, url)
106+
107+
def sanitize_header_data(header_data):
    """Return a copy of header_data with string values coerced to numbers.

    String values are stripped of surrounding whitespace and of one pair
    of enclosing single quotes. Strings containing a '.' are then parsed
    as Decimal and other strings as int; strings that do not parse are
    kept as (cleaned) strings.

    Decimal is used instead of float because the result is written to
    DynamoDB through boto3, whose serializer rejects Python floats. For the
    same reason, finite float values that arrive already parsed are
    converted to Decimal. All other non-string values are kept as is.

    Args:
        header_data (dict): Mapping of header keywords to raw values.
            None or an empty mapping yields an empty dict.

    Returns:
        dict: New dict with the same keys and sanitized values.
    """
    # Local import, matching the style used by get_capture_timestamp.
    from decimal import Decimal, InvalidOperation

    if not header_data:
        return {}

    sanitized = {}

    for key, value in header_data.items():
        if isinstance(value, str):
            value = value.strip()

            # Remove one pair of enclosing quotes; the length check keeps a
            # lone "'" from being treated as both the opening and closing
            # quote.
            if len(value) >= 2 and value.startswith("'") and value.endswith("'"):
                value = value[1:-1]

            # Try to convert numeric values
            try:
                if '.' in value:
                    number = Decimal(value)
                    if not number.is_finite():
                        raise ValueError(value)
                    sanitized[key] = number
                else:
                    sanitized[key] = int(value)
            except (ValueError, TypeError, InvalidOperation):
                # Keep as string if conversion fails
                sanitized[key] = value
        elif isinstance(value, float) and value == value and abs(value) != float('inf'):
            # Finite float: go through str() so 0.1 becomes Decimal('0.1')
            # rather than the long binary expansion of the float.
            sanitized[key] = Decimal(str(value))
        else:
            # Keep other non-string values as is
            sanitized[key] = value

    return sanitized
136+
137+
def get_capture_timestamp(header_data):
    """Extract the capture time from header data as epoch milliseconds.

    Reads header_data['DATE-OBS'], accepting 'YYYY-MM-DD HH:MM:SS' or
    'YYYY-MM-DDTHH:MM:SS', optionally followed by fractional seconds
    (digits beyond microsecond precision are dropped). The value is
    interpreted as UTC so the result does not depend on the host timezone.
    NOTE(review): UTC is the FITS default time scale for DATE-OBS --
    confirm that all sites write UTC.

    Args:
        header_data (dict): Header mapping, typically the output of
            sanitize_header_data.

    Returns:
        int: Milliseconds since the Unix epoch. If DATE-OBS is missing or
        cannot be parsed, the current time is returned instead.
    """
    from datetime import datetime, timedelta, timezone

    try:
        date_obs = header_data.get('DATE-OBS')
        if date_obs:
            # Replace 'T' with space if present
            text = date_obs.strip().replace('T', ' ')

            # strptime's %f accepts at most 6 digits, so the fraction is
            # split off and handled separately.
            whole, _, fraction = text.partition('.')
            if fraction and not fraction.isdigit():
                raise ValueError(f"invalid fractional seconds: {fraction!r}")
            micros = int(fraction[:6].ljust(6, '0')) if fraction else 0

            # Parse the timestamp
            dt = datetime.strptime(whole, '%Y-%m-%d %H:%M:%S')
            dt = dt.replace(microsecond=micros, tzinfo=timezone.utc)

            # Convert to milliseconds with integer arithmetic so no
            # millisecond is lost to float rounding.
            epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
            return (dt - epoch) // timedelta(milliseconds=1)
    except (AttributeError, TypeError, ValueError) as e:
        print(f"Error parsing DATE-OBS: {str(e)}")

    # Return current time as fallback
    return int(time.time() * 1000)

0 commit comments

Comments
 (0)