Skip to content

Commit 6199420

Browse files
authored
Merge pull request #18 from DropThe8bit/feat/yolo
[feat] YOLO 객체 탐지 모델 구현
2 parents 9e1e82d + bcfb6a5 commit 6199420

4 files changed

Lines changed: 96 additions & 3 deletions

File tree

everTale/app/api.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11

22
import os, shutil, uuid
33
from typing import List
4-
from fastapi import APIRouter, HTTPException
4+
from fastapi import APIRouter
55
from fastapi import File, UploadFile, Form
66
from fastapi.responses import JSONResponse, StreamingResponse
77

88
from . import dto
9-
from .service import image_service, quiz_service, story_service, voice_cloning_service
9+
from .service import image_service, quiz_service, story_service, voice_cloning_service, yolo_service
1010

1111
router = APIRouter()
1212

@@ -144,5 +144,14 @@ def delete_voice(request: dto.DeleteVoiceRequest):
144144
else:
145145
return JSONResponse(status_code=500, content={"error": "삭제 실패 또는 voice_key가 존재하지 않습니다."})
146146

147+
except Exception as e:
148+
return JSONResponse(status_code=500, content={"error": str(e)})
149+
150+
@router.post("/yolo", summary="객체 탐지 API", description="이미지를 리스트로 받아 객체를 탐지하고 이미지 index와 좌표를 반환합니다.")
def detect_object(request: dto.YOLOImageUrlsRequest):
    """Run object detection on the image URLs carried by the request.

    Returns the value produced by ``yolo_service.detect_object`` as a JSON
    response. Any exception raised while doing so is converted into a
    500 response whose body is ``{"error": <message>}``.
    """
    try:
        # Named `result` rather than `object` so the builtin is not shadowed.
        result = yolo_service.detect_object(request.image_urls)
        return JSONResponse(content=result)

    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e)})

everTale/app/dto.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,3 +91,16 @@ class DeleteVoiceRequest(BaseModel):
9191
description="삭제할 voice_key",
9292
json_schema_extra={"example": "9c74576ba45e6852f1c7d03"}
9393
)
94+
95+
# Request body for the object-detection endpoint.
# Documented with `#` comments rather than a class docstring so the generated
# schema description is left untouched.
class YOLOImageUrlsRequest(BaseModel):
    # Required list of image URLs (`...` marks the field as required).
    image_urls: List[str] = Field(
        ...,
        description="외부 서버에 업로드된 이미지 URL 리스트",
        json_schema_extra={
            "example": [
                "https://example.com/image1.jpg",
                "https://example.com/image2.jpg",
                "https://example.com/image3.jpg"
            ]
        }
    )

everTale/app/service/voice_cloning_service.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import os
2-
import random
32
import requests
43

54
from io import BytesIO
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
from ultralytics import YOLO
2+
from typing import List, Dict, Any
3+
4+
import os
5+
import cv2
6+
import random
7+
import requests
8+
import numpy as np
9+
10+
# Value passed to ``YOLO(...)`` when the model is loaded.
# Looked up at import time: importing this module raises KeyError when the
# YOLO_MODEL_PATH environment variable is not set.
YOLO_MODEL_PATH = os.environ["YOLO_MODEL_PATH"]
11+
12+
def load_model() -> YOLO:
    """Build a YOLO model from ``YOLO_MODEL_PATH``.

    Returns:
        The constructed model, or ``None`` when construction raises. The
        error is printed instead of being propagated, so callers must
        check for ``None`` themselves.
    """
    try:
        detector = YOLO(YOLO_MODEL_PATH)
        print("모델이 성공적으로 로드되었습니다.")
    except Exception as exc:
        print(f"모델 로드 중 오류가 발생했습니다: {exc}")
        detector = None
    return detector
20+
21+
def _url_to_bgr(url: str) -> np.ndarray:
    """Download the image at ``url`` and decode it with OpenCV.

    Args:
        url: Location of the image to fetch (10 second request timeout).

    Returns:
        The array produced by ``cv2.imdecode`` with ``cv2.IMREAD_COLOR``.

    Raises:
        requests.RequestException: If the request fails or the response has
            an error status (via ``raise_for_status``).
        ValueError: If OpenCV cannot decode the downloaded bytes.
    """
    response = requests.get(url, timeout=10)
    response.raise_for_status()

    raw_bytes = np.frombuffer(response.content, np.uint8)
    decoded = cv2.imdecode(raw_bytes, cv2.IMREAD_COLOR)
    if decoded is not None:
        return decoded
    # imdecode signals an undecodable payload by returning None.
    raise ValueError(f"이미지 디코딩 실패: {url}")
29+
30+
def detect_object(image_paths: List[str]) -> Dict[str, Any]:
    """Detect objects in up to eight images and return one random detection.

    Only the first 8 URLs of ``image_paths`` are processed. Every box the
    model reports for every image becomes a candidate, and a single
    candidate is chosen with ``random.choice``.

    Args:
        image_paths: Image URLs to download and run through the model.

    Returns:
        A dict of the form::

            {
                "index": int,   # 1-based position of the image in the input
                "url": str,     # URL of that image
                "detection": {
                    "xCoordinate": float,  # box center x
                    "yCoordinate": float,  # box center y
                    "width": float,        # HALF of the box width
                    "height": float,       # HALF of the box height
                },
            }

        Coordinates are derived from ``results[0].boxes.xyxy``. When the
        input is empty or no image yields a box, the result is
        ``{"index": None, "url": None, "detection": None}``.

    Raises:
        RuntimeError: If ``load_model()`` could not provide a model. Without
            this check every image would fail inside the per-image handler
            and a broken model would be reported as "nothing detected".
    """
    urls = image_paths[:8]
    if not urls:
        # Nothing to inspect, so there is no need to load the model.
        return {"index": None, "url": None, "detection": None}

    model = load_model()
    if model is None:
        raise RuntimeError("YOLO 모델을 로드하지 못했습니다.")

    candidates: List[Dict[str, Any]] = []

    for idx, url in enumerate(urls, start=1):
        try:
            img = _url_to_bgr(url)
            results = model.predict(source=img, verbose=False)
            if not results or results[0].boxes is None or results[0].boxes.shape[0] == 0:
                continue

            for box in results[0].boxes.xyxy:
                xmin, ymin, xmax, ymax = box
                candidates.append({
                    "index": idx,
                    "url": url,
                    "detection": {
                        "xCoordinate": float((xmin + xmax) / 2.0),
                        "yCoordinate": float((ymin + ymax) / 2.0),
                        # The emitted "width"/"height" are half extents.
                        "width": float((xmax - xmin) / 2.0),
                        "height": float((ymax - ymin) / 2.0),
                    },
                })
        except Exception as e:
            # Best effort per image: one bad URL must not fail the whole
            # request, but the failure should still be visible in the logs.
            print(f"이미지 처리 중 오류가 발생했습니다 (index={idx}, url={url}): {e}")
            continue

    if not candidates:
        return {"index": None, "url": None, "detection": None}

    return random.choice(candidates)

0 commit comments

Comments
 (0)