Skip to content

Commit 5d57c7b

Browse files
Fix: Aufgaben 1-3 implementiert - Stop-Taste GPU-Kill, Slider Swipe-Bug, SecurityException bei Human Expert, detect-changes für workflow_dispatch
1 parent 3957246 commit 5d57c7b

4 files changed

Lines changed: 143 additions & 75 deletions

File tree

.github/workflows/manual.yml

Lines changed: 1 addition & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -21,16 +21,7 @@ jobs:
2121
- name: Detect changed files
2222
id: changes
2323
run: |
24-
# Bei workflow_dispatch immer alles bauen
25-
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
26-
echo "app=true" >> $GITHUB_OUTPUT
27-
echo "humanoperator=true" >> $GITHUB_OUTPUT
28-
echo "shared=true" >> $GITHUB_OUTPUT
29-
echo "Manual dispatch - building all modules"
30-
exit 0
31-
fi
32-
33-
# Geänderte Dateien im letzten Commit ermitteln
24+
# Geänderte Dateien im letzten Commit ermitteln (gilt für push UND workflow_dispatch)
3425
CHANGED_FILES=$(git diff --name-only HEAD~1 HEAD 2>/dev/null || echo "")
3526
3627
# Falls kein vorheriger Commit existiert (erster Commit), alles bauen

app/src/main/kotlin/com/google/ai/sample/MenuScreen.kt

Lines changed: 41 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -51,9 +51,47 @@ import android.os.StatFs
5151
import com.google.ai.sample.feature.multimodal.ModelDownloadManager
5252
import androidx.compose.runtime.collectAsState
5353
import androidx.compose.ui.input.pointer.pointerInput
54+
import androidx.compose.ui.input.pointer.PointerEventPass
55+
import androidx.compose.foundation.gestures.awaitEachGesture
56+
import androidx.compose.foundation.gestures.awaitFirstDown
5457
import androidx.compose.foundation.gestures.detectHorizontalDragGestures
5558
import java.io.File
5659

60+
/**
61+
* Modifier meant to keep horizontal touch movement on a Slider from being
62+
* picked up by an enclosing LazyColumn (addresses the swipe bug where
63+
* dragging across a Slider inside a LazyColumn stutters).
64+
*
65+
* Once horizontal movement dominates (dx > dy) on the Main pass, the first change of every
66+
* later event of that gesture is consumed until no pointer is pressed; only the first pointer
67+
* change per event is inspected. The Slider is intended to keep control (not verifiable from this snippet).
68+
*/
69+
fun Modifier.sliderFriendly(): Modifier = this.pointerInput(Unit) {
70+
awaitEachGesture {
71+
// Wait for the first touch (observe only, do not consume it)
72+
val down = awaitFirstDown(requireUnconsumed = false)
73+
var lastX = down.position.x
74+
var isDraggingHorizontally = false
75+
76+
// Observe the following pointer events of this gesture
77+
do {
78+
val event = awaitPointerEvent(pass = PointerEventPass.Main)
79+
val change = event.changes.firstOrNull() ?: break
80+
81+
val dx = kotlin.math.abs(change.position.x - lastX)
82+
val dy = kotlin.math.abs(change.position.y - change.previousPosition.y)
83+
84+
// If horizontal movement dominates (or already did earlier in this gesture), consume the
85+
// change so that the LazyColumn does not start scrolling vertically
86+
if (dx > dy || isDraggingHorizontally) {
87+
isDraggingHorizontally = true
88+
change.consume()
89+
}
90+
lastX = change.position.x
91+
} while (event.changes.any { it.pressed })
92+
}
93+
}
94+
5795
data class MenuItem(
5896
val routeId: String,
5997
val titleResId: Int,
@@ -357,7 +395,7 @@ fun MenuScreen(
357395
},
358396
valueRange = 0f..2f,
359397
steps = 0,
360-
modifier = Modifier.fillMaxWidth()
398+
modifier = Modifier.fillMaxWidth().sliderFriendly()
361399
)
362400

363401
Spacer(modifier = Modifier.height(8.dp))
@@ -380,7 +418,7 @@ fun MenuScreen(
380418
},
381419
valueRange = 0f..1f,
382420
steps = 0,
383-
modifier = Modifier.fillMaxWidth()
421+
modifier = Modifier.fillMaxWidth().sliderFriendly()
384422
)
385423

386424
Spacer(modifier = Modifier.height(8.dp))
@@ -403,7 +441,7 @@ fun MenuScreen(
403441
},
404442
valueRange = 0f..100f,
405443
steps = 0,
406-
modifier = Modifier.fillMaxWidth()
444+
modifier = Modifier.fillMaxWidth().sliderFriendly()
407445
)
408446

409447
if (selectedModel == ModelOption.GEMMA_3N_E4B_IT) {

app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningScreen.kt

Lines changed: 23 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -172,6 +172,8 @@ internal fun PhotoReasoningRoute(
172172
val isInitialized by viewModel.isInitialized.collectAsState()
173173
val modelName by viewModel.modelNameState.collectAsState()
174174
val userInput by viewModel.userInput.collectAsState()
175+
val isGenerationRunning by viewModel.isGenerationRunningFlow.collectAsState()
176+
val isOfflineGpuModelLoaded by viewModel.isOfflineGpuModelLoadedFlow.collectAsState()
175177

176178
// Hoisted: var showNotificationRationaleDialog by rememberSaveable { mutableStateOf(false) }
177179
// This state will now be managed in PhotoReasoningRoute and passed down.
@@ -248,12 +250,12 @@ internal fun PhotoReasoningRoute(
248250
},
249251
isKeyboardOpen = isKeyboardOpen,
250252
onStopClicked = { viewModel.onStopClicked() },
251-
// showNotificationRationaleDialog = showNotificationRationaleDialogStateInRoute, // Removed
252-
// onShowNotificationRationaleDialogChange = { showNotificationRationaleDialogStateInRoute = it }, // Removed
253-
isInitialized = isInitialized, // Pass the collected state
253+
isInitialized = isInitialized,
254254
modelName = modelName,
255255
userQuestion = userInput,
256-
onUserQuestionChanged = { viewModel.updateUserInput(it) }
256+
onUserQuestionChanged = { viewModel.updateUserInput(it) },
257+
isGenerationRunning = isGenerationRunning,
258+
isOfflineGpuModelLoaded = isOfflineGpuModelLoaded
257259
)
258260
}
259261

@@ -274,12 +276,12 @@ fun PhotoReasoningScreen(
274276
onClearChatHistory: () -> Unit = {},
275277
isKeyboardOpen: Boolean,
276278
onStopClicked: () -> Unit = {},
277-
// showNotificationRationaleDialog: Boolean, // Removed
278-
// onShowNotificationRationaleDialogChange: (Boolean) -> Unit, // Removed
279-
isInitialized: Boolean = true, // Added parameter with default for preview
279+
isInitialized: Boolean = true,
280280
modelName: String = "",
281281
userQuestion: String = "",
282-
onUserQuestionChanged: (String) -> Unit = {}
282+
onUserQuestionChanged: (String) -> Unit = {},
283+
isGenerationRunning: Boolean = false,
284+
isOfflineGpuModelLoaded: Boolean = false
283285
) {
284286
val imageUris = rememberSaveable(saver = UriSaver()) { mutableStateListOf() }
285287
var isSystemMessageFocused by rememberSaveable { mutableStateOf(false) }
@@ -499,11 +501,9 @@ fun PhotoReasoningScreen(
499501
)
500502
}
501503

502-
// Task 18: Always show Stop button for offline model to allow manual closing
503-
val showStopButton = modelName == "gemma-3n-e4b-it" || uiState is PhotoReasoningUiState.Loading
504-
505-
val isGenerating = (uiState is PhotoReasoningUiState.Loading) && (messages.lastOrNull()?.isPending == true)
506-
val showTextFieldRow = !isGenerating
504+
val showStopButton = isGenerationRunning || isOfflineGpuModelLoaded
505+
val stopButtonText = if (isGenerationRunning) "Stop" else "Modell entladen"
506+
val showTextFieldRow = !isGenerationRunning
507507

508508
if (showTextFieldRow) {
509509
Card(modifier = Modifier.fillMaxWidth()) {
@@ -574,10 +574,18 @@ fun PhotoReasoningScreen(
574574
} // Closes Card
575575
}
576576

577-
// Task 1: Stop button is independent and below the text field
577+
// Stop button: zeigt 'Stop' bei aktiver Generierung, 'Modell entladen' bei geladenem GPU-Modell
578578
if (showStopButton) {
579579
Spacer(modifier = Modifier.height(8.dp))
580-
StopButton(onClick = onStopClicked)
580+
Button(
581+
onClick = onStopClicked,
582+
colors = ButtonDefaults.buttonColors(containerColor = Color.Red),
583+
modifier = Modifier
584+
.fillMaxWidth()
585+
.padding(8.dp)
586+
) {
587+
Text(stopButtonText, color = Color.White)
588+
}
581589
}
582590
}
583591

app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt

Lines changed: 78 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -114,6 +114,12 @@ class PhotoReasoningViewModel(
114114

115115
private val _showStopNotificationFlow = MutableStateFlow(false)
116116
val showStopNotificationFlow: StateFlow<Boolean> = _showStopNotificationFlow.asStateFlow()
117+
118+
private val _isGenerationRunningFlow = MutableStateFlow(false)
119+
val isGenerationRunningFlow: StateFlow<Boolean> = _isGenerationRunningFlow.asStateFlow()
120+
121+
private val _isOfflineGpuModelLoadedFlow = MutableStateFlow(false)
122+
val isOfflineGpuModelLoadedFlow: StateFlow<Boolean> = _isOfflineGpuModelLoadedFlow.asStateFlow()
117123

118124
// Keep track of the latest screenshot URI
119125
private var latestScreenshotUri: Uri? = null
@@ -389,11 +395,32 @@ class PhotoReasoningViewModel(
389395
}
390396
}
391397

398+
/**
 * Reports whether a generation is considered to be in progress.
 *
 * @return true when the last chat message is a pending MODEL message, or when
 * currentReasoningJob or commandProcessingJob is active.
 */
private fun isGenerationRunning(): Boolean {
399+
val lastMessage = _chatState.getAllMessages().lastOrNull()
400+
val hasPendingModelMessage =
401+
lastMessage?.participant == PhotoParticipant.MODEL && lastMessage.isPending
402+
val hasActiveJob =
403+
currentReasoningJob?.isActive == true || commandProcessingJob?.isActive == true
404+
return hasPendingModelMessage || hasActiveJob
405+
}
406+
407+
private fun isOfflineGpuModelLoaded(): Boolean {
408+
return com.google.ai.sample.GenerativeAiViewModelFactory.getCurrentModel() == ModelOption.GEMMA_3N_E4B_IT &&
409+
com.google.ai.sample.GenerativeAiViewModelFactory.getBackend() == InferenceBackend.GPU &&
410+
llmInference != null
411+
}
412+
413+
/**
 * Re-evaluates isGenerationRunning() and isOfflineGpuModelLoaded() and publishes the results
 * to _isGenerationRunningFlow and _isOfflineGpuModelLoadedFlow.
 */
private fun refreshStopButtonState() {
414+
_isGenerationRunningFlow.value = isGenerationRunning()
415+
_isOfflineGpuModelLoadedFlow.value = isOfflineGpuModelLoaded()
416+
}
417+
392418
fun closeOfflineModel() {
393419
try {
394420
llmInference?.close()
395421
llmInference = null
396422
System.gc()
423+
refreshStopButtonState()
397424
Log.d(TAG, "Offline model explicitly closed to free RAM")
398425
} catch (e: Exception) {
399426
Log.w(TAG, "Error closing offline model", e)
@@ -1073,18 +1100,27 @@ class PhotoReasoningViewModel(
10731100
}
10741101

10751102
fun onStopClicked() {
1076-
if (isLiveMode) {
1077-
// For live mode, close the connection
1078-
liveApiManager?.close()
1103+
_showStopNotificationFlow.value = false
1104+
1105+
val generationRunning = isGenerationRunning()
1106+
1107+
// Kein aktiver Lauf: zweiter Klick => Modell entladen, keine Chat-Nachricht
1108+
if (!generationRunning) {
1109+
if (isOfflineGpuModelLoaded()) {
1110+
closeOfflineModel()
1111+
Log.d(TAG, "Stop clicked while idle: offline GPU model closed to free RAM")
1112+
} else {
1113+
refreshStopButtonState()
1114+
Log.d(TAG, "Stop clicked while idle: nothing to stop and no offline GPU model loaded")
1115+
}
1116+
return
10791117
}
10801118

1081-
// Close offline model instance to force stop generation or just release RAM
1082-
if (com.google.ai.sample.GenerativeAiViewModelFactory.getCurrentModel() == ModelOption.GEMMA_3N_E4B_IT) {
1083-
closeOfflineModel()
1119+
// Aktive Generierung: nur stoppen, Modell NICHT direkt schließen
1120+
if (isLiveMode) {
1121+
liveApiManager?.close()
10841122
}
10851123

1086-
// Rest of the existing onStopClicked code
1087-
_showStopNotificationFlow.value = false
10881124
stopExecutionFlag.set(true)
10891125
currentReasoningJob?.cancel()
10901126
commandProcessingJob?.cancel()
@@ -1103,31 +1139,28 @@ class PhotoReasoningViewModel(
11031139
)
11041140
)
11051141
} else if (lastMessage != null && lastMessage.participant == PhotoParticipant.MODEL && !lastMessage.isPending) {
1106-
// If the last message was a successful model response, update it.
1107-
messages[messages.size - 1] = lastMessage.copy(text = lastMessage.text + "\n\n[Stopped by user]")
1142+
messages[messages.lastIndex] =
1143+
lastMessage.copy(text = lastMessage.text + "\n\n[Stopped by user]")
11081144
} else {
1109-
// If no relevant model message, or last message was user/error, add a new model message
1110-
messages.add(
1145+
messages.add(
11111146
PhotoReasoningMessage(
11121147
text = statusMessage,
11131148
participant = PhotoParticipant.MODEL,
11141149
isPending = false
11151150
)
11161151
)
11171152
}
1153+
11181154
_chatState.setAllMessages(messages)
11191155
_chatMessagesFlow.value = _chatState.getAllMessages()
1120-
1121-
1122-
// _uiState.value = PhotoReasoningUiState.Stopped; // No longer setting this as the final state.
1123-
_commandExecutionStatus.value = "" // Set to empty string
1156+
_commandExecutionStatus.value = ""
11241157
_detectedCommands.value = emptyList()
11251158
Log.d(TAG, "Stop clicked, operations cancelled, command status cleared.")
11261159

1127-
// Set a success state to indicate the stop operation itself was successful
1128-
// and the UI can return to an idle/interactive state.
11291160
_uiState.value = PhotoReasoningUiState.Success("Operation stopped.")
11301161
Log.d(TAG, "UI updated to Success state after stop.")
1162+
1163+
refreshStopButtonState()
11311164
}
11321165

11331166
/**
@@ -1328,42 +1361,40 @@ class PhotoReasoningViewModel(
13281361
viewModelScope.launch(Dispatchers.Main) {
13291362
replaceAiMessageText("Expert found! Requesting screen capture permission...", isPending = true)
13301363

1331-
// Request a fresh MediaProjection specifically for WebRTC
1332-
// This does NOT start ScreenCaptureService - avoids token reuse crash
1364+
// Request a fresh MediaProjection specifically for WebRTC.
1365+
// MainActivity startet bereits ACTION_KEEP_ALIVE_FOR_WEBRTC BEVOR dieser Callback gerufen wird.
1366+
// Kein weiterer startForegroundService()-Aufruf nötig - verhindert ForegroundServiceDidNotStartInTimeException.
13331367
val mainActivity = MainActivity.getInstance()
13341368
if (mainActivity != null) {
13351369
mainActivity.requestMediaProjectionForWebRTC { resultCode, resultData ->
1336-
Log.d(TAG, "WebRTC MediaProjection granted. Starting foreground service first, then screen capture.")
1370+
Log.d(TAG, "WebRTC MediaProjection granted. Service läuft bereits via KEEP_ALIVE. Starte Screen Capture.")
13371371
replaceAiMessageText("Establishing video connection...", isPending = true)
13381372

1339-
// Task 1: Only start ScreenCaptureService if not already running
1340-
// This prevents ForegroundServiceDidNotStartInTimeException
1341-
if (!ScreenCaptureService.isRunning()) {
1342-
val serviceIntent = Intent(mainActivity, ScreenCaptureService::class.java).apply {
1343-
action = ScreenCaptureService.ACTION_START_CAPTURE
1344-
putExtra(ScreenCaptureService.EXTRA_RESULT_CODE, resultCode)
1345-
putExtra(ScreenCaptureService.EXTRA_RESULT_DATA, resultData)
1346-
putExtra(ScreenCaptureService.EXTRA_TAKE_SCREENSHOT_ON_START, false)
1347-
}
1348-
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
1349-
mainActivity.startForegroundService(serviceIntent)
1350-
} else {
1351-
mainActivity.startService(serviceIntent)
1352-
}
1353-
} else {
1354-
Log.d(TAG, "ScreenCaptureService already running, skipping service start.")
1355-
}
1373+
// KEIN startForegroundService() hier - MainActivity hat bereits ACTION_KEEP_ALIVE_FOR_WEBRTC gesendet.
1374+
// Das vermeidet doppelten Service-Start und ForegroundServiceDidNotStartInTimeException.
13561375

1357-
// Delay to ensure foreground service is up before WebRTC capture
13581376
viewModelScope.launch {
1359-
delay(500)
1360-
// Start screen capture for WebRTC with fresh permission data
1361-
webRTCSender?.startScreenCapture(resultData)
1362-
webRTCSender?.createPeerConnection()
1363-
1364-
// Create Offer
1365-
webRTCSender?.createOffer { sdp ->
1366-
signalingClient?.sendOffer(sdp)
1377+
// Kurze Verzögerung zur Stabilisierung des Foreground-Services
1378+
delay(300)
1379+
try {
1380+
// Start screen capture for WebRTC with fresh permission data
1381+
webRTCSender?.startScreenCapture(resultData)
1382+
webRTCSender?.createPeerConnection()
1383+
1384+
// Create Offer
1385+
webRTCSender?.createOffer { sdp ->
1386+
signalingClient?.sendOffer(sdp)
1387+
}
1388+
} catch (e: SecurityException) {
1389+
Log.e(TAG, "SecurityException beim WebRTC Screen Capture - MediaProjection Token ungültig?", e)
1390+
viewModelScope.launch(Dispatchers.Main) {
1391+
_uiState.value = PhotoReasoningUiState.Error("Screen capture permission expired. Please try again.")
1392+
}
1393+
} catch (e: Exception) {
1394+
Log.e(TAG, "Fehler beim Starten des WebRTC Screen Capture", e)
1395+
viewModelScope.launch(Dispatchers.Main) {
1396+
_uiState.value = PhotoReasoningUiState.Error("Video connection failed: ${e.message}")
1397+
}
13671398
}
13681399
}
13691400
}

0 commit comments

Comments
 (0)