diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/AgentDeviceRunnerUITests-Bridging-Header.h b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/AgentDeviceRunnerUITests-Bridging-Header.h
index 74bc7ce87..e888189c6 100644
--- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/AgentDeviceRunnerUITests-Bridging-Header.h
+++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/AgentDeviceRunnerUITests-Bridging-Header.h
@@ -1,2 +1,3 @@
 #import "RunnerObjCExceptionCatcher.h"
+#import "RunnerAXSnapshotBridge.h"
 #import "RunnerSynthesizedGesture.h"
diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerAXSnapshotBridge.h b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerAXSnapshotBridge.h
new file mode 100644
index 000000000..24d95de74
--- /dev/null
+++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerAXSnapshotBridge.h
@@ -0,0 +1,22 @@
+#import
+#import
+
+NS_ASSUME_NONNULL_BEGIN
+
+/// Bridges the runtime-resolved XCTest accessibility snapshot request into Foundation containers.
+@interface RunnerAXSnapshotBridge : NSObject
+
+/// Requests an accessibility snapshot of `application` and serializes it into dictionaries.
+///
+/// @param application The application to snapshot; it is matched to an active AX application by process ID.
+/// @param maxDepth Depth limit for the request and the serialized tree; values below 0 are treated as 0.
+/// @param maxNodes Node budget for the serialized tree; values below 1 are treated as 1.
+/// @return `@{@"ok": @YES, @"root": node, @"truncated": flag}` on success, otherwise
+///         `@{@"ok": @NO, @"error": message}`. Exceptions raised while snapshotting become errors.
++ (NSDictionary *)snapshotTreeForApplication:(XCUIApplication *)application
+                                    maxDepth:(NSInteger)maxDepth
+                                    maxNodes:(NSInteger)maxNodes;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerAXSnapshotBridge.m b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerAXSnapshotBridge.m
new file mode 100644
index 000000000..b6a34db49
--- /dev/null
+++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerAXSnapshotBridge.m
@@ -0,0 +1,333 @@
+#import "RunnerAXSnapshotBridge.h"
+
+#import
+#import
+
+// Keys of the result dictionary returned by +snapshotTreeForApplication:maxDepth:maxNodes:.
+static NSString *const RunnerAXSnapshotOkKey = @"ok";
+static NSString *const RunnerAXSnapshotErrorKey = @"error";
+static NSString *const RunnerAXSnapshotRootKey = @"root";
+static NSString *const RunnerAXSnapshotTruncatedKey = @"truncated";
+
+// Function-pointer shapes used to call objc_msgSend on selectors looked up by name.
+typedef id (*RunnerAXObjectMsgSend)(id, SEL);
+typedef NSInteger (*RunnerAXIntegerMsgSend)(id, SEL);
+typedef id (*RunnerAXSnapshotMsgSend)(id, SEL, id, id, id, NSError **);
+
+@implementation RunnerAXSnapshotBridge
+
++ (NSDictionary *)snapshotTreeForApplication:(XCUIApplication *)application
+                                    maxDepth:(NSInteger)maxDepth
+                                    maxNodes:(NSInteger)maxNodes
+{
+  // Clamp once so the AX request and the serializer use the same limits. Clamping only the
+  // request let maxNodes <= 0 fetch a tree and then fail to serialize even its root.
+  NSInteger depthLimit = MAX(0, maxDepth);
+  NSInteger nodeLimit = MAX(1, maxNodes);
+
+  @try {
+    id axClient = [self objectFrom:XCUIDevice.sharedDevice selectorName:@"accessibilityInterface"];
+    if (nil == axClient) {
+      return [self failure:@"XCUIDevice accessibilityInterface is unavailable"];
+    }
+
+    id target = [self accessibilityApplicationForApplication:application axClient:axClient];
+    if (nil == target) {
+      return [self failure:@"Could not match active AX application for XCTest application"];
+    }
+
+    NSMutableDictionary *parameters = [NSMutableDictionary dictionary];
+    id defaults = [self objectFrom:axClient selectorName:@"defaultParameters"];
+    if ([defaults isKindOfClass:NSDictionary.class]) {
+      [parameters addEntriesFromDictionary:(NSDictionary *)defaults];
+    }
+    parameters[@"maxDepth"] = @(depthLimit);
+    parameters[@"maxChildren"] = @(nodeLimit);
+    parameters[@"maxArrayCount"] = @(nodeLimit);
+    parameters[@"traverseFromParentsToChildren"] = @YES;
+
+    SEL requestSelector = NSSelectorFromString(@"requestSnapshotForElement:attributes:parameters:error:");
+    if (![axClient respondsToSelector:requestSelector]) {
+      return [self failure:@"AX client does not support requestSnapshotForElement"];
+    }
+
+    NSError *error = nil;
+    NSArray *keyPaths = @[
+      @"elementType",
+      @"identifier",
+      @"label",
+      @"value",
+      @"frame",
+      @"enabled",
+      @"selected",
+      @"hasFocus",
+      @"children",
+    ];
+    // The AX server expects real accessibility attribute identifiers, not snapshot keypath
+    // strings; passing raw keypaths silently drops attributes it does not recognize (frame
+    // came back zeroed). XCElementSnapshot owns the keypath -> AX attribute mapping.
+    NSArray *attributes = keyPaths;
+    Class snapshotClass = NSClassFromString(@"XCElementSnapshot");
+    SEL mapSelector = NSSelectorFromString(@"axAttributesForElementSnapshotKeyPaths:isMacOS:");
+    if ([snapshotClass respondsToSelector:mapSelector]) {
+      typedef id (*RunnerAXMapMsgSend)(id, SEL, id, BOOL);
+      RunnerAXMapMsgSend mapSend = (RunnerAXMapMsgSend)objc_msgSend;
+      id mapped = mapSend(snapshotClass, mapSelector, keyPaths, NO);
+      if ([mapped isKindOfClass:NSSet.class]) {
+        mapped = [(NSSet *)mapped allObjects];
+      }
+      if ([mapped isKindOfClass:NSArray.class] && [(NSArray *)mapped count] > 0) {
+        // The mapper expands keypaths with extra attributes (automation type, window display
+        // id, base type) that are disproportionately expensive for the AX server to compute
+        // on large React Native trees. Keep only the attributes we actually consume.
+        NSArray *needed = @[ @"ElementType", @"Identifier", @"Label", @"Value", @"Frame",
+                             @"Enabled", @"Selected", @"Focus" ];
+        NSMutableArray *filtered = [NSMutableArray array];
+        for (id attribute in (NSArray *)mapped) {
+          NSString *name = [attribute description];
+          for (NSString *suffix in needed) {
+            if ([name hasSuffix:suffix]) {
+              [filtered addObject:attribute];
+              break;
+            }
+          }
+        }
+        attributes = filtered.count > 0 ? filtered : mapped;
+      }
+    }
+    RunnerAXSnapshotMsgSend send = (RunnerAXSnapshotMsgSend)objc_msgSend;
+    id result = send(axClient, requestSelector, target, attributes, parameters.copy, &error);
+    if (nil == result) {
+      return [self failure:error.localizedDescription ?: @"AX snapshot request returned nil"];
+    }
+
+    // Prefer the wrapped root snapshot when the result exposes one through KVC; otherwise
+    // treat the result itself as the root.
+    id root = nil;
+    @try {
+      root = [result valueForKey:@"_rootElementSnapshot"];
+    } @catch (NSException *exception) {
+      root = nil;
+    }
+    if (nil == root) {
+      root = result;
+    }
+
+    BOOL truncated = NO;
+    NSInteger nodeCount = 0;
+    NSDictionary *rootNode = [self dictionaryForSnapshot:root
+                                                   depth:0
+                                                maxDepth:depthLimit
+                                                maxNodes:nodeLimit
+                                               nodeCount:&nodeCount
+                                               truncated:&truncated];
+    if (nil == rootNode) {
+      return [self failure:@"AX snapshot root could not be serialized"];
+    }
+
+    return @{
+      RunnerAXSnapshotOkKey: @YES,
+      RunnerAXSnapshotRootKey: rootNode,
+      RunnerAXSnapshotTruncatedKey: @(truncated),
+    };
+  } @catch (NSException *exception) {
+    return [self failure:exception.reason ?: exception.name ?: @"AX snapshot bridge exception"];
+  }
+}
+
+/// Builds the `{ok: NO, error: message}` result dictionary.
++ (NSDictionary *)failure:(NSString *)message
+{
+  return @{
+    RunnerAXSnapshotOkKey: @NO,
+    RunnerAXSnapshotErrorKey: message,
+  };
+}
+
+/// Sends the zero-argument selector named `selectorName` to `target` and returns its object
+/// result, or nil when `target` does not respond to that selector.
++ (id)objectFrom:(id)target selectorName:(NSString *)selectorName
+{
+  SEL selector = NSSelectorFromString(selectorName);
+  if (![target respondsToSelector:selector]) {
+    return nil;
+  }
+  RunnerAXObjectMsgSend send = (RunnerAXObjectMsgSend)objc_msgSend;
+  return send(target, selector);
+}
+
+/// Integer-returning counterpart of +objectFrom:selectorName:; returns 0 when `target` does not
+/// respond to the selector.
++ (NSInteger)integerFrom:(id)target selectorName:(NSString *)selectorName
+{
+  SEL selector = NSSelectorFromString(selectorName);
+  if (![target respondsToSelector:selector]) {
+    return 0;
+  }
+  // processID/processIdentifier return pid_t (int32); reading them through an
+  // NSInteger-returning cast is not upper-32-bit safe on arm64. Use the method
+  // signature to pick the correctly sized call.
+  NSMethodSignature *signature = [target methodSignatureForSelector:selector];
+  const char *returnType = signature.methodReturnType;
+  if (returnType != NULL && strcmp(returnType, @encode(int)) == 0) {
+    typedef int (*RunnerAXIntMsgSend)(id, SEL);
+    RunnerAXIntMsgSend send = (RunnerAXIntMsgSend)objc_msgSend;
+    return (NSInteger)send(target, selector);
+  }
+  RunnerAXIntegerMsgSend send = (RunnerAXIntegerMsgSend)objc_msgSend;
+  return send(target, selector);
+}
+
+/// Returns the AX client's active application whose process ID equals that of `application`,
+/// or nil when there is no match.
++ (id)accessibilityApplicationForApplication:(XCUIApplication *)application axClient:(id)axClient
+{
+  NSInteger targetProcessID = [self integerFrom:application selectorName:@"processID"];
+  if (targetProcessID <= 0) {
+    // Without a valid process ID nothing can match, so skip querying the AX client at all.
+    return nil;
+  }
+
+  id activeApplications = [self objectFrom:axClient selectorName:@"activeApplications"];
+  if (![activeApplications isKindOfClass:NSArray.class]) {
+    return nil;
+  }
+
+  for (id candidate in (NSArray *)activeApplications) {
+    NSInteger candidateProcessID = [self integerFrom:candidate selectorName:@"processIdentifier"];
+    if (candidateProcessID == targetProcessID) {
+      return candidate;
+    }
+  }
+  return nil;
+}
+
+/// Recursively converts `snapshot` and its descendants into a dictionary node.
+/// `nodeCount` accumulates the nodes emitted so far across the whole walk. Returns nil and
+/// sets `*truncated` when `snapshot` is nil or the `maxNodes` budget is already spent; children
+/// are only visited while `depth < maxDepth`.
++ (nullable NSDictionary *)dictionaryForSnapshot:(id)snapshot
+                                           depth:(NSInteger)depth
+                                        maxDepth:(NSInteger)maxDepth
+                                        maxNodes:(NSInteger)maxNodes
+                                       nodeCount:(NSInteger *)nodeCount
+                                       truncated:(BOOL *)truncated
+{
+  if (nil == snapshot || *nodeCount >= maxNodes) {
+    *truncated = YES;
+    return nil;
+  }
+
+  *nodeCount += 1;
+  NSMutableDictionary *result = [NSMutableDictionary dictionary];
+  result[@"type"] = [self numberValueForKey:@"elementType" snapshot:snapshot] ?: @0;
+  result[@"identifier"] = [self stringValueForKey:@"identifier" snapshot:snapshot] ?: @"";
+  result[@"label"] = [self stringValueForKey:@"label" snapshot:snapshot] ?: @"";
+  result[@"value"] = [self stringValueForKey:@"value" snapshot:snapshot] ?: @"";
+  result[@"frame"] = [self frameValueForSnapshot:snapshot];
+  result[@"enabled"] = [self boolNumberForKey:@"enabled" snapshot:snapshot defaultValue:YES];
+  result[@"selected"] = [self boolNumberForKey:@"selected" snapshot:snapshot defaultValue:NO];
+  result[@"focused"] = [self boolNumberForKey:@"hasFocus" snapshot:snapshot defaultValue:NO];
+
+  NSMutableArray *children = [NSMutableArray array];
+  if (depth < maxDepth) {
+    NSArray *childSnapshots = [self childrenForSnapshot:snapshot];
+    NSUInteger childCount = childSnapshots.count;
+    for (NSUInteger index = 0; index < childCount; index++) {
+      NSDictionary *childNode = [self dictionaryForSnapshot:childSnapshots[index]
+                                                      depth:depth + 1
+                                                   maxDepth:maxDepth
+                                                   maxNodes:maxNodes
+                                                  nodeCount:nodeCount
+                                                  truncated:truncated];
+      if (nil != childNode) {
+        [children addObject:childNode];
+      }
+      if (*nodeCount >= maxNodes) {
+        // Budget exhausted. Only flag truncation when siblings are actually left unvisited;
+        // a tree that fits the budget exactly is complete, not truncated.
+        if (index + 1 < childCount) {
+          *truncated = YES;
+        }
+        break;
+      }
+    }
+  }
+  result[@"children"] = children;
+  return result.copy;
+}
+
+/// Reads `children` via KVC; returns an empty array when the lookup raises or yields a non-array.
++ (NSArray *)childrenForSnapshot:(id)snapshot
+{
+  id children = nil;
+  @try {
+    children = [snapshot valueForKey:@"children"];
+  } @catch (NSException *exception) {
+    children = nil;
+  }
+  return [children isKindOfClass:NSArray.class] ? children : @[];
+}
+
+/// Reads `key` via KVC; returns nil when the lookup raises or the value is not an NSNumber.
++ (nullable NSNumber *)numberValueForKey:(NSString *)key snapshot:(id)snapshot
+{
+  id value = nil;
+  @try {
+    value = [snapshot valueForKey:key];
+  } @catch (NSException *exception) {
+    return nil;
+  }
+  return [value isKindOfClass:NSNumber.class] ? value : nil;
+}
+
+/// Reads `key` via KVC as a whitespace-trimmed string (non-strings use -description); returns
+/// nil for missing or NSNull values and when the lookup raises.
++ (nullable NSString *)stringValueForKey:(NSString *)key snapshot:(id)snapshot
+{
+  id value = nil;
+  @try {
+    value = [snapshot valueForKey:key];
+  } @catch (NSException *exception) {
+    return nil;
+  }
+  if (nil == value || value == NSNull.null) {
+    return nil;
+  }
+  if ([value isKindOfClass:NSString.class]) {
+    return [(NSString *)value stringByTrimmingCharactersInSet:NSCharacterSet.whitespaceAndNewlineCharacterSet];
+  }
+  return [[value description] stringByTrimmingCharactersInSet:NSCharacterSet.whitespaceAndNewlineCharacterSet];
+}
+
+/// Reads `key` as a BOOL-valued number, substituting `defaultValue` when no NSNumber is available.
++ (NSNumber *)boolNumberForKey:(NSString *)key snapshot:(id)snapshot defaultValue:(BOOL)defaultValue
+{
+  NSNumber *value = [self numberValueForKey:key snapshot:snapshot];
+  return nil == value ? 
@(defaultValue) : @([value boolValue]); +} + ++ (NSDictionary *)frameValueForSnapshot:(id)snapshot +{ + CGRect frame = CGRectZero; + @try { + id value = [snapshot valueForKey:@"frame"]; + if ([value isKindOfClass:NSValue.class] + && strcmp([(NSValue *)value objCType], @encode(CGRect)) == 0) { + [(NSValue *)value getValue:&frame]; + } + } @catch (NSException *exception) { + frame = CGRectZero; + } + if (CGRectIsNull(frame) || CGRectIsInfinite(frame)) { + frame = CGRectZero; + } + return @{ + @"x": @(CGRectGetMinX(frame)), + @"y": @(CGRectGetMinY(frame)), + @"width": @(CGRectGetWidth(frame)), + @"height": @(CGRectGetHeight(frame)), + }; +} + +@end diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+AXSnapshotFallback.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+AXSnapshotFallback.swift new file mode 100644 index 000000000..671fbb415 --- /dev/null +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+AXSnapshotFallback.swift @@ -0,0 +1,227 @@ +import XCTest + +extension RunnerTests { + private static let privateAXSnapshotMaxNodes = 5_000 + /// Deep React Native trees make the AX server reject bulk snapshot requests outright with + /// kAXErrorIllegalArgument once the requested depth crosses a tree-size-dependent limit + /// (observed between depth 56 and 64 on the Bluesky Home feed; the limit moves with live + /// content). Retrying the same request at a shallower depth succeeds, so on failure we walk + /// this ladder instead of giving up. It is capped at 4 retries after the initial request to + /// bound worst-case latency on apps where the AX surface is genuinely unavailable. + static let privateAXSnapshotDepthLadder = [56, 40, 24, 12] + + func privateAXSnapshotFallback( + app: XCUIApplication, + options: SnapshotOptions, + reason: String + ) -> DataPayload? { + #if os(iOS) && targetEnvironment(simulator) + let requestedDepth = options.depth ?? 
64 + var attemptDepths = [requestedDepth] + attemptDepths.append( + contentsOf: Self.privateAXSnapshotDepthLadder.filter { $0 < requestedDepth } + ) + var response: [String: Any] = [:] + var effectiveDepth = requestedDepth + var lastError = "unknown private AX snapshot failure" + for depth in attemptDepths { + response = RunnerAXSnapshotBridge.snapshotTree( + for: app, + maxDepth: depth, + maxNodes: Self.privateAXSnapshotMaxNodes + ) + if response["ok"] as? Bool == true { + effectiveDepth = depth + break + } + lastError = response["error"] as? String ?? lastError + NSLog( + "AGENT_DEVICE_RUNNER_PRIVATE_AX_SNAPSHOT_DEPTH_RETRY depth=%ld error=%@", + depth, + lastError + ) + } + guard response["ok"] as? Bool == true else { + NSLog("AGENT_DEVICE_RUNNER_PRIVATE_AX_SNAPSHOT_FAILED=%@", lastError) + return nil + } + guard let root = response["root"] as? [String: Any] else { + NSLog("AGENT_DEVICE_RUNNER_PRIVATE_AX_SNAPSHOT_FAILED=missing root") + return nil + } + + // The public windows query backing safeSnapshotViewport can fail on the same apps that + // need this fallback, degrading to an infinite viewport that marks off-screen content + // (e.g. closed drawer menus at negative x) as visible and tappable. The private root's + // own frame is the reliable screen rect here. + var viewport = safeSnapshotViewport(app: app) + let rootFrame = privateAXRect(root["frame"]) + if viewport.isInfinite || viewport.isNull || viewport.isEmpty, !rootFrame.isEmpty { + viewport = rootFrame + } + var nodes: [SnapshotNode] = [] + appendPrivateAXNode( + root, + to: &nodes, + options: options, + viewport: viewport, + depth: 0, + parentIndex: nil + ) + if nodes.count <= 1 { + NSLog("AGENT_DEVICE_RUNNER_PRIVATE_AX_SNAPSHOT_SPARSE=%ld", nodes.count) + return nil + } + + let depthLimited = effectiveDepth < requestedDepth + let truncated = (response["truncated"] as? Bool) == true || depthLimited + var message = + "Recovered this snapshot with the fallback accessibility backend after \(reason). 
This usually means the app publishes an unhealthy accessibility tree (too large or deep to serialize, or containers that hide their children) — fixing the app's accessibility is the real cure. The fallback is simulator-only and may expose a partial tree; treat screenshot as visual truth when this warning appears." + if depthLimited { + message += + " The accessibility server rejected deeper requests; this tree is capped at depth \(effectiveDepth) — re-run with --depth \(effectiveDepth) --scope to inspect deeper content." + } + NSLog( + "AGENT_DEVICE_RUNNER_PRIVATE_AX_SNAPSHOT_USED reason=%@ nodes=%ld depth=%ld truncated=%@", + reason, + nodes.count, + effectiveDepth, + truncated ? "true" : "false" + ) + return DataPayload(message: message, nodes: nodes, truncated: truncated) + #else + return nil + #endif + } + + private func appendPrivateAXNode( + _ rawNode: [String: Any], + to nodes: inout [SnapshotNode], + options: SnapshotOptions, + viewport: CGRect, + depth: Int, + parentIndex: Int? + ) { + if let limit = options.depth, depth > limit { return } + + let rect = privateAXRect(rawNode["frame"]) + let label = privateAXString(rawNode["label"]) + let identifier = privateAXString(rawNode["identifier"]) + let value = privateAXString(rawNode["value"]) + let rawType = privateAXInt(rawNode["type"]) ?? 0 + let typeName = elementTypeName(rawElementType: rawType) + let enabled = privateAXBool(rawNode["enabled"]) ?? 
true + let visible = isVisibleInViewport(rect, viewport) + let hasContent = !label.isEmpty || !identifier.isEmpty || !value.isEmpty + let isRoot = parentIndex == nil + + let include: Bool + if isRoot { + include = true + } else if options.interactiveOnly && !visible { + include = false + } else if let scope = options.scope?.trimmingCharacters(in: .whitespacesAndNewlines), !scope.isEmpty { + let haystack = [label, identifier, value].joined(separator: "\n") + include = haystack.localizedCaseInsensitiveContains(scope) + } else if options.compact { + include = hasContent || privateAXLikelyInteractive(rawElementType: rawType) + } else { + include = true + } + + let currentIndex: Int? + if include { + currentIndex = nodes.count + nodes.append( + SnapshotNode( + index: nodes.count, + type: typeName, + label: label.isEmpty ? nil : label, + identifier: identifier.isEmpty ? nil : identifier, + value: value.isEmpty ? nil : value, + rect: snapshotRect(from: rect), + enabled: enabled, + focused: privateAXBool(rawNode["focused"]) == true ? true : nil, + selected: privateAXBool(rawNode["selected"]) == true ? true : nil, + hittable: visible && enabled && privateAXLikelyInteractive(rawElementType: rawType), + depth: depth, + parentIndex: parentIndex, + hiddenContentAbove: nil, + hiddenContentBelow: nil + ) + ) + } else { + currentIndex = parentIndex + } + + guard let children = rawNode["children"] as? 
[[String: Any]] else { + return + } + for child in children { + appendPrivateAXNode( + child, + to: &nodes, + options: options, + viewport: viewport, + depth: depth + 1, + parentIndex: currentIndex + ) + } + } + + private func elementTypeName(rawElementType: Int) -> String { + if let raw = UInt(exactly: rawElementType), + let type = XCUIElement.ElementType(rawValue: raw) + { + return elementTypeName(type) + } + return "Element(\(rawElementType))" + } + + private func privateAXLikelyInteractive(rawElementType: Int) -> Bool { + guard let raw = UInt(exactly: rawElementType), + let type = XCUIElement.ElementType(rawValue: raw) + else { + return false + } + return interactiveTypes.contains(type) || Self.scrollContainerTypes.contains(type) + } + + private func privateAXString(_ value: Any?) -> String { + guard let value else { return "" } + if let string = value as? String { + return string.trimmingCharacters(in: .whitespacesAndNewlines) + } + return String(describing: value).trimmingCharacters(in: .whitespacesAndNewlines) + } + + private func privateAXInt(_ value: Any?) -> Int? { + if let value = value as? Int { return value } + if let value = value as? NSNumber { return value.intValue } + return nil + } + + private func privateAXBool(_ value: Any?) -> Bool? { + if let value = value as? Bool { return value } + if let value = value as? NSNumber { return value.boolValue } + return nil + } + + private func privateAXRect(_ value: Any?) -> CGRect { + guard let frame = value as? [String: Any] else { + return .zero + } + return CGRect( + x: privateAXDouble(frame["x"]) ?? 0, + y: privateAXDouble(frame["y"]) ?? 0, + width: privateAXDouble(frame["width"]) ?? 0, + height: privateAXDouble(frame["height"]) ?? 0 + ) + } + + private func privateAXDouble(_ value: Any?) -> Double? { + if let value = value as? Double { return value } + if let value = value as? 
NSNumber { return value.doubleValue } + return nil + } +} diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift index 5b90f056e..d3c2c773b 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift @@ -11,6 +11,14 @@ extension RunnerTests { private static let rawSnapshotMaxNodes = 5_000 private static let rawSnapshotTooLargeHint = "Raw iOS snapshot exceeded the runner payload guard. Use regular snapshot for visible UI, or scope/depth-limit raw snapshot when inspecting a large accessibility tree." + private static let publicQueryRecoveryMessage = + "Recovered iOS snapshot through XCTest accessibility element queries after the public snapshot tree was sparse. This usually means the app publishes an unhealthy accessibility tree - fixing the app accessibility is the real cure. The recovered nodes are a flattened view of on-screen controls; treat screenshot as visual truth when this warning appears." 
+ private static let structuralOnlyNodeTypes: Set = [ + "Application", + "Window", + "Other", + "ScrollView" + ] private static let collapsedTabCandidateTypes: Set = [ .button, .link, @@ -18,7 +26,7 @@ extension RunnerTests { .other, .staticText ] - private static let scrollContainerTypes: Set = [ + static let scrollContainerTypes: Set = [ .collectionView, .scrollView, .table @@ -99,12 +107,18 @@ extension RunnerTests { } func snapshotFast(app: XCUIApplication, options: SnapshotOptions) throws -> DataPayload { - if options.interactiveOnly && options.compact { - return snapshotFlatInteractive(app: app, options: options) - } if let blocking = blockingSystemAlertSnapshot() { return blocking } + if options.interactiveOnly && options.compact { + let payload = snapshotFlatInteractive(app: app, options: options) + return snapshotWithPrivateAXFallbackIfSparse( + payload, + app: app, + options: options, + reason: "compact interactive XCTest snapshot was sparse" + ) + } let capture = try captureSnapshotTraversalContext( app: app, @@ -243,10 +257,128 @@ extension RunnerTests { } - return DataPayload( + let payload = DataPayload( nodes: applyHiddenContentHints(hiddenContentHintsByNodeIndex, to: nodes), truncated: false ) + return snapshotWithFallbackIfSparse( + payload, + app: app, + options: options, + reason: "XCTest snapshot returned a sparse application/window tree" + ) + } + + private func snapshotWithFallbackIfSparse( + _ payload: DataPayload, + app: XCUIApplication, + options: SnapshotOptions, + reason: String + ) -> DataPayload { + guard Self.snapshotPayloadNeedsRecovery(payload) else { + return payload + } + if let fallback = publicQuerySnapshotFallback( + app: app, + options: options, + reason: reason + ) { + return fallback + } + return betterSnapshotPayload( + payload, + recovered: privateAXSnapshotFallback(app: app, options: options, reason: reason) + ) + } + + private func snapshotWithPrivateAXFallbackIfSparse( + _ payload: DataPayload, + app: XCUIApplication, + 
options: SnapshotOptions, + reason: String + ) -> DataPayload { + guard Self.snapshotPayloadNeedsRecovery(payload) else { + return payload + } + return betterSnapshotPayload( + payload, + recovered: privateAXSnapshotFallback(app: app, options: options, reason: reason) + ) + } + + /// A payload needs recovery when the tree is structural-only, OR when the capture was cut + /// off by a budget/deadline with almost nothing collected. The second condition matters on + /// large React Native trees: the typed-query sweep can resolve one or two stray controls + /// before its deadline, which defeats an all-structural check while the payload is still + /// useless in practice. A legitimately minimal screen finishes the sweep without truncation, + /// so it never pays for recovery. + static let sparseRecoveryTruncatedNodeThreshold = 8 + + static func snapshotPayloadNeedsRecovery(_ payload: DataPayload) -> Bool { + guard let nodes = payload.nodes, !nodes.isEmpty else { return false } + if isSparseApplicationWindowTree(nodes) { return true } + return payload.truncated == true && nodes.count <= sparseRecoveryTruncatedNodeThreshold + } + + /// Keeps the original payload unless the recovered tree actually carries more nodes — + /// recovery must never replace a partial-but-real capture with something thinner. + private func betterSnapshotPayload( + _ payload: DataPayload, + recovered: DataPayload? + ) -> DataPayload { + guard let recovered, let recoveredNodes = recovered.nodes, + recoveredNodes.count > (payload.nodes?.count ?? 
0) + else { + return payload + } + return recovered + } + + private static func isSparseApplicationWindowTree(_ nodes: [SnapshotNode]) -> Bool { + guard !nodes.isEmpty else { return false } + return nodes.allSatisfy { node in + // Application/Window labels are just the app/window name, and full-screen roots + // compute as hittable; neither says anything about tree health, so neither counts + // as content for these types (a labeled app+window pair is still a sparse tree). + let isRootContainer = node.type == "Application" || node.type == "Window" + let hasContent = (!isRootContainer && node.label?.isEmpty == false) + || node.identifier?.isEmpty == false + || node.value?.isEmpty == false + return !hasContent + && (isRootContainer || !node.hittable) + && Self.structuralOnlyNodeTypes.contains(node.type) + } + } + + private func publicQuerySnapshotFallback( + app: XCUIApplication, + options: SnapshotOptions, + reason: String + ) -> DataPayload? { + let fallback = snapshotFlatInteractive( + app: app, + options: SnapshotOptions( + interactiveOnly: false, + compact: options.compact, + depth: options.depth, + scope: options.scope, + raw: false + ) + ) + guard let nodes = fallback.nodes, !Self.isSparseApplicationWindowTree(nodes) else { + return nil + } + NSLog( + "AGENT_DEVICE_RUNNER_PUBLIC_QUERY_SNAPSHOT_USED reason=%@ nodes=%ld truncated=%@", + reason, + nodes.count, + fallback.truncated == true ? 
"true" : "false" + ) + return DataPayload( + message: Self.publicQueryRecoveryMessage, + nodes: nodes, + truncated: true + ) } func snapshotRaw(app: XCUIApplication, options: SnapshotOptions) throws -> DataPayload { @@ -307,7 +439,12 @@ extension RunnerTests { } try walk(context.rootSnapshot, depth: 0, parentIndex: nil) - return DataPayload(nodes: nodes, truncated: false) + return snapshotWithPrivateAXFallbackIfSparse( + DataPayload(nodes: nodes, truncated: false), + app: app, + options: options, + reason: "XCTest raw snapshot returned a sparse application/window tree" + ) } private func snapshotFlatInteractive(app: XCUIApplication, options: SnapshotOptions) -> DataPayload { @@ -401,6 +538,15 @@ extension RunnerTests { } return .context(context) } catch let failure as SnapshotCaptureFailure { + if Self.isAxSnapshotFailure(failure), + let fallback = privateAXSnapshotFallback( + app: app, + options: options, + reason: failure.message + ) + { + return .fallback(fallback) + } if let fallback = snapshotDepthLimitedAccessibilityFallback( app: app, options: options, @@ -513,6 +659,120 @@ extension RunnerTests { XCTAssertTrue(message.contains(Self.axSnapshotHint)) } + func testSparseApplicationWindowTreeDetectionIsConservative() { + let root = compactInteractiveRootNode(rect: .zero) + func node( + index: Int, + type: String, + label: String? = nil, + identifier: String? = nil, + value: String? 
= nil, + hittable: Bool = false + ) -> SnapshotNode { + SnapshotNode( + index: index, + type: type, + label: label, + identifier: identifier, + value: value, + rect: snapshotRect(from: .zero), + enabled: true, + focused: nil, + selected: nil, + hittable: hittable, + depth: 1, + parentIndex: 0, + hiddenContentAbove: nil, + hiddenContentBelow: nil + ) + } + let window = node(index: 1, type: "Window") + let structuralOther = node(index: 2, type: "Other") + let structuralScroll = node(index: 3, type: "ScrollView") + let labeledOther = node(index: 4, type: "Other", label: "Visible content") + let identifiedOther = node(index: 5, type: "Other", identifier: "test-id") + let valuedOther = node(index: 6, type: "Other", value: "Selected") + let hittableOther = node(index: 7, type: "Other", hittable: true) + let button = node( + index: 8, + type: "Button", + label: "Sign in", + hittable: true + ) + + XCTAssertTrue(Self.isSparseApplicationWindowTree([root])) + XCTAssertTrue(Self.isSparseApplicationWindowTree([root, window])) + XCTAssertTrue(Self.isSparseApplicationWindowTree([root, window, structuralOther, structuralScroll])) + XCTAssertFalse(Self.isSparseApplicationWindowTree([root, labeledOther])) + XCTAssertFalse(Self.isSparseApplicationWindowTree([root, identifiedOther])) + XCTAssertFalse(Self.isSparseApplicationWindowTree([root, valuedOther])) + XCTAssertFalse(Self.isSparseApplicationWindowTree([root, hittableOther])) + XCTAssertFalse(Self.isSparseApplicationWindowTree([root, button])) + XCTAssertFalse(Self.isSparseApplicationWindowTree([root, window, button])) + XCTAssertFalse(Self.isSparseApplicationWindowTree([])) + // App/window name labels and full-screen-root hittability are not content: a labeled, + // hittable Application root over a bare Window is still a sparse tree (a shape seen on + // production React Native login screens behind full-screen modal overlays). 
+ let labeledHittableRoot = node( + index: 0, type: "Application", label: "Example App", hittable: true) + XCTAssertTrue(Self.isSparseApplicationWindowTree([labeledHittableRoot, window])) + XCTAssertFalse( + Self.isSparseApplicationWindowTree([ + labeledHittableRoot, node(index: 1, type: "Application", identifier: "custom-id"), + ]) + ) + } + + func testSnapshotPayloadNeedsRecoveryOnDeadlineTruncatedNearEmptySweep() { + let root = compactInteractiveRootNode(rect: .zero) + func node(index: Int, label: String) -> SnapshotNode { + SnapshotNode( + index: index, + type: "Button", + label: label, + identifier: nil, + value: nil, + rect: snapshotRect(from: .zero), + enabled: true, + focused: nil, + selected: nil, + hittable: true, + depth: 1, + parentIndex: 0, + hiddenContentAbove: nil, + hiddenContentBelow: nil + ) + } + let button = node(index: 1, label: "Home") + + // Deadline-truncated sweep with a stray control: still needs recovery. + XCTAssertTrue( + Self.snapshotPayloadNeedsRecovery(DataPayload(nodes: [root, button], truncated: true)) + ) + // Structural-only tree needs recovery regardless of truncation. + XCTAssertTrue( + Self.snapshotPayloadNeedsRecovery(DataPayload(nodes: [root], truncated: false)) + ) + // A completed sweep on a legitimately minimal screen does not. + XCTAssertFalse( + Self.snapshotPayloadNeedsRecovery(DataPayload(nodes: [root, button], truncated: false)) + ) + // A truncated but reasonably populated sweep does not. 
+ var populated: [SnapshotNode] = [root] + for index in 1...Self.sparseRecoveryTruncatedNodeThreshold { + populated.append(node(index: index, label: "b\(index)")) + } + XCTAssertFalse( + Self.snapshotPayloadNeedsRecovery(DataPayload(nodes: populated, truncated: true)) + ) + XCTAssertFalse(Self.snapshotPayloadNeedsRecovery(DataPayload(nodes: [], truncated: true))) + } + + func testPublicQueryRecoveryMessageExplainsFlattenedFallback() { + XCTAssertTrue(Self.publicQueryRecoveryMessage.contains("XCTest accessibility element queries")) + XCTAssertTrue(Self.publicQueryRecoveryMessage.contains("flattened")) + } + func testRawSnapshotTooLargeFailureIsStructured() { let failure = rawSnapshotTooLargeFailure(nodeCount: Self.rawSnapshotMaxNodes + 1) @@ -692,7 +952,7 @@ extension RunnerTests { return nil } - private func safeSnapshotViewport(app: XCUIApplication) -> CGRect { + func safeSnapshotViewport(app: XCUIApplication) -> CGRect { safely("SNAPSHOT_VIEWPORT", CGRect.infinite) { snapshotViewport(app: app) } } @@ -725,6 +985,10 @@ extension RunnerTests { || (normalized.contains("illegal argument") && normalized.contains("snapshot")) } + private static func isAxSnapshotFailure(_ failure: SnapshotCaptureFailure) -> Bool { + failure.code == Self.axSnapshotErrorCode || isAxIllegalArgument(failure.message) + } + private func evaluateSnapshot( _ snapshot: XCUIElementSnapshot, in context: SnapshotTraversalContext @@ -855,7 +1119,7 @@ extension RunnerTests { return nil } - private func isVisibleInViewport(_ rect: CGRect, _ viewport: CGRect) -> Bool { + func isVisibleInViewport(_ rect: CGRect, _ viewport: CGRect) -> Bool { if rect.isNull || rect.isEmpty { return false } return rect.intersects(viewport) } diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+SystemModal.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+SystemModal.swift index 01f0344f2..b2dd8b2ec 100644 --- 
a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+SystemModal.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+SystemModal.swift @@ -29,12 +29,27 @@ extension RunnerTests { } var nodes: [SnapshotNode] = [modalNode] + for content in informativeElements(in: modal, excluding: actions) { + guard let contentNode = safeMakeSnapshotNode( + element: content, + index: nodes.count, + type: elementTypeName(content.elementType), + depth: 1, + parentIndex: 0, + hittableOverride: false + ) else { + continue + } + nodes.append(contentNode) + } + for action in actions { guard let actionNode = safeMakeSnapshotNode( element: action, index: nodes.count, type: elementTypeName(action.elementType), depth: 1, + parentIndex: 0, hittableOverride: true ) else { continue @@ -101,9 +116,7 @@ extension RunnerTests { func actionableElements(in element: XCUIElement) -> [XCUIElement] { var seen = Set() var actions: [XCUIElement] = [] - let descendants = safeElementsQuery { - element.descendants(matching: .any).allElementsBoundByIndex - } + let descendants = actionableTypes.flatMap { modalDescendants(in: element, matching: $0) } for candidate in descendants { if !safeIsActionableCandidate(candidate, seen: &seen) { continue } actions.append(candidate) @@ -124,6 +137,61 @@ extension RunnerTests { } } + private func informativeElements(in element: XCUIElement, excluding actions: [XCUIElement]) -> [XCUIElement] { + let actionKeys = Set(actions.map(systemModalElementKey)) + var seen = Set() + var contents: [XCUIElement] = [] + let descendants = readableSystemModalTypes.flatMap { + modalDescendants(in: element, matching: $0, limit: 2) + } + for candidate in descendants { + guard let key = safeInformativeElementKey(candidate, actionKeys: actionKeys) else { + continue + } + if seen.contains(key) { continue } + seen.insert(key) + contents.append(candidate) + } + return contents + } + + private var readableSystemModalTypes: [XCUIElement.ElementType] { + 
[.staticText, .textView] + } + + private func modalDescendants( + in element: XCUIElement, + matching type: XCUIElement.ElementType, + limit: Int? = nil + ) -> [XCUIElement] { + let elements = safeElementsQuery { + element.descendants(matching: type).allElementsBoundByIndex + } + guard let limit else { + return elements + } + return Array(elements.prefix(limit)) + } + + private func safeInformativeElementKey(_ candidate: XCUIElement, actionKeys: Set) -> String? { + safely("MODAL_CONTENT") { () -> String? in + let key = systemModalElementKey(candidate) + if actionKeys.contains(key) { return nil } + if actionableTypes.contains(candidate.elementType) { return nil } + if !candidate.exists { return nil } + let frame = candidate.frame + if frame.isNull || frame.isEmpty { return nil } + let label = candidate.label.trimmingCharacters(in: .whitespacesAndNewlines) + if label.isEmpty { return nil } + return key + } + } + + private func systemModalElementKey(_ element: XCUIElement) -> String { + let frame = element.frame + return "\(element.elementType.rawValue)-\(frame.origin.x)-\(frame.origin.y)-\(frame.size.width)-\(frame.size.height)-\(element.label)-\(element.identifier)" + } + private func preferredSystemModalTitle(_ element: XCUIElement) -> String { let label = element.label if !label.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { @@ -143,6 +211,7 @@ extension RunnerTests { labelOverride: String? = nil, identifierOverride: String? = nil, depth: Int, + parentIndex: Int? = nil, hittableOverride: Bool? = nil ) -> SnapshotNode { let label = (labelOverride ?? element.label).trimmingCharacters(in: .whitespacesAndNewlines) @@ -155,11 +224,11 @@ extension RunnerTests { value: nil, rect: snapshotRect(from: element.frame), enabled: element.isEnabled, - focused: elementHasFocus(element) ? true : nil, - selected: element.isSelected ? true : nil, + focused: nil, + selected: nil, hittable: hittableOverride ?? 
element.isHittable, depth: depth, - parentIndex: nil, + parentIndex: parentIndex, hiddenContentAbove: nil, hiddenContentBelow: nil ) @@ -172,6 +241,7 @@ extension RunnerTests { labelOverride: String? = nil, identifierOverride: String? = nil, depth: Int, + parentIndex: Int? = nil, hittableOverride: Bool? = nil ) -> SnapshotNode? { safely("MODAL_NODE") { @@ -182,6 +252,7 @@ extension RunnerTests { labelOverride: labelOverride, identifierOverride: identifierOverride, depth: depth, + parentIndex: parentIndex, hittableOverride: hittableOverride ) } diff --git a/scripts/write-xcuitest-cache-metadata.mjs b/scripts/write-xcuitest-cache-metadata.mjs index 13bf818e4..f27d7f3c0 100644 --- a/scripts/write-xcuitest-cache-metadata.mjs +++ b/scripts/write-xcuitest-cache-metadata.mjs @@ -90,6 +90,8 @@ function isRunnerSourceFile(fileName, filePath) { '.json', '.png', '.swift', + '.m', + '.h', '.plist', '.entitlements', '.xctestplan', diff --git a/src/__tests__/runtime-snapshot.test.ts b/src/__tests__/runtime-snapshot.test.ts index 93df48239..b4c7f5288 100644 --- a/src/__tests__/runtime-snapshot.test.ts +++ b/src/__tests__/runtime-snapshot.test.ts @@ -144,6 +144,53 @@ test('runtime snapshot warns when iOS compact interactive output is root-only', ]); }); +test('runtime snapshot flags a merged accessibility leaf and surfaces backend warnings', async () => { + const mergedLabel = Array.from({ length: 30 }, (_, i) => `Row ${i}, Tap`).join(', '); + const device = createSnapshotOnlyDevice({ + nodes: [ + { ref: 'e1', index: 0, depth: 0, type: 'Application', label: 'App' }, + { ref: 'e2', index: 1, depth: 1, parentIndex: 0, type: 'Other', label: mergedLabel }, + { ref: 'e3', index: 2, depth: 1, parentIndex: 0, type: 'Button', label: 'Ok' }, + ], + truncated: false, + backend: 'xctest', + warnings: [ + 'Recovered this snapshot with the fallback accessibility backend after sparse tree.', + ], + }); + + const result = await device.capture.snapshot({ session: 'default' }); + + 
assert.equal(result.warnings?.length, 2); + assert.equal( + result.warnings?.[0], + 'Recovered this snapshot with the fallback accessibility backend after sparse tree.', + ); + assert.match(String(result.warnings?.[1]), /e2 \[Other\] merges ~60 labels/); + assert.match(String(result.warnings?.[1]), /marks a container as accessible/); + assert.match(String(result.warnings?.[1]), /screenshot as visual truth/); +}); + +test('runtime snapshot does not flag prose text or labeled containers with children', async () => { + const prose = Array.from({ length: 30 }, (_, i) => `clause ${i}`).join(', '); + const device = createSnapshotOnlyDevice({ + nodes: [ + { ref: 'e1', index: 0, depth: 0, type: 'Application', label: 'App' }, + // Long comma-joined prose on a text node: content, not a collapsed container. + { ref: 'e2', index: 1, depth: 1, parentIndex: 0, type: 'StaticText', label: prose }, + // Same label shape on a container WITH children: not a merged leaf. + { ref: 'e3', index: 2, depth: 1, parentIndex: 0, type: 'Other', label: prose }, + { ref: 'e4', index: 3, depth: 2, parentIndex: 2, type: 'Button', label: 'Ok' }, + ], + truncated: false, + backend: 'xctest', + }); + + const result = await device.capture.snapshot({ session: 'default' }); + + assert.deepEqual(result.warnings ?? 
[], []); +}); + test('runtime snapshot does not warn for a normal iOS compact interactive output', async () => { const device = createSnapshotOnlyDevice({ nodes: [ diff --git a/src/commands/capture-snapshot.ts b/src/commands/capture-snapshot.ts index 45868a56b..9896a703d 100644 --- a/src/commands/capture-snapshot.ts +++ b/src/commands/capture-snapshot.ts @@ -225,6 +225,8 @@ function buildSnapshotWarnings(params: { const reactNativeOverlayWarning = formatReactNativeOverlayWarning(params.snapshot.nodes); if (reactNativeOverlayWarning) warnings.push(reactNativeOverlayWarning); + warnings.push(...buildMergedAccessibilityLeafWarnings(params.snapshot.nodes)); + const recentDropWarning = formatRecentSnapshotDropWarning(params); if (recentDropWarning) warnings.push(recentDropWarning); @@ -253,6 +255,34 @@ function buildSparseIosInteractiveWarnings(params: { ]; } +const MERGED_LEAF_MIN_SEGMENTS = 10; + +/** + * A leaf whose label joins many short segments is the signature of a container marked as an + * accessibility element: the platform folds every descendant into one merged node, so the + * children exist on screen but cannot be addressed by assistive tech or automation. This is + * an app-side accessibility bug, not a snapshot failure — the same merged element is all + * VoiceOver users get. + */ +function buildMergedAccessibilityLeafWarnings(nodes: SnapshotState['nodes']): string[] { + const parents = new Set( + nodes.map((node) => node.parentIndex).filter((index) => index !== undefined), + ); + return nodes + .filter((node) => { + if (parents.has(node.index)) return false; + const type = node.type?.toLowerCase() ?? ''; + if (type.includes('text')) return false; + const label = node.label ?? ''; + return label.split(', ').length > MERGED_LEAF_MIN_SEGMENTS; + }) + .map((node) => { + const segments = (node.label ?? '').split(', ').length; + const name = node.identifier ? ` (${node.identifier})` : ''; + return `@${node.ref} [${node.type ?? 
'element'}]${name} merges ~${segments} labels into a single accessibility element. The app likely marks a container as accessible, which hides every descendant from assistive tech and automation — the children cannot be addressed individually. Fix the app's accessibility (mark the rows, not the container); until then use screenshot as visual truth and coordinate taps.`; + }); +} + function buildEmptyAndroidInteractiveWarnings(params: { result: BackendSnapshotResult; snapshot: SnapshotState; diff --git a/src/core/interactors/apple.ts b/src/core/interactors/apple.ts index 744adcb01..46f56d8b0 100644 --- a/src/core/interactors/apple.ts +++ b/src/core/interactors/apple.ts @@ -18,7 +18,7 @@ import { withDiagnosticTimer } from '../../utils/diagnostics.ts'; import type { DeviceInfo } from '../../utils/device.ts'; import { AppError } from '../../utils/errors.ts'; import type { RawSnapshotNode } from '../../utils/snapshot.ts'; -import type { Interactor, RunnerContext, SnapshotResult } from '../interactor-types.ts'; +import type { Interactor, RunnerContext } from '../interactor-types.ts'; export function createAppleInteractor( device: DeviceInfo, @@ -70,7 +70,12 @@ export function createAppleInteractor( if (nodes.length === 0 && device.kind === 'simulator') { throw new AppError('COMMAND_FAILED', 'XCTest snapshot returned 0 nodes on iOS simulator.'); } - return { nodes, truncated: result.truncated ?? false, backend: 'xctest' }; + return { + nodes, + truncated: result.truncated ?? false, + backend: 'xctest', + ...(result.message ? { warnings: [result.message] } : {}), + }; }, back: async (mode) => { if (device.target === 'tv') { @@ -127,11 +132,19 @@ export function createAppleInteractor( }; } -function readAppleSnapshotResult( - result: Record, -): Pick { +function readAppleSnapshotResult(result: Record): { + nodes?: RawSnapshotNode[]; + truncated?: boolean; + message?: string; +} { return { nodes: Array.isArray(result.nodes) ? 
(result.nodes as RawSnapshotNode[]) : undefined, truncated: typeof result.truncated === 'boolean' ? result.truncated : undefined, + // Runner-attached context (e.g. "recovered with the fallback accessibility backend") + // surfaces as a snapshot warning so fallbacks are never silent. + message: + typeof result.message === 'string' && result.message.trim().length > 0 + ? result.message + : undefined, }; } diff --git a/src/daemon/handlers/__tests__/find.test.ts b/src/daemon/handlers/__tests__/find.test.ts index 4a64a6772..8f9773d22 100644 --- a/src/daemon/handlers/__tests__/find.test.ts +++ b/src/daemon/handlers/__tests__/find.test.ts @@ -160,6 +160,117 @@ test('handleFindCommands click prefers on-screen duplicate text matches', async expect(invokeCalls[0]!.positionals?.[0]).toBe('@e3'); }); +test('handleFindCommands click retries full snapshot when iOS compact snapshot is sparse', async () => { + const snapshotResponses = [ + { + backend: 'xctest', + nodes: [ + { + index: 0, + type: 'Application', + rect: { x: 0, y: 0, width: 0, height: 0 }, + }, + ], + }, + { + backend: 'xctest', + nodes: [ + { + index: 0, + type: 'Application', + hittable: false, + rect: { x: 0, y: 0, width: 390, height: 844 }, + }, + { + index: 1, + type: 'Button', + label: 'Search', + hittable: true, + rect: { x: 80, y: 792, width: 78, height: 48 }, + parentIndex: 0, + }, + ], + }, + ]; + mockDispatch.mockImplementation(async (_device, command) => { + if (command === 'snapshot') return snapshotResponses.shift() ?? 
{ nodes: [] }; + return {}; + }); + + const { response, invokeCalls } = await runFindClickScenario({ + positionals: ['Search', 'click'], + }); + + expect(response.ok).toBe(true); + expect(invokeCalls[0]!.positionals?.[0]).toBe('@e2'); + const snapshotCalls = mockDispatch.mock.calls.filter((call) => call[1] === 'snapshot'); + expect(snapshotCalls).toHaveLength(2); + expect(snapshotCalls[0]![4]).toMatchObject({ + snapshotInteractiveOnly: true, + snapshotCompact: true, + }); + expect(snapshotCalls[1]![4]).toMatchObject({ + snapshotInteractiveOnly: false, + snapshotCompact: false, + }); +}); + +test('handleFindCommands click scopes full retry when unscoped iOS fallback fails', async () => { + const snapshotResponses = [ + { + backend: 'xctest', + nodes: [ + { + index: 0, + type: 'Application', + rect: { x: 0, y: 0, width: 0, height: 0 }, + }, + ], + }, + new Error('unscoped snapshot failed'), + { + backend: 'xctest', + nodes: [ + { + index: 0, + type: 'Application', + hittable: false, + rect: { x: 0, y: 0, width: 390, height: 844 }, + }, + { + index: 1, + type: 'Button', + label: 'Search', + hittable: true, + rect: { x: 80, y: 792, width: 78, height: 48 }, + parentIndex: 0, + }, + ], + }, + ]; + mockDispatch.mockImplementation(async (_device, command) => { + if (command !== 'snapshot') return {}; + const response = snapshotResponses.shift(); + if (response instanceof Error) throw response; + return response ?? { nodes: [] }; + }); + + const { response, invokeCalls } = await runFindClickScenario({ + positionals: ['Search', 'click'], + }); + + expect(response.ok).toBe(true); + expect(invokeCalls[0]!.positionals?.[0]).toBe('@e1'); + expect(response.ok ? 
response.data : undefined).toMatchObject({ x: 119, y: 816 }); + const snapshotCalls = mockDispatch.mock.calls.filter((call) => call[1] === 'snapshot'); + expect(snapshotCalls).toHaveLength(3); + expect(snapshotCalls[2]![4]).toMatchObject({ + snapshotInteractiveOnly: false, + snapshotCompact: false, + snapshotScope: 'Search', + }); +}); + test('handleFindCommands click prefers semantic controls over matching containers', async () => { const { response, invokeCalls } = await runFindClickScenario({ positionals: ['Later', 'click'], diff --git a/src/daemon/handlers/find.ts b/src/daemon/handlers/find.ts index 457ffc27b..13092544f 100644 --- a/src/daemon/handlers/find.ts +++ b/src/daemon/handlers/find.ts @@ -2,7 +2,7 @@ import { dispatchCommand, resolveTargetDevice } from '../../core/dispatch.ts'; import { sleep } from '../../utils/timeouts.ts'; import { findBestMatchesByLocator, parseFindArgs, type FindLocator } from '../../utils/finders.ts'; import { centerOfRect, type SnapshotState } from '../../utils/snapshot.ts'; -import type { DaemonInvokeFn, DaemonRequest, DaemonResponse } from '../types.ts'; +import type { DaemonInvokeFn, DaemonRequest, DaemonResponse, SessionState } from '../types.ts'; import { SessionStore } from '../session-store.ts'; import { contextFromFlags } from '../context.ts'; import { ensureDeviceReady } from '../device-ready.ts'; @@ -90,42 +90,18 @@ export async function handleFindCommands(params: { // Interaction targets need the full compact tree so duplicate labels can be // resolved against viewport visibility before an off-screen subtree wins. const scope = shouldScopeFind(locator) && !requiresRect ? 
query : undefined; - const interactiveOnly = requiresRect; - let lastSnapshotAt = 0; - let lastNodes: SnapshotState['nodes'] | null = null; - const fetchNodes = async (): Promise<{ - nodes: SnapshotState['nodes']; - truncated?: boolean; - backend?: SnapshotState['backend']; - }> => { - const now = Date.now(); - // Re-use a snapshot captured within the last 750 ms to avoid redundant dumps during - // rapid find iterations. Skipped when Android freshness tracking is active, because - // the cached tree may already be stale from a recent navigation action. - if (lastNodes && now - lastSnapshotAt < 750 && !getActiveAndroidSnapshotFreshness(session)) { - return { nodes: lastNodes }; - } - const { snapshot } = await captureSnapshot({ - device, - session, - flags: { - ...req.flags, - snapshotInteractiveOnly: interactiveOnly, - snapshotCompact: interactiveOnly, - }, - outPath: req.flags?.out, - logPath, - snapshotScope: scope, - }); - const nodes = snapshot.nodes; - lastSnapshotAt = now; - lastNodes = nodes; - if (session) { - setSessionSnapshot(session, snapshot); - sessionStore.set(sessionName, session); - } - return { nodes, truncated: snapshot.truncated, backend: snapshot.backend }; - }; + const fetchNodes = createFindNodeFetcher({ + device, + session, + req, + logPath, + locator, + query, + scope, + interactiveOnly: requiresRect, + sessionStore, + sessionName, + }); const ctx: FindContext = { req, @@ -174,6 +150,11 @@ export async function handleFindCommands(params: { return handler ? 
handler() : null; } +function isSparseIosInteractiveSnapshot(snapshot: SnapshotState): boolean { + if (snapshot.backend !== 'xctest' || snapshot.nodes.length !== 1) return false; + return snapshot.nodes[0]?.type === 'Application'; +} + // --- Per-action handlers --- function isReadOnlyFindAction(action: string): boolean { @@ -186,6 +167,86 @@ function findActionRequiresRect(action: string): boolean { return action === 'click' || action === 'focus' || action === 'fill' || action === 'type'; } +type FindNodeFetcher = () => Promise<{ + nodes: SnapshotState['nodes']; + truncated?: boolean; + backend?: SnapshotState['backend']; +}>; + +function createFindNodeFetcher(params: { + device: SessionState['device']; + session: SessionState | undefined; + req: DaemonRequest; + logPath: string; + locator: FindLocator; + query: string; + scope: string | undefined; + interactiveOnly: boolean; + sessionStore: SessionStore; + sessionName: string; +}): FindNodeFetcher { + const { device, session, req, logPath, locator, query, scope, interactiveOnly } = params; + const { sessionStore, sessionName } = params; + let lastSnapshotAt = 0; + let lastNodes: SnapshotState['nodes'] | null = null; + const capture = async (snapshotScope: string | undefined, interactive: boolean) => { + const { snapshot } = await captureSnapshot({ + device, + session, + flags: { + ...req.flags, + snapshotInteractiveOnly: interactive, + snapshotCompact: interactive, + }, + outPath: req.flags?.out, + logPath, + snapshotScope, + }); + return snapshot; + }; + return async () => { + const now = Date.now(); + // Re-use a snapshot captured within the last 750 ms to avoid redundant dumps during + // rapid find iterations. Skipped when Android freshness tracking is active, because + // the cached tree may already be stale from a recent navigation action. 
+ if (lastNodes && now - lastSnapshotAt < 750 && !getActiveAndroidSnapshotFreshness(session)) { + return { nodes: lastNodes }; + } + let snapshot = await capture(scope, interactiveOnly); + if (interactiveOnly && isSparseIosInteractiveSnapshot(snapshot)) { + snapshot = await recoverSparseInteractiveSnapshot({ capture, locator, query, scope }); + } + const nodes = snapshot.nodes; + lastSnapshotAt = now; + lastNodes = nodes; + if (session) { + setSessionSnapshot(session, snapshot); + sessionStore.set(sessionName, session); + } + return { nodes, truncated: snapshot.truncated, backend: snapshot.backend }; + }; +} + +/** + * A sparse compact-interactive iOS snapshot usually means the runner could not enumerate the + * tree, not that the screen is empty: retry with a full snapshot, and when even unscoped AX + * serialization fails on unrelated content, with a query-scoped full snapshot. + */ +async function recoverSparseInteractiveSnapshot(params: { + capture: (scope: string | undefined, interactive: boolean) => Promise; + locator: FindLocator; + query: string; + scope: string | undefined; +}): Promise { + const { capture, locator, query, scope } = params; + try { + return await capture(scope, false); + } catch (error) { + if (!shouldScopeFind(locator)) throw error; + return await capture(query, false); + } +} + function resolveFindMatch(params: { nodes: SnapshotState['nodes']; locator: FindLocator; @@ -194,7 +255,10 @@ function resolveFindMatch(params: { flags: DaemonRequest['flags']; }): FindMatchResult { const { nodes, locator, query, requiresRect, flags } = params; - const bestMatches = findBestMatchesByLocator(nodes, locator, query, { + const searchableNodes = requiresRect + ? 
nodes.filter((node) => !isRootInteractionContainer(node, nodes[0])) + : nodes; + const bestMatches = findBestMatchesByLocator(searchableNodes, locator, query, { requireRect: requiresRect, }); if (requiresRect) { @@ -202,13 +266,11 @@ function resolveFindMatch(params: { } if (requiresRect && bestMatches.matches.length > 1) { - if (flags?.findFirst) { - bestMatches.matches = [bestMatches.matches[0]!]; - } else if (flags?.findLast) { - bestMatches.matches = [bestMatches.matches[bestMatches.matches.length - 1]!]; - } else { + const narrowed = narrowMultipleMatches(bestMatches.matches, flags); + if (!narrowed) { return { ok: false, response: buildAmbiguousMatchError(bestMatches.matches, locator, query) }; } + bestMatches.matches = narrowed; } const node = bestMatches.matches[0] ?? null; @@ -221,6 +283,15 @@ function resolveFindMatch(params: { return { ok: true, node }; } +function narrowMultipleMatches( + matches: SnapshotState['nodes'], + flags: DaemonRequest['flags'], +): SnapshotState['nodes'] | null { + if (flags?.findFirst) return [matches[0]!]; + if (flags?.findLast) return [matches[matches.length - 1]!]; + return null; +} + function preferOnscreenMatches( matches: SnapshotState['nodes'], nodes: SnapshotState['nodes'], @@ -260,17 +331,27 @@ function interactiveMatchScore( ): number { const resolution = resolveActionableTouchResolution(nodes, node); if (resolution.reason === 'covered') return 0; - if (resolution.reason === 'semantic-target' && resolution.node.rect) return 4; - if (resolution.reason === 'same-rect-descendant' && resolution.node.rect) return 4; + const resolved = resolvedTouchScore(resolution, nodes[0]); + if (resolved > 0) return resolved; + if (node.hittable && node.rect && !isRootInteractionContainer(node, nodes[0])) return 3; + return node.rect ? 
1 : 0; +} + +function resolvedTouchScore( + resolution: ReturnType, + root: SnapshotState['nodes'][number] | undefined, +): number { + if (!resolution.node.rect) return 0; + if (resolution.reason === 'semantic-target' || resolution.reason === 'same-rect-descendant') { + return 4; + } if ( resolution.reason === 'hittable-ancestor' && - resolution.node.rect && - !isRootInteractionContainer(resolution.node, nodes[0]) + !isRootInteractionContainer(resolution.node, root) ) { return 2; } - if (node.hittable && node.rect && !isRootInteractionContainer(node, nodes[0])) return 3; - return node.rect ? 1 : 0; + return 0; } function rectArea(node: SnapshotState['nodes'][number]): number { @@ -281,7 +362,9 @@ function resolveInteractiveMatchNode( nodes: SnapshotState['nodes'], node: SnapshotState['nodes'][number], ): SnapshotState['nodes'][number] { - return resolveActionableTouchNode(nodes, node); + const resolved = resolveActionableTouchNode(nodes, node); + if (isRootInteractionContainer(resolved, nodes[0]) && node.rect) return node; + return resolved; } function isRootInteractionContainer( @@ -291,11 +374,18 @@ function isRootInteractionContainer( if (!root?.rect || !node.rect) return false; const type = node.type?.toLowerCase() ?? 
''; if (!type.includes('application') && !type.includes('window')) return false; + return rectsMatch(node.rect, root.rect); +} + +function rectsMatch( + left: NonNullable, + right: NonNullable, +): boolean { return ( - node.rect.x === root.rect.x && - node.rect.y === root.rect.y && - node.rect.width === root.rect.width && - node.rect.height === root.rect.height + left.x === right.x && + left.y === right.y && + left.width === right.width && + left.height === right.height ); } diff --git a/src/daemon/handlers/snapshot-capture.ts b/src/daemon/handlers/snapshot-capture.ts index f16a77084..8691c962e 100644 --- a/src/daemon/handlers/snapshot-capture.ts +++ b/src/daemon/handlers/snapshot-capture.ts @@ -63,6 +63,7 @@ type SnapshotData = { backend?: SnapshotBackend; analysis?: AndroidSnapshotAnalysis; androidSnapshot?: AndroidSnapshotBackendMetadata; + warnings?: string[]; }; type SnapshotAttempt = { @@ -76,6 +77,7 @@ type CaptureSnapshotResult = { analysis?: AndroidSnapshotAnalysis; androidSnapshot?: AndroidSnapshotBackendMetadata; freshness?: AndroidFreshnessCaptureMeta; + warnings?: string[]; }; type AndroidFreshnessReason = 'empty-interactive' | 'sharp-drop' | 'stuck-route'; @@ -94,6 +96,7 @@ export async function captureSnapshot( snapshot: buildSnapshotState(data, resolveSnapshotStateFlags(params)), analysis: data.analysis, androidSnapshot: data.androidSnapshot, + warnings: data.warnings, }; } @@ -245,6 +248,7 @@ async function captureAndroidFreshnessAwareSnapshot( analysis: latest.data.analysis, androidSnapshot: latest.data.androidSnapshot, freshness: latest.freshness, + warnings: latest.data.warnings, }; } @@ -298,6 +302,7 @@ async function capturePostGestureAwareSnapshot( analysis: latest.data.analysis, androidSnapshot: latest.data.androidSnapshot, freshness: latest.freshness, + warnings: latest.data.warnings, }; } diff --git a/src/daemon/snapshot-runtime.ts b/src/daemon/snapshot-runtime.ts index 86bd5f977..f652b8376 100644 --- a/src/daemon/snapshot-runtime.ts +++ 
b/src/daemon/snapshot-runtime.ts @@ -290,6 +290,7 @@ function createDaemonSnapshotBackend(params: { analysis: capture.analysis, androidSnapshot: capture.androidSnapshot, freshness: capture.freshness, + warnings: capture.warnings, appName: session?.appBundleId ? (session.appName ?? session.appBundleId) : undefined, appBundleId: session?.appBundleId, }; diff --git a/src/platforms/ios/runner-xctestrun.ts b/src/platforms/ios/runner-xctestrun.ts index 618c594c6..7a5c31366 100644 --- a/src/platforms/ios/runner-xctestrun.ts +++ b/src/platforms/ios/runner-xctestrun.ts @@ -1044,6 +1044,8 @@ function isRunnerSourceFile(fileName: string, filePath: string): boolean { '.json', '.png', '.swift', + '.m', + '.h', '.plist', '.entitlements', '.xctestplan',