Skip to content

Commit d6d4d3f

Browse files
xaionaro@dx.centerxaionaro@dx.center
authored andcommitted
Fix split BR_TRANSACTION_COMPLETE/BR_REPLY and EINTR handling
Two binder protocol bugs fixed: 1. The kernel can send BR_TRANSACTION_COMPLETE and BR_REPLY in separate ioctl reads. When the target service hasn't responded by the time the first ioctl returns, we only get TC. Previously we returned an empty parcel; now we issue another read ioctl to wait for BR_REPLY. parseReadBuffer returns a gotReply flag to distinguish "no reply yet" from "reply with empty data". This fixes ~17% failure rate under rapid open/close cycles (250/250 pass, was ~40/50). 2. EINTR (interrupted system call) on any ioctl is now retried instead of returning an error.
1 parent 629c19f commit d6d4d3f

2 files changed

Lines changed: 92 additions & 39 deletions

File tree

kernelbinder/driver.go

Lines changed: 92 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -222,30 +222,76 @@ func (d *Driver) Transact(
222222
readBuffer: uint64(uintptr(unsafe.Pointer(&readBuf[0]))),
223223
}
224224

225-
_, _, errno := unix.Syscall(
226-
unix.SYS_IOCTL,
227-
uintptr(d.fd),
228-
binderWriteReadIoctl,
229-
uintptr(unsafe.Pointer(&bwr)),
230-
)
231-
if errno != 0 {
232-
return nil, &aidlerrors.BinderError{Op: "ioctl(BINDER_WRITE_READ)", Err: errno}
225+
for {
226+
_, _, errno := unix.Syscall(
227+
unix.SYS_IOCTL,
228+
uintptr(d.fd),
229+
binderWriteReadIoctl,
230+
uintptr(unsafe.Pointer(&bwr)),
231+
)
232+
switch errno {
233+
case 0:
234+
// Success — proceed to parse.
235+
case unix.EINTR:
236+
// Retry on interrupted system call.
237+
// Reset write buffer (already consumed) but keep read buffer.
238+
bwr.writeSize = 0
239+
continue
240+
default:
241+
return nil, &aidlerrors.BinderError{Op: "ioctl(BINDER_WRITE_READ)", Err: errno}
242+
}
243+
break
233244
}
234245

235-
// Parse the read buffer for BR codes.
236-
reply, err := d.parseReadBuffer(ctx, readBuf[:bwr.readConsumed])
237-
if err != nil {
238-
return nil, err
239-
}
246+
// Parse the read buffer for BR codes. The kernel may split
247+
// BR_TRANSACTION_COMPLETE and BR_REPLY across separate ioctl reads —
248+
// if we got TC but no reply yet, read again to wait for BR_REPLY.
249+
isOneway := flags&binder.FlagOneway != 0
250+
for {
251+
reply, gotReply, err := d.parseReadBuffer(ctx, readBuf[:bwr.readConsumed])
252+
if err != nil {
253+
return nil, err
254+
}
240255

241-
return reply, nil
256+
// If we received BR_REPLY (even with empty data), or this is
257+
// a oneway transaction, return the result.
258+
if gotReply || isOneway {
259+
return reply, nil
260+
}
261+
262+
// BR_TRANSACTION_COMPLETE without BR_REPLY — the service hasn't
263+
// responded yet. Issue a read-only ioctl to wait for BR_REPLY.
264+
logger.Debugf(ctx, "Transact: got BR_TRANSACTION_COMPLETE without BR_REPLY, reading again")
265+
bwr.writeSize = 0
266+
bwr.writeConsumed = 0
267+
bwr.readConsumed = 0
268+
for {
269+
_, _, errno := unix.Syscall(
270+
unix.SYS_IOCTL,
271+
uintptr(d.fd),
272+
binderWriteReadIoctl,
273+
uintptr(unsafe.Pointer(&bwr)),
274+
)
275+
switch errno {
276+
case 0:
277+
case unix.EINTR:
278+
continue
279+
default:
280+
return nil, &aidlerrors.BinderError{Op: "ioctl(BINDER_WRITE_READ/read)", Err: errno}
281+
}
282+
break
283+
}
284+
}
242285
}
243286

244-
// parseReadBuffer processes BR_* codes from the read buffer and returns the reply parcel.
287+
// parseReadBuffer processes BR_* codes from the read buffer.
288+
// Returns the reply parcel and whether BR_REPLY was seen.
289+
// gotReply is false when only BR_TRANSACTION_COMPLETE was received
290+
// (the service hasn't responded yet — caller should read again).
245291
func (d *Driver) parseReadBuffer(
246292
ctx context.Context,
247293
buf []byte,
248-
) (_reply *parcel.Parcel, _err error) {
294+
) (_reply *parcel.Parcel, _gotReply bool, _err error) {
249295
logger.Tracef(ctx, "parseReadBuffer")
250296
defer func() { logger.Tracef(ctx, "/parseReadBuffer: %v", _err) }()
251297

@@ -254,7 +300,7 @@ func (d *Driver) parseReadBuffer(
254300

255301
for offset < len(buf) {
256302
if offset+4 > len(buf) {
257-
return nil, fmt.Errorf("binder: truncated BR code at offset %d", offset)
303+
return nil, false, fmt.Errorf("binder: truncated BR code at offset %d", offset)
258304
}
259305

260306
cmd := binary.LittleEndian.Uint32(buf[offset:])
@@ -274,14 +320,14 @@ func (d *Driver) parseReadBuffer(
274320
logger.Debugf(ctx, "BR_REPLY")
275321
txnSize := int(unsafe.Sizeof(binderTransactionData{}))
276322
if offset+txnSize > len(buf) {
277-
return nil, fmt.Errorf("binder: truncated BR_REPLY data at offset %d", offset)
323+
return nil, true, fmt.Errorf("binder: truncated BR_REPLY data at offset %d", offset)
278324
}
279325

280326
var txn binderTransactionData
281327
copyBytesToStruct(unsafe.Pointer(&txn), buf[offset:], unsafe.Sizeof(txn))
282328

283329
if txn.dataSize == 0 {
284-
return parcel.New(), nil
330+
return parcel.New(), true, nil
285331
}
286332

287333
// Copy data from the mmap'd region into a new parcel.
@@ -297,43 +343,44 @@ func (d *Driver) parseReadBuffer(
297343
logger.Warnf(ctx, "failed to free binder buffer: %v", err)
298344
}
299345

300-
return parcel.FromBytes(replyData), nil
346+
return parcel.FromBytes(replyData), true, nil
301347

302348
case brDeadReply:
303349
logger.Debugf(ctx, "BR_DEAD_REPLY")
304-
return nil, &aidlerrors.TransactionError{
350+
return nil, true, &aidlerrors.TransactionError{
305351
Code: aidlerrors.TransactionErrorDeadObject,
306352
}
307353

308354
case brFailedReply:
309355
logger.Debugf(ctx, "BR_FAILED_REPLY")
310-
return nil, &aidlerrors.TransactionError{
356+
return nil, true, &aidlerrors.TransactionError{
311357
Code: aidlerrors.TransactionErrorFailedTransaction,
312358
}
313359

314360
case brError:
315361
if offset+4 > len(buf) {
316-
return nil, fmt.Errorf("binder: truncated BR_ERROR data")
362+
return nil, false, fmt.Errorf("binder: truncated BR_ERROR data")
317363
}
318364
errCode := int32(binary.LittleEndian.Uint32(buf[offset:]))
319-
return nil, fmt.Errorf("binder: BR_ERROR %d", errCode)
365+
return nil, false, fmt.Errorf("binder: BR_ERROR %d", errCode)
320366

321367
case brSpawnLooper:
322368
logger.Debugf(ctx, "BR_SPAWN_LOOPER (ignored)")
323369
continue
324370

325371
default:
326372
logger.Warnf(ctx, "binder: unknown BR code 0x%08x at offset %d", cmd, offset-4)
327-
return nil, fmt.Errorf("binder: unknown BR code 0x%08x", cmd)
373+
return nil, false, fmt.Errorf("binder: unknown BR code 0x%08x", cmd)
328374
}
329375
}
330376

331377
if !gotTransactionComplete {
332-
return nil, fmt.Errorf("binder: did not receive BR_TRANSACTION_COMPLETE")
378+
return nil, false, fmt.Errorf("binder: did not receive BR_TRANSACTION_COMPLETE")
333379
}
334380

335-
// BR_TRANSACTION_COMPLETE without BR_REPLY means a one-way transaction.
336-
return parcel.New(), nil
381+
// BR_TRANSACTION_COMPLETE without BR_REPLY — the service hasn't
382+
// responded yet. Caller should issue another read ioctl.
383+
return parcel.New(), false, nil
337384
}
338385

339386
// acquireReplyHandles scans the reply's flat_binder_objects (located via the
@@ -496,17 +543,24 @@ func (d *Driver) writeCommand(
496543
writeBuffer: uint64(uintptr(unsafe.Pointer(&writeBuf[0]))),
497544
}
498545

499-
_, _, errno := unix.Syscall(
500-
unix.SYS_IOCTL,
501-
uintptr(d.fd),
502-
binderWriteReadIoctl,
503-
uintptr(unsafe.Pointer(&bwr)),
504-
)
505-
if errno != 0 {
506-
return &aidlerrors.BinderError{Op: "ioctl(BINDER_WRITE_READ)", Err: errno}
546+
for {
547+
_, _, errno := unix.Syscall(
548+
unix.SYS_IOCTL,
549+
uintptr(d.fd),
550+
binderWriteReadIoctl,
551+
uintptr(unsafe.Pointer(&bwr)),
552+
)
553+
switch errno {
554+
case 0:
555+
return nil
556+
case unix.EINTR:
557+
// Retry on interrupted system call — the command was not
558+
// processed and must be resent.
559+
continue
560+
default:
561+
return &aidlerrors.BinderError{Op: "ioctl(BINDER_WRITE_READ)", Err: errno}
562+
}
507563
}
508-
509-
return nil
510564
}
511565

512566
// copyStructToBytes copies a struct's raw memory into a byte slice.

tests/e2e/aidlcli_test.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,6 @@ func logAndExit(msg string) {
191191
// Arguments are joined into a single shell command string to preserve
192192
// empty/quoted values across the adb shell boundary.
193193
func runAidlcli(args ...string) (string, string, error) {
194-
// Build a single shell command string to avoid adb shell arg splitting issues.
195194
parts := make([]string, 0, len(args)+3)
196195
parts = append(parts, deviceBinary, "--format", "json")
197196
for _, a := range args {

0 commit comments

Comments
 (0)