Skip to content

Commit 27b544b

Browse files
committed
base: mitigate system_server boot OOM on 512 MB heaps
Insert three explicit GC passes at strategic points during system_server boot to relieve heap pressure from transient service allocations:

1. After PHASE_SYSTEM_SERVICES_READY — reclaims garbage accumulated during startBootstrapServices and startCoreServices.
2. Before AMS.systemReady() — reclaims transient allocations from startOtherServices before the heaviest boot phase begins.
3. Before PHASE_THIRD_PARTY_APPS_CAN_START — reclaims 100-300 MB of garbage from service-ready callbacks that spike heap usage after phase 550 (PHASE_ACTIVITY_MANAGER_READY), preventing OOM during app launch ramp-up.

Additionally, tune the AppsFilter boot cache rebuild:

- Defer the initial cache build by 20 seconds so transient boot allocations can be collected first.
- Build the cache in small chunks with brief lock releases between them, preceded by an explicit GC, so the work does not spike heap usage.
- Remove upfront setCapacity() from the boot path so the filter matrix grows incrementally instead of pre-allocating.
- Avoid repeated ArraySet allocation in shouldFilterApplicationInternal() for non-shared-user packages by using a static empty instance.

On memory-constrained devices the boot allocation storm can exhaust a 512 MB system_server heap within 20 seconds. These changes give the garbage collector enough breathing room to keep up.

Tested: 6 consecutive reboots with zero OOM events, heap peak 197/228 MB (vs. OOM crash within 20s pre-patch), PMS init ~31s.

Change-Id: Iee1253f2b01d5d018f377bf10ff3fe338b240bac
Signed-off-by: Quince <quinceroms@gmail.com>
1 parent 8d67df6 commit 27b544b

3 files changed

Lines changed: 104 additions & 22 deletions

File tree

services/core/java/com/android/server/pm/AppsFilterBase.java

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -68,10 +68,19 @@ public abstract class AppsFilterBase implements AppsFilterSnapshot {
6868
protected static final boolean DEBUG_LOGGING = false;
6969
public static final boolean DEBUG_TRACING = false;
7070

71-
// Allow some time for cache rebuilds.
72-
protected static final int CACHE_REBUILD_DELAY_MIN_MS = 10000;
73-
// With each new rebuild the delay doubles until it reaches max delay.
74-
protected static final int CACHE_REBUILD_DELAY_MAX_MS = 10000;
71+
// Delay the initial boot cache build so transient boot-service allocations
72+
// can be GC'd first, preventing OOM on memory-constrained devices (512 MB heap).
73+
protected static final int CACHE_REBUILD_DELAY_MIN_MS = 20000;
74+
// Fixed 20 s delay; MIN == MAX disables exponential backoff intentionally.
75+
protected static final int CACHE_REBUILD_DELAY_MAX_MS = 20000;
76+
77+
// Shared immutable empty instance to avoid allocating a new ArraySet on
78+
// every shouldFilterApplicationInternal() call for non-shared-user packages.
79+
// This is accessed ~130 K times per cache build and must never be modified.
80+
// ArraySet is final so we cannot override add/clear; immutability is enforced
81+
// by convention — the non-shared-user code path only reads (size/valueAt/isEmpty).
82+
private static final ArraySet<PackageStateInternal> EMPTY_SHARED_PKG_SETTINGS =
83+
new ArraySet<>(0);
7584

7685
/**
7786
* This contains a list of app UIDs that are implicitly queryable because another app explicitly
@@ -428,7 +437,7 @@ protected boolean shouldFilterApplicationInternal(Computer snapshot, int calling
428437
if (DEBUG_TRACING) {
429438
Trace.traceBegin(TRACE_TAG_PACKAGE_MANAGER, "callingSetting instanceof");
430439
}
431-
final ArraySet<PackageStateInternal> callingSharedPkgSettings = new ArraySet<>();
440+
final ArraySet<PackageStateInternal> callingSharedPkgSettings;
432441

433442
if (callingSetting instanceof PackageStateInternal) {
434443
final PackageStateInternal packageState = (PackageStateInternal) callingSetting;
@@ -437,13 +446,18 @@ protected boolean shouldFilterApplicationInternal(Computer snapshot, int calling
437446
final SharedUserApi sharedUserApi =
438447
snapshot.getSharedUser(packageState.getSharedUserAppId());
439448
if (sharedUserApi != null) {
449+
callingSharedPkgSettings = new ArraySet<>();
440450
callingSharedPkgSettings.addAll(sharedUserApi.getPackageStates());
451+
} else {
452+
callingSharedPkgSettings = EMPTY_SHARED_PKG_SETTINGS;
441453
}
442454
} else {
443455
callingPkgSetting = packageState;
456+
callingSharedPkgSettings = EMPTY_SHARED_PKG_SETTINGS;
444457
}
445458
} else {
446459
callingPkgSetting = null;
460+
callingSharedPkgSettings = new ArraySet<>();
447461
callingSharedPkgSettings.addAll(
448462
((SharedUserSetting) callingSetting).getPackageStates());
449463
}

services/core/java/com/android/server/pm/AppsFilterImpl.java

Lines changed: 59 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -804,15 +804,55 @@ private void updateEntireShouldFilterCacheInner(Computer snapshot,
804804
ArrayMap<String, ? extends PackageStateInternal> settings,
805805
UserInfo[] users,
806806
int subjectUserId) {
807+
final int settingsSize = settings.size();
808+
// When mCacheReady is true (post-boot user creation/deletion), readers
809+
// are actively using mShouldFilterCache under mCacheLock. We must hold
810+
// the lock for the entire rebuild to guarantee atomicity — readers see
811+
// either the old complete state or the new complete state, never partial.
812+
// The OOM only happens during boot, when mCacheReady is still false and
813+
// no readers exist, so the chunked path is safe there.
814+
if (mCacheReady) {
815+
synchronized (mCacheLock) {
816+
if (subjectUserId == USER_ALL) {
817+
mShouldFilterCache.clear();
818+
}
819+
mShouldFilterCache.setCapacity(users.length * settingsSize);
820+
for (int i = settingsSize - 1; i >= 0; i--) {
821+
updateShouldFilterCacheForPackage(snapshot,
822+
null /*skipPackage*/, settings.valueAt(i), settings,
823+
users, subjectUserId, i);
824+
}
825+
}
826+
return;
827+
}
828+
// Boot path: mCacheReady is false, no readers exist yet.
829+
// Do NOT call setCapacity() upfront — it forces a single large matrix
830+
// allocation that competes with dozens of boot services for the 512 MB
831+
// heap. Instead, let the matrix grow incrementally as put() is called.
832+
// Process in chunks to reduce peak memory during boot. Use wait(10)
833+
// instead of Thread.sleep(10) to atomically release mCacheLock during
834+
// the pause, keeping the cache in a consistent state while still giving
835+
// concurrent GC a window to reclaim temporaries.
836+
final int chunkSize = 10;
807837
synchronized (mCacheLock) {
808838
if (subjectUserId == USER_ALL) {
809839
mShouldFilterCache.clear();
810840
}
811-
mShouldFilterCache.setCapacity(users.length * settings.size());
812-
for (int i = settings.size() - 1; i >= 0; i--) {
813-
updateShouldFilterCacheForPackage(snapshot,
814-
null /*skipPackage*/, settings.valueAt(i), settings, users,
815-
subjectUserId, i);
841+
for (int chunkStart = settingsSize - 1; chunkStart >= 0;
842+
chunkStart -= chunkSize) {
843+
final int chunkEnd = Math.max(chunkStart - chunkSize + 1, 0);
844+
for (int i = chunkStart; i >= chunkEnd; i--) {
845+
updateShouldFilterCacheForPackage(snapshot,
846+
null /*skipPackage*/, settings.valueAt(i), settings,
847+
users, subjectUserId, i);
848+
}
849+
if (chunkEnd > 0) {
850+
try {
851+
mCacheLock.wait(10);
852+
} catch (InterruptedException e) {
853+
Thread.currentThread().interrupt();
854+
}
855+
}
816856
}
817857
}
818858
}
@@ -829,24 +869,26 @@ private void updateEntireShouldFilterCacheAsync(PackageManagerInternal pmInterna
829869
return;
830870
}
831871

872+
// During boot, dozens of services fill the heap to near its limit
873+
// with transient objects that become garbage shortly after. Running
874+
// an explicit GC before the N² cache build reclaims those objects
875+
// and prevents OOM during the heavy allocation phase that follows.
876+
if (!mCacheReady) {
877+
Runtime.getRuntime().gc();
878+
try {
879+
Thread.sleep(200);
880+
} catch (InterruptedException e) {
881+
Thread.currentThread().interrupt();
882+
}
883+
}
884+
832885
final long currentTimeUs = SystemClock.currentTimeMicro();
833-
final ArrayMap<String, AndroidPackage> packagesCache = new ArrayMap<>();
834-
final UserInfo[][] usersRef = new UserInfo[1][];
835886
final Computer snapshot = (Computer) pmInternal.snapshot();
836887
final ArrayMap<String, ? extends PackageStateInternal> settings =
837888
snapshot.getPackageStates();
838889
final UserInfo[] users = snapshot.getUserInfos();
839890

840-
packagesCache.ensureCapacity(settings.size());
841-
usersRef[0] = users;
842-
// store away the references to the immutable packages, since settings are retained
843-
// during updates.
844-
for (int i = 0, max = settings.size(); i < max; i++) {
845-
final AndroidPackage pkg = settings.valueAt(i).getPkg();
846-
packagesCache.put(settings.keyAt(i), pkg);
847-
}
848-
849-
updateEntireShouldFilterCacheInner(snapshot, settings, usersRef[0], USER_ALL);
891+
updateEntireShouldFilterCacheInner(snapshot, settings, users, USER_ALL);
850892
logCacheRebuilt(reason, SystemClock.currentTimeMicro() - currentTimeUs,
851893
users.length, settings.size());
852894

services/java/com/android/server/SystemServer.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3253,6 +3253,15 @@ private void startOtherServices(@NonNull TimingsTraceAndSlog t) {
32533253
mSystemServiceManager.startBootPhase(t, SystemService.PHASE_SYSTEM_SERVICES_READY);
32543254
t.traceEnd();
32553255

3256+
// Relieve heap pressure from transient boot-service allocations before
3257+
// the next wave of init work. On 512 MB heaps the boot storm can push
3258+
// usage close to the limit; an early GC here reclaims ~100-170 MB of
3259+
// garbage that accumulated during startBootstrapServices/startCoreServices.
3260+
t.traceBegin("BootGcAfterSystemServicesReady");
3261+
Slog.i(TAG, "Triggering GC after PHASE_SYSTEM_SERVICES_READY");
3262+
Runtime.getRuntime().gc();
3263+
t.traceEnd();
3264+
32563265
t.traceBegin("MakeWindowManagerServiceReady");
32573266
try {
32583267
wm.systemReady();
@@ -3428,6 +3437,14 @@ private void startOtherServices(@NonNull TimingsTraceAndSlog t) {
34283437
final ConnectivityManager connectivityF = (ConnectivityManager)
34293438
context.getSystemService(Context.CONNECTIVITY_SERVICE);
34303439

3440+
// Second GC pass before AMS.systemReady() — the heaviest boot phase.
3441+
// By now all other-services have been created; reclaim their transient
3442+
// allocations so the AMS ready callback has headroom on 512 MB heaps.
3443+
t.traceBegin("BootGcBeforeAmsReady");
3444+
Slog.i(TAG, "Triggering GC before AMS.systemReady()");
3445+
Runtime.getRuntime().gc();
3446+
t.traceEnd();
3447+
34313448
// We now tell the activity manager it is okay to run third party
34323449
// code. It will call back into us once it has gotten to the state
34333450
// where third party code can really run (but before it has actually
@@ -3563,6 +3580,15 @@ private void startOtherServices(@NonNull TimingsTraceAndSlog t) {
35633580
// Wait for all packages to be prepared
35643581
mPackageManagerService.waitForAppDataPrepared();
35653582

3583+
// Third GC pass before launching third-party apps. After
3584+
// PHASE_ACTIVITY_MANAGER_READY the service-ready callbacks can
3585+
// spike heap usage to 300+ MB on 512 MB heaps; reclaim that
3586+
// garbage before the next wave of allocations.
3587+
t.traceBegin("BootGcBeforeThirdPartyApps");
3588+
Slog.i(TAG, "Triggering GC before PHASE_THIRD_PARTY_APPS_CAN_START");
3589+
Runtime.getRuntime().gc();
3590+
t.traceEnd();
3591+
35663592
// It is now okay to let the various system services start their
35673593
// third party code...
35683594
t.traceBegin("PhaseThirdPartyAppsCanStart");

0 commit comments

Comments
 (0)