diff --git a/core/application/startup.go b/core/application/startup.go
index 4744ea8311bd..62b17e96fb0d 100644
--- a/core/application/startup.go
+++ b/core/application/startup.go
@@ -484,6 +484,12 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
 			options.ForceEvictionWhenBusy = *settings.ForceEvictionWhenBusy
 		}
 	}
+	if settings.SizeAwareEviction != nil {
+		// Only apply if current value is default (false), suggesting it wasn't set from env var
+		if !options.SizeAwareEviction {
+			options.SizeAwareEviction = *settings.SizeAwareEviction
+		}
+	}
 	if settings.LRUEvictionMaxRetries != nil {
 		// Only apply if current value is default (30), suggesting it wasn't set from env var
 		if options.LRUEvictionMaxRetries == 0 {
@@ -671,6 +677,7 @@ func initializeWatchdog(application *Application, options *config.ApplicationCon
 			model.WithLRULimit(lruLimit),
 			model.WithMemoryReclaimer(options.MemoryReclaimerEnabled, options.MemoryReclaimerThreshold),
 			model.WithForceEvictionWhenBusy(options.ForceEvictionWhenBusy),
+			model.WithSizeAwareEviction(options.SizeAwareEviction),
 		)
 		application.ModelLoader().SetWatchDog(wd)
 
diff --git a/core/application/watchdog.go b/core/application/watchdog.go
index c1aee6c7adb5..e79afea71cd3 100644
--- a/core/application/watchdog.go
+++ b/core/application/watchdog.go
@@ -55,6 +55,7 @@ func (a *Application) startWatchdog() error {
 			model.WithLRULimit(lruLimit),
 			model.WithMemoryReclaimer(appConfig.MemoryReclaimerEnabled, appConfig.MemoryReclaimerThreshold),
 			model.WithForceEvictionWhenBusy(appConfig.ForceEvictionWhenBusy),
+			model.WithSizeAwareEviction(appConfig.SizeAwareEviction),
 		)
 
 		// Create new stop channel BEFORE setting up any goroutines
diff --git a/core/backend/options.go b/core/backend/options.go
index 9054bb39b693..0aeb82a5cc5e 100644
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -1,6 +1,7 @@
 package backend
 
 import (
+	"context"
 	"encoding/json"
 	"fmt"
 	"math/rand/v2"
@@ -12,7 +13,9 @@ import (
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/trace"
 	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
+	"github.com/mudler/LocalAI/pkg/downloader"
 	"github.com/mudler/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/vram"
 	"github.com/mudler/xlog"
 )
 
@@ -33,6 +36,67 @@ func recordModelLoadFailure(appConfig *config.ApplicationConfig, modelName, back
 	})
 }
 
+// estimateModelSizeBytes asks vram.EstimateModel for the total weight size, in
+// bytes, of the model described by c; it returns 0 when nothing can be estimated.
+// Weight files come from c.DownloadFiles, c.Model and c.MMProj (de-duplicated);
+// entries without a "://" scheme become file:// URIs under modelsPath.  The first
+// HuggingFace repo ID found in the download files or c.Model is forwarded too, so
+// EstimateModel can fall back to the HF API for not-yet-downloaded models.
+func estimateModelSizeBytes(c config.ModelConfig, modelsPath string) int64 {
+	var req vram.ModelEstimateInput
+	known := map[string]bool{}
+
+	// collect appends uri to req.Files when it is a weight file not seen before.
+	collect := func(uri string) {
+		if !vram.IsWeightFile(uri) {
+			return
+		}
+		target := uri
+		if !strings.Contains(uri, "://") {
+			target = "file://" + filepath.Join(modelsPath, uri)
+		}
+		if !known[target] {
+			known[target] = true
+			req.Files = append(req.Files, vram.FileInput{URI: target})
+		}
+	}
+
+	// detectRepo keeps the first HF repo ID extracted from a resolved URI.
+	detectRepo := func(uri string) {
+		if req.HFRepo != "" {
+			return
+		}
+		if id, ok := vram.ExtractHFRepoID(downloader.URI(uri).ResolveURL()); ok {
+			req.HFRepo = id
+		}
+	}
+
+	// Download files and c.Model feed both lookups; MMProj only contributes a file.
+	sources := make([]string, 0, len(c.DownloadFiles)+1)
+	for _, dl := range c.DownloadFiles {
+		sources = append(sources, string(dl.URI))
+	}
+	sources = append(sources, c.Model)
+	for _, uri := range sources {
+		collect(uri)
+		detectRepo(uri)
+	}
+	if c.MMProj != "" {
+		collect(c.MMProj)
+	}
+	if req.HFRepo == "" && len(req.Files) == 0 {
+		return 0
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+	estimate, err := vram.EstimateModel(ctx, req)
+	if err != nil || estimate.SizeBytes == 0 {
+		return 0
+	}
+	return int64(estimate.SizeBytes)
+}
+
 func ModelOptions(c config.ModelConfig, so *config.ApplicationConfig, opts ...model.Option) []model.Option {
 	name := c.Name
 	if name == "" {
@@ -75,6 +139,10 @@ func ModelOptions(c config.ModelConfig, so *config.ApplicationConfig, opts ...mo
 		defOpts = append(defOpts, model.WithExternalBackend(k, v))
 	}
 
+	if sizeBytes := estimateModelSizeBytes(c, so.SystemState.Model.ModelsPath); sizeBytes > 0 {
+		defOpts = append(defOpts, model.WithModelSizeBytes(sizeBytes))
+	}
+
 	return append(defOpts, opts...)
 }
 
diff --git a/core/cli/run.go b/core/cli/run.go
index 077ef8b230e0..77a33d26cbc8 100644
--- a/core/cli/run.go
+++ b/core/cli/run.go
@@ -89,6 +89,7 @@ type RunCMD struct {
 	EnableMemoryReclaimer              bool     `env:"LOCALAI_MEMORY_RECLAIMER,MEMORY_RECLAIMER,LOCALAI_GPU_RECLAIMER,GPU_RECLAIMER" default:"false" help:"Enable memory threshold monitoring to auto-evict backends when memory usage exceeds threshold (uses GPU VRAM if available, otherwise RAM)" group:"backends"`
 	MemoryReclaimerThreshold           float64  `env:"LOCALAI_MEMORY_RECLAIMER_THRESHOLD,MEMORY_RECLAIMER_THRESHOLD,LOCALAI_GPU_RECLAIMER_THRESHOLD,GPU_RECLAIMER_THRESHOLD" default:"0.95" help:"Memory usage threshold (0.0-1.0) that triggers backend eviction (default 0.95 = 95%%)" group:"backends"`
 	ForceEvictionWhenBusy              bool     `env:"LOCALAI_FORCE_EVICTION_WHEN_BUSY,FORCE_EVICTION_WHEN_BUSY" default:"false" help:"Force eviction even when models have active API calls (default: false for safety)" group:"backends"`
+	SizeAwareEviction                  bool     `env:"LOCALAI_SIZE_AWARE_EVICTION,SIZE_AWARE_EVICTION" default:"false" help:"Evict the largest loaded model first rather than the least-recently-used one, keeping small utility models resident and maximizing freed memory per eviction" group:"backends"`
 	LRUEvictionMaxRetries              int      `env:"LOCALAI_LRU_EVICTION_MAX_RETRIES,LRU_EVICTION_MAX_RETRIES" default:"30" help:"Maximum number of retries when waiting for busy models to become idle before eviction (default: 30)" group:"backends"`
 	LRUEvictionRetryInterval           string   `env:"LOCALAI_LRU_EVICTION_RETRY_INTERVAL,LRU_EVICTION_RETRY_INTERVAL" default:"1s" help:"Interval between retries when waiting for busy models to become idle (e.g., 1s, 2s) (default: 1s)" group:"backends"`
 	Federated                          bool     `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"`
@@ -463,6 +464,9 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
 	if r.ForceEvictionWhenBusy {
 		opts = append(opts, config.WithForceEvictionWhenBusy(true))
 	}
+	if r.SizeAwareEviction {
+		opts = append(opts, config.WithSizeAwareEviction(true))
+	}
 	if r.LRUEvictionMaxRetries > 0 {
 		opts = append(opts, config.WithLRUEvictionMaxRetries(r.LRUEvictionMaxRetries))
 	}
diff --git a/core/config/application_config.go b/core/config/application_config.go
index 39f76b9e0b6f..52757aee2c90 100644
--- a/core/config/application_config.go
+++ b/core/config/application_config.go
@@ -72,6 +72,7 @@ type ApplicationConfig struct {
 
 	// Eviction settings
 	ForceEvictionWhenBusy    bool          // Force eviction even when models have active API calls (default: false for safety)
+	SizeAwareEviction        bool          // Evict largest models first rather than least-recently-used (default: false)
 	LRUEvictionMaxRetries    int           // Maximum number of retries when waiting for busy models to become idle (default: 30)
 	LRUEvictionRetryInterval time.Duration // Interval between retries when waiting for busy models (default: 1s)
 
@@ -406,6 +407,16 @@ func WithForceEvictionWhenBusy(enabled bool) AppOption {
 	}
 }
 
+// WithSizeAwareEviction returns an AppOption that sets SizeAwareEviction.
+// A true value selects largest-model-first eviction in the watchdog instead
+// of least-recently-used ordering; false keeps the LRU ordering.
+func WithSizeAwareEviction(enabled bool) AppOption {
+	setSizeAware := func(cfg *ApplicationConfig) {
+		cfg.SizeAwareEviction = enabled
+	}
+	return setSizeAware
+}
+
 // WithLRUEvictionMaxRetries sets the maximum number of retries when waiting for busy models to become idle
 func WithLRUEvictionMaxRetries(maxRetries int) AppOption {
 	return func(o *ApplicationConfig) {
@@ -903,6 +914,7 @@ func (o *ApplicationConfig) ToRuntimeSettings() RuntimeSettings {
 	memoryReclaimerEnabled := o.MemoryReclaimerEnabled
 	memoryReclaimerThreshold := o.MemoryReclaimerThreshold
 	forceEvictionWhenBusy := o.ForceEvictionWhenBusy
+	sizeAwareEviction := o.SizeAwareEviction
 	lruEvictionMaxRetries := o.LRUEvictionMaxRetries
 	threads := o.Threads
 	contextSize := o.ContextSize
@@ -990,6 +1002,7 @@ func (o *ApplicationConfig) ToRuntimeSettings() RuntimeSettings {
 		MemoryReclaimerEnabled:    &memoryReclaimerEnabled,
 		MemoryReclaimerThreshold:  &memoryReclaimerThreshold,
 		ForceEvictionWhenBusy:     &forceEvictionWhenBusy,
+		SizeAwareEviction:         &sizeAwareEviction,
 		LRUEvictionMaxRetries:     &lruEvictionMaxRetries,
 		LRUEvictionRetryInterval:  &lruEvictionRetryInterval,
 		Threads:                   &threads,
@@ -1107,6 +1120,10 @@ func (o *ApplicationConfig) ApplyRuntimeSettings(settings *RuntimeSettings) (req
 		o.ForceEvictionWhenBusy = *settings.ForceEvictionWhenBusy
 		// This setting doesn't require restart, can be updated dynamically
 	}
+	if settings.SizeAwareEviction != nil {
+		o.SizeAwareEviction = *settings.SizeAwareEviction
+		// This setting doesn't require restart, can be updated dynamically
+	}
 	if settings.LRUEvictionMaxRetries != nil {
 		o.LRUEvictionMaxRetries = *settings.LRUEvictionMaxRetries
 		// This setting doesn't require restart, can be updated dynamically
diff --git a/core/config/runtime_settings.go b/core/config/runtime_settings.go
index 3fb16233e7dc..0013f8b116ec 100644
--- a/core/config/runtime_settings.go
+++ b/core/config/runtime_settings.go
@@ -28,6 +28,7 @@ type RuntimeSettings struct {
 
 	// Eviction settings
 	ForceEvictionWhenBusy    *bool   `json:"force_eviction_when_busy,omitempty"`    // Force eviction even when models have active API calls (default: false for safety)
+	SizeAwareEviction        *bool   `json:"size_aware_eviction,omitempty"`          // Evict largest models first rather than least-recently-used (default: false)
 	LRUEvictionMaxRetries    *int    `json:"lru_eviction_max_retries,omitempty"`    // Maximum number of retries when waiting for busy models to become idle (default: 30)
 	LRUEvictionRetryInterval *string `json:"lru_eviction_retry_interval,omitempty"` // Interval between retries when waiting for busy models (e.g., 1s, 2s) (default: 1s)
 
diff --git a/core/http/react-ui/src/pages/Settings.jsx b/core/http/react-ui/src/pages/Settings.jsx
index 3174eed582ed..e1bffc2b8be3 100644
--- a/core/http/react-ui/src/pages/Settings.jsx
+++ b/core/http/react-ui/src/pages/Settings.jsx
@@ -314,6 +314,9 @@ export default function Settings() {
               <SettingRow label="Force Eviction When Busy" description="Allow model eviction even during active API calls">
                 <Toggle checked={settings.force_eviction_when_busy} onChange={(v) => update('force_eviction_when_busy', v)} />
               </SettingRow>
+              <SettingRow label="Size-Aware Eviction" description="Evict the largest loaded model first instead of the least-recently-used one">
+                <Toggle checked={settings.size_aware_eviction} onChange={(v) => update('size_aware_eviction', v)} />
+              </SettingRow>
               <SettingRow label="LRU Eviction Max Retries" description="Maximum retries waiting for busy models before eviction">
                 <input className="input" type="number" style={{ width: 120 }} value={settings.lru_eviction_max_retries ?? ''} onChange={(e) => update('lru_eviction_max_retries', parseInt(e.target.value) || 0)} placeholder="30" />
               </SettingRow>
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index a3de78dd4ee6..c840249b6ad1 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -159,6 +159,12 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
 			return nil, fmt.Errorf("could not load model (no success): %s", res.Message)
 		}
 
+		// Register size for size-aware eviction using the caller-supplied estimate
+		// (computed via pkg/vram, which handles multi-file and non-GGUF models).
+		if ml.wd != nil && o.modelSizeBytes > 0 {
+			ml.wd.RegisterModelSize(modelID, o.modelSizeBytes)
+		}
+
 		return client, nil
 	}
 }
diff --git a/pkg/model/loader_options.go b/pkg/model/loader_options.go
index 16df2b9bda23..d247a00c8d3c 100644
--- a/pkg/model/loader_options.go
+++ b/pkg/model/loader_options.go
@@ -19,6 +19,11 @@ type Options struct {
 	grpcAttempts      int
 	grpcAttemptsDelay int
 	parallelRequests  bool
+
+	// modelSizeBytes is the estimated total weight size in bytes, pre-computed
+	// by the caller using the vram estimation scaffolding.  When non-zero it is
+	// registered with the watchdog so size-aware eviction can rank models.
+	modelSizeBytes int64
 }
 
 type Option func(*Options)
@@ -86,6 +91,12 @@ func WithModelID(id string) Option {
 	}
 }
 
+// WithModelSizeBytes sets the estimated weight size, in bytes, that the loader
+// registers with the watchdog after a successful load (ignored when not positive).
+func WithModelSizeBytes(bytes int64) Option {
+	return func(o *Options) { o.modelSizeBytes = bytes }
+}
+
 func NewOptions(opts ...Option) *Options {
 	o := &Options{
 		gRPCOptions:       &pb.ModelOptions{},
diff --git a/pkg/model/watchdog.go b/pkg/model/watchdog.go
index fea188803c06..5de709b91d04 100644
--- a/pkg/model/watchdog.go
+++ b/pkg/model/watchdog.go
@@ -46,6 +46,11 @@ type WatchDog struct {
 	// Eviction settings
 	forceEvictionWhenBusy bool // Force eviction even when models have active API calls (default: false for safety)
 
+	// Size-aware eviction: sort candidates by model file size (largest first) to maximize freed memory.
+	// When enabled, bigger models are evicted before smaller ones regardless of recency.
+	sizeAwareEviction bool
+	modelSizes        map[string]int64 // modelID → file size in bytes
+
 	// Pinned models are excluded from idle, LRU, and memory-pressure eviction
 	pinnedModels map[string]bool
 }
@@ -88,6 +93,8 @@ func NewWatchDog(opts ...WatchDogOption) *WatchDog {
 		memoryReclaimerThreshold: o.memoryReclaimerThreshold,
 		watchdogInterval:         o.watchdogInterval,
 		forceEvictionWhenBusy:    o.forceEvictionWhenBusy,
+		sizeAwareEviction:        o.sizeAwareEviction,
+		modelSizes:               make(map[string]int64),
 	}
 }
 
@@ -127,6 +134,31 @@ func (wd *WatchDog) SetForceEvictionWhenBusy(force bool) {
 	wd.forceEvictionWhenBusy = force
 }
 
+// RegisterModelSize stores bytes as the size of modelID, replacing any earlier value.
+// Size-aware eviction reads this value to rank larger models ahead of smaller ones.
+// Call this after a model has been successfully loaded.
+func (wd *WatchDog) RegisterModelSize(modelID string, bytes int64) {
+	wd.Lock()
+	defer wd.Unlock()
+	wd.modelSizes[modelID] = bytes
+}
+
+// SetSizeAwareEviction enables or disables size-aware eviction ordering.
+// When enabled, eviction candidates are sorted by registered size (largest first),
+// with last-used time breaking ties; when disabled, by last-used time (oldest first).
+func (wd *WatchDog) SetSizeAwareEviction(enabled bool) {
+	wd.Lock()
+	defer wd.Unlock()
+	wd.sizeAwareEviction = enabled
+}
+
+// GetSizeAwareEviction reports whether size-aware eviction ordering is currently enabled.
+func (wd *WatchDog) GetSizeAwareEviction() bool {
+	wd.Lock()
+	defer wd.Unlock()
+	return wd.sizeAwareEviction
+}
+
 // SetPinnedModels replaces the set of pinned model names.
 // Pinned models are excluded from idle, LRU, and memory-pressure eviction.
 func (wd *WatchDog) SetPinnedModels(models []string) {
@@ -268,11 +300,12 @@ func (wd *WatchDog) RestoreState(state WatchDogState) {
 	xlog.Info("[WatchDog] Restored model state", "modelCount", len(wd.addressModelMap))
 }
 
-// modelUsageInfo holds information about a model's usage for LRU sorting
+// modelUsageInfo holds a tracked model's address, name, last-used time and size for eviction sorting
 type modelUsageInfo struct {
-	address  string
-	model    string
-	lastUsed time.Time
+	address   string
+	model     string
+	lastUsed  time.Time
+	sizeBytes int64 // size registered via RegisterModelSize, in bytes; 0 if unknown
 }
 
 // EnforceLRULimitResult contains the result of LRU enforcement
@@ -304,27 +337,39 @@ func (wd *WatchDog) EnforceLRULimit(pendingLoads int) EnforceLRULimitResult {
 		return EnforceLRULimitResult{EvictedCount: 0, NeedMore: false}
 	}
 
-	xlog.Debug("[WatchDog] LRU enforcement triggered", "current", currentCount, "pendingLoads", pendingLoads, "limit", wd.lruLimit, "toEvict", modelsToEvict)
+	sizeAwareEviction := wd.sizeAwareEviction
+	xlog.Debug("[WatchDog] LRU enforcement triggered", "current", currentCount, "pendingLoads", pendingLoads, "limit", wd.lruLimit, "toEvict", modelsToEvict, "sizeAware", sizeAwareEviction)
 
-	// Build a list of models sorted by last used time (oldest first)
+	// Build a list of models to sort for eviction candidates
 	var models []modelUsageInfo
 	for address, model := range wd.addressModelMap {
 		lastUsed := wd.lastUsed[address]
 		if lastUsed.IsZero() {
-			// If no lastUsed recorded, use a very old time
 			lastUsed = time.Time{}
 		}
 		models = append(models, modelUsageInfo{
-			address:  address,
-			model:    model,
-			lastUsed: lastUsed,
+			address:   address,
+			model:     model,
+			lastUsed:  lastUsed,
+			sizeBytes: wd.modelSizes[model],
 		})
 	}
 
-	// Sort by lastUsed time (oldest first)
-	slices.SortFunc(models, func(a, b modelUsageInfo) int {
-		return a.lastUsed.Compare(b.lastUsed)
-	})
+	// Sort eviction candidates: largest-first when size-aware, oldest-first otherwise.
+	// In size-aware mode the last-used time (oldest first) breaks ties between
+	// models of the same size.  Sizes are compared explicitly instead of returning
+	// int(b.sizeBytes - a.sizeBytes): that conversion truncates where int is 32 bits
+	// wide and can flip the sign for differences of 2 GiB or more.
+	slices.SortFunc(models, func(a, b modelUsageInfo) int {
+		if sizeAwareEviction && a.sizeBytes != b.sizeBytes {
+			if a.sizeBytes > b.sizeBytes {
+				return -1 // larger model sorts first
+			}
+			return 1
+		}
+		// Oldest last-used first; also the tiebreaker between equal sizes.
+		return a.lastUsed.Compare(b.lastUsed)
+	})
 
 	// Collect models to evict (the oldest ones)
 	var modelsToShutdown []string
@@ -520,8 +565,9 @@ func (wd *WatchDog) evictLRUModel() {
 	}
 
 	forceEvictionWhenBusy := wd.forceEvictionWhenBusy
+	sizeAwareEviction := wd.sizeAwareEviction
 
-	// Build a list of models sorted by last used time (oldest first)
+	// Build a list of models to sort for eviction candidates
 	var models []modelUsageInfo
 	for address, model := range wd.addressModelMap {
 		lastUsed := wd.lastUsed[address]
@@ -529,9 +575,10 @@ func (wd *WatchDog) evictLRUModel() {
 			lastUsed = time.Time{}
 		}
 		models = append(models, modelUsageInfo{
-			address:  address,
-			model:    model,
-			lastUsed: lastUsed,
+			address:   address,
+			model:     model,
+			lastUsed:  lastUsed,
+			sizeBytes: wd.modelSizes[model],
 		})
 	}
 
@@ -540,10 +587,19 @@ func (wd *WatchDog) evictLRUModel() {
 		return
 	}
 
-	// Sort by lastUsed time (oldest first)
-	slices.SortFunc(models, func(a, b modelUsageInfo) int {
-		return a.lastUsed.Compare(b.lastUsed)
-	})
+	// Sort eviction candidates: largest-first when size-aware, oldest-first otherwise.
+	// Sizes are compared explicitly: int(b.sizeBytes - a.sizeBytes) truncates where
+	// int is 32 bits wide and can flip the sign for differences of 2 GiB or more.
+	slices.SortFunc(models, func(a, b modelUsageInfo) int {
+		if sizeAwareEviction && a.sizeBytes != b.sizeBytes {
+			if a.sizeBytes > b.sizeBytes {
+				return -1 // larger model sorts first
+			}
+			return 1
+		}
+		// Oldest last-used first; also the tiebreaker between equal sizes.
+		return a.lastUsed.Compare(b.lastUsed)
+	})
 
 	// Find the first non-busy, non-pinned model (or first non-pinned model if forceEvictionWhenBusy is true)
 	var lruModel *modelUsageInfo
@@ -587,6 +643,9 @@ func (wd *WatchDog) evictLRUModel() {
 }
 
 func (wd *WatchDog) untrack(address string) {
+	if modelID, ok := wd.addressModelMap[address]; ok {
+		delete(wd.modelSizes, modelID)
+	}
 	delete(wd.busyTime, address)
 	delete(wd.idleTime, address)
 	delete(wd.lastUsed, address)
diff --git a/pkg/model/watchdog_options.go b/pkg/model/watchdog_options.go
index a3509a52b21d..d11eb23713f4 100644
--- a/pkg/model/watchdog_options.go
+++ b/pkg/model/watchdog_options.go
@@ -31,6 +31,9 @@ type WatchDogOptions struct {
 
 	// Eviction settings
 	forceEvictionWhenBusy bool // Force eviction even when models have active API calls (default: false for safety)
+
+	// Size-aware eviction: sort candidates by model file size (largest first)
+	sizeAwareEviction bool
 }
 
 // WatchDogOption is a function that configures WatchDogOptions
@@ -116,6 +119,17 @@ func WithForceEvictionWhenBusy(force bool) WatchDogOption {
 	}
 }
 
+// WithSizeAwareEviction returns a WatchDogOption that sets size-aware eviction.
+// When enabled is true, eviction candidates are ordered by registered model size
+// (largest first) rather than by last-used time, which keeps small models resident
+// and frees more memory per eviction.  Default: false (LRU time ordering).
+func WithSizeAwareEviction(enabled bool) WatchDogOption {
+	configure := func(opts *WatchDogOptions) {
+		opts.sizeAwareEviction = enabled
+	}
+	return configure
+}
+
 // DefaultWatchDogOptions returns default options for the watchdog
 func DefaultWatchDogOptions() *WatchDogOptions {
 	return &WatchDogOptions{
diff --git a/pkg/model/watchdog_test.go b/pkg/model/watchdog_test.go
index f0f982843593..62398532c280 100644
--- a/pkg/model/watchdog_test.go
+++ b/pkg/model/watchdog_test.go
@@ -741,4 +741,110 @@ var _ = Describe("WatchDog", func() {
 			Expect(pm.getShutdownCalls()).To(ContainElement("model1"))
 		})
 	})
+
+	Context("Size-Aware Eviction", func() {
+		BeforeEach(func() {
+			wd = model.NewWatchDog(
+				model.WithProcessManager(pm),
+				model.WithLRULimit(2),
+				model.WithForceEvictionWhenBusy(true),
+				model.WithSizeAwareEviction(true),
+			)
+		})
+
+		It("should enable size-aware eviction via option", func() {
+			Expect(wd.GetSizeAwareEviction()).To(BeTrue())
+		})
+
+		It("should allow toggling size-aware eviction dynamically", func() {
+			wd.SetSizeAwareEviction(false)
+			Expect(wd.GetSizeAwareEviction()).To(BeFalse())
+			wd.SetSizeAwareEviction(true)
+			Expect(wd.GetSizeAwareEviction()).To(BeTrue())
+		})
+
+		It("should evict the largest model first when size-aware eviction is enabled", func() {
+			// Register sizes: model1=100MB, model2=400MB
+			wd.RegisterModelSize("model1", 100*1024*1024)
+			wd.RegisterModelSize("model2", 400*1024*1024)
+
+			// Add models — model1 older, model2 newer
+			wd.AddAddressModelMap("addr1", "model1")
+			wd.Mark("addr1")
+			wd.UnMark("addr1")
+			time.Sleep(10 * time.Millisecond)
+
+			wd.AddAddressModelMap("addr2", "model2")
+			wd.Mark("addr2")
+			wd.UnMark("addr2")
+
+			// With limit=2 and 2 loaded, adding a 3rd triggers eviction.
+			// LRU order: model1 (oldest) would be evicted first.
+			// Size order: model2 (400MB) should be evicted first.
+			result := wd.EnforceLRULimit(0)
+			Expect(result.EvictedCount).To(Equal(1))
+			Expect(result.NeedMore).To(BeFalse())
+			Expect(pm.getShutdownCalls()).To(ContainElement("model2")) // largest first
+			Expect(pm.getShutdownCalls()).ToNot(ContainElement("model1"))
+		})
+
+		It("should use LRU time as tiebreaker for equal-size models", func() {
+			// Register equal sizes for both models
+			wd.RegisterModelSize("model1", 200*1024*1024)
+			wd.RegisterModelSize("model2", 200*1024*1024)
+
+			// Add model1 first (older)
+			wd.AddAddressModelMap("addr1", "model1")
+			wd.Mark("addr1")
+			wd.UnMark("addr1")
+			time.Sleep(20 * time.Millisecond)
+
+			// Add model2 (newer)
+			wd.AddAddressModelMap("addr2", "model2")
+			wd.Mark("addr2")
+			wd.UnMark("addr2")
+
+			// Equal size → LRU tiebreaker: model1 (older) should be evicted
+			result := wd.EnforceLRULimit(0)
+			Expect(result.EvictedCount).To(Equal(1))
+			Expect(pm.getShutdownCalls()).To(ContainElement("model1"))
+			Expect(pm.getShutdownCalls()).ToNot(ContainElement("model2"))
+		})
+
+		It("should fall back to LRU when no size is registered", func() {
+			// No sizes registered — should behave like standard LRU
+			wd.AddAddressModelMap("addr1", "model1")
+			wd.Mark("addr1")
+			wd.UnMark("addr1")
+			time.Sleep(20 * time.Millisecond)
+
+			wd.AddAddressModelMap("addr2", "model2")
+			wd.Mark("addr2")
+			wd.UnMark("addr2")
+
+			// Both have size 0 → LRU tiebreaker: model1 (older) evicted
+			result := wd.EnforceLRULimit(0)
+			Expect(result.EvictedCount).To(Equal(1))
+			Expect(pm.getShutdownCalls()).To(ContainElement("model1"))
+		})
+
+		It("should clean up model size on eviction", func() {
+			// Only model1 has a registered size, so it outranks model2 (size 0).
+			wd.RegisterModelSize("model1", 200*1024*1024)
+			wd.AddAddressModelMap("addr1", "model1")
+			wd.Mark("addr1")
+			wd.UnMark("addr1")
+
+			wd.AddAddressModelMap("addr2", "model2")
+			wd.Mark("addr2")
+			wd.UnMark("addr2")
+
+			result := wd.EnforceLRULimit(0)
+			Expect(result.EvictedCount).To(Equal(1))
+			Expect(pm.getShutdownCalls()).To(ContainElement("model1"))
+
+			// Re-registering the evicted model's name must not panic.
+			wd.RegisterModelSize("model1", 50*1024*1024)
+		})
+	})
 })