Skip to content

Commit 8d31a66

Browse files
Speed up inference on nvidia 10 series on Linux.
1 parent b643eae commit 8d31a66

1 file changed

Lines changed: 7 additions & 2 deletions

File tree

comfy/model_management.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -370,8 +370,10 @@ def offloaded_memory(loaded_models, device):
370370
offloaded_mem += m.model_offloaded_memory()
371371
return offloaded_mem
372372

373+
WINDOWS = any(platform.win32_ver())
374+
373375
EXTRA_RESERVED_VRAM = 400 * 1024 * 1024
374-
if any(platform.win32_ver()):
376+
if WINDOWS:
375377
EXTRA_RESERVED_VRAM = 600 * 1024 * 1024 #Windows is higher because of the shared vram issue
376378

377379
if args.reserve_vram is not None:
@@ -1002,7 +1004,10 @@ def should_use_fp16(device=None, model_params=0, prioritize_performance=True, ma
10021004
nvidia_10_series = ["1080", "1070", "titan x", "p3000", "p3200", "p4000", "p4200", "p5000", "p5200", "p6000", "1060", "1050", "p40", "p100", "p6", "p4"]
10031005
for x in nvidia_10_series:
10041006
if x in props.name.lower():
1005-
return True
1007+
if WINDOWS or manual_cast:
1008+
return True
1009+
else:
1010+
return False #weird linux behavior where fp32 is faster
10061011

10071012
if manual_cast:
10081013
free_model_memory = maximum_vram_for_weights(device)

0 commit comments

Comments (0)