hpc_tools.py
from geos.trame.app.io.ssh_tools import SimulationConstant


class SuggestDecomposition:

    def __init__( self, selected_cluster: SimulationConstant, n_unknowns: int, job_type: str = 'cpu' ) -> None:
        """Initialize the decomposition hinter for HPC."""
        self.selected_cluster: SimulationConstant = selected_cluster
        self.n_unknowns: int = n_unknowns
        self.job_type: str = job_type  # TODO: should be an enum
        self.sd: list[ dict ] = []
    @staticmethod
    def compute( n_unknowns: int,
                 memory_per_unknown_bytes: int,
                 node_memory_gb: int,
                 cores_per_node: int,
                 min_unknowns_per_rank: int = 10000,
                 strong_scaling: bool = True ) -> list[ dict ]:
        """Suggest node/rank distributions for a cluster computation.

        Parameters:
            - n_unknowns: total number of unknowns
            - memory_per_unknown_bytes: estimated memory per unknown
            - node_memory_gb: available memory per node (GB)
            - cores_per_node: cores available per node
            - min_unknowns_per_rank: minimum unknowns per rank for parallel efficiency
            - strong_scaling: True if the problem size is fixed (currently unused)

        Notes:
            - 10,000-100,000 unknowns per rank is often a sweet spot for many PDE solvers.
            - Use power-of-2 decompositions when possible (helps with communication patterns).
            - For 3D problems, try to maintain cubic subdomains (minimizes the
              surface-to-volume ratio, reducing communication).
            - Don't oversubscribe: avoid using more ranks than parallel efficiency allows.
        """
        # Memory constraint: keep roughly 80% of node memory available for unknowns
        node_memory_bytes = node_memory_gb * 1e9
        max_unknowns_per_node = int( 0.8 * node_memory_bytes / memory_per_unknown_bytes )

        # Minimum number of nodes needed to fit the problem in memory (ceiling division)
        min_nodes = max( 1, ( n_unknowns + max_unknowns_per_node - 1 ) // max_unknowns_per_node )

        # Determine unknowns per rank, respecting the efficiency floor
        unknowns_per_node = n_unknowns // min_nodes
        unknowns_per_rank = max( min_unknowns_per_rank, unknowns_per_node // cores_per_node )

        # Calculate total ranks needed
        n_ranks = max( 1, n_unknowns // unknowns_per_rank )

        # Distribute ranks across nodes
        ranks_per_node = min( cores_per_node, ( n_ranks + min_nodes - 1 ) // min_nodes )
        n_nodes = ( n_ranks + ranks_per_node - 1 ) // ranks_per_node

        # Alternative suggestion: twice the nodes with half the ranks per node
        alt_nodes = n_nodes * 2
        alt_ranks_per_node = max( 1, ranks_per_node // 2 )

        return [
            {
                'id': 1,
                'nodes': n_nodes,
                'ranks_per_node': ranks_per_node,
                'total_ranks': n_nodes * ranks_per_node,
                'unknowns_per_rank': n_unknowns // ( n_nodes * ranks_per_node )
            },
            {
                'id': 2,
                'nodes': alt_nodes,
                'ranks_per_node': alt_ranks_per_node,
                'total_ranks': alt_nodes * alt_ranks_per_node,
                'unknowns_per_rank': n_unknowns // ( alt_nodes * alt_ranks_per_node )
            },
        ]
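
    # Worked example (illustrative numbers, not from the source): with n_unknowns=10_000_000,
    # memory_per_unknown_bytes=64, node_memory_gb=256 and cores_per_node=64, the memory
    # constraint allows 3.2e9 unknowns per node, so min_nodes=1; unknowns_per_rank becomes
    # 156_250 and n_ranks=64, giving a first suggestion of 1 node x 64 ranks and a second
    # suggestion of 2 nodes x 32 ranks (same 64 total ranks).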
    def get_sd( self ) -> list[ dict ]:
        """Get the populated list of suggested decompositions."""
        if self.job_type == 'cpu' and self.selected_cluster:  # TODO: make job_type an enum
            self.sd = SuggestDecomposition.compute( self.n_unknowns, 64, self.selected_cluster.mem_per_node,
                                                    self.selected_cluster.cores_per_node )  # 64 bytes per unknown
            self.sd = [ {
                **item, 'label': f"{self.selected_cluster.name} : {item['nodes']} x {item['ranks_per_node']}"
            } for item in self.sd ]
        else:
            self.sd = [
                {
                    'id': -1,
                    'label': 'No: 0x0',
                    'nodes': 0,
                    'ranks_per_node': 0,
                    'total_ranks': 0,
                    'unknowns_per_rank': 0
                },
            ]
        # elif job_type == 'gpu':
        #     selected_cluster['n_nodes'] * selected_cluster['gpu']['per_node']
        return self.sd

    # def to_list( self ) -> list[ str ]:
    #     """Pretty printer to list of string for display in UI."""
    #     sd = self.get_sd()
    #     return [ f"{self.selected_cluster.name} : {sd_item['nodes']} x {sd_item['ranks_per_node']}" for sd_item in sd ]
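
# Minimal usage sketch (not part of the original file). SimulationConstant's constructor is not
# shown here, so the cluster object below is a hypothetical stand-in exposing only the
# attributes get_sd() reads: name, mem_per_node (GB) and cores_per_node.
# if __name__ == '__main__':
#     from types import SimpleNamespace
#     cluster = SimpleNamespace( name='my_cluster', mem_per_node=256, cores_per_node=64 )
#     hinter = SuggestDecomposition( cluster, n_unknowns=10_000_000, job_type='cpu' )
#     for suggestion in hinter.get_sd():
#         print( suggestion[ 'label' ], suggestion[ 'total_ranks' ] )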