Optimize cython variant

felsenhower · felsenhower · commit 35e31aebdd5f · 2025-12-28T18:41:54.000+01:00
diff --git a/.github/workflows/test_cython.yaml b/.github/workflows/test_cython.yaml
@@ -30,5 +30,4 @@ jobs:
             --executable='uv run main.py' \
             --cwd='../cython' \
             --strictness=4 \
-            --shuffle=42 \
-            --filter='o:{"lines": "0"}'
+            --shuffle=42
diff --git a/cython/calculate.pyx b/cython/calculate.pyx
@@ -5,9 +5,12 @@
 # cython: initializedcheck=False
 # cython: cdivision=True
 
-
 from time import time
-from itertools import count
+
+from libc.math cimport fabs
+
+import numpy as np
+cimport numpy as np
 
 from partdiff_common.parse_args import (
     Options,
@@ -20,45 +23,111 @@ from partdiff_common import (
     CalculationResults,
 )
 
+# Integer codes that calculate_inner compares its `method` and `termination`
+# arguments against.
+# NOTE(review): presumably these equal the `.value`s of CalculationMethod and
+# TerminationCondition that calculate() forwards -- confirm against the enums.
+cdef int METH_GAUSS_SEIDEL = 1
+cdef int METH_JACOBI = 2
+cdef int TERM_ACC = 1
+cdef int TERM_ITER = 2
 
-def calculate(arguments: CalculationArguments, options: Options) -> CalculationResults:
-    start_time = time()
-    n = arguments.n
-    tensor = arguments.tensor
-    perturbation_matrix = arguments.perturbation_matrix
-    stat_iteration = 0
-    stat_accuracy = None
-    matrix_out = tensor[0, :, :]
-    matrix_in = matrix_out
-    if options.method == CalculationMethod.JACOBI:
-        matrix_in = tensor[1, :, :]
-    for stat_iteration in count(start=1):
+cdef inline double tensor_get(double* tensor, int N, int m, int i, int j) noexcept nogil:
+    # Return element [m, i, j] of a row-major buffer whose slices are
+    # (N+1) x (N+1) doubles.
+    # The flat offset is computed in Py_ssize_t: in plain C int arithmetic
+    # m*(N+1)*(N+1) overflows as soon as the buffer holds 2**31 elements.
+    cdef Py_ssize_t side = <Py_ssize_t>N + 1
+    return tensor[(m * side + i) * side + j]
+
+cdef inline void tensor_set(double* tensor, int N, int m, int i, int j, double value) noexcept nogil:
+    # Store `value` at element [m, i, j] of a row-major buffer whose slices
+    # are (N+1) x (N+1) doubles.
+    # The flat offset is computed in Py_ssize_t: in plain C int arithmetic
+    # m*(N+1)*(N+1) overflows as soon as the buffer holds 2**31 elements,
+    # which would turn this write into an out-of-bounds store.
+    cdef Py_ssize_t side = <Py_ssize_t>N + 1
+    tensor[(m * side + i) * side + j] = value
+
+cdef inline double matrix_get(double* matrix, int N, int i, int j) noexcept nogil:
+    # Return element [i, j] of a row-major (N+1) x (N+1) buffer of doubles.
+    # The flat offset is computed in Py_ssize_t so that i*(N+1) cannot
+    # overflow C int for large N.
+    cdef Py_ssize_t side = <Py_ssize_t>N + 1
+    return matrix[i * side + j]
+
+cdef tuple calculate_inner(
+    int N,
+    int term_iteration,
+    double* tensor,
+    double* perturbation_matrix,
+    int method,
+    int termination,
+    double term_accuracy
+):
+    # Run repeated sweeps over the interior points (1 <= i, j < N) of the
+    # grid stored in `tensor`, updating it in place through the raw pointer.
+    #
+    # Each sweep reads slice m2 and writes slice m1:
+    #   new[i, j] = 0.25 * (four direct neighbours in m2) + perturbation[i, j]
+    # For METH_JACOBI the two slices differ (0 and 1) and are swapped after
+    # every sweep; for any other `method` both indices are 0, so the sweep
+    # updates slice 0 in place and the swap has no effect.
+    #
+    # Returns (m2, stat_iteration, stat_accuracy): after the swap that ends
+    # a sweep, m2 is the slice that sweep wrote to; stat_iteration is the
+    # number of sweeps performed; stat_accuracy is the maxresiduum of the
+    # last sweep.
+    #
+    # NOTE(review): the loop only starts if term_iteration > 0, also when
+    # termination == TERM_ACC -- confirm the caller passes a positive value
+    # in that mode.
+    # NOTE(review): if `termination` is neither TERM_ACC nor TERM_ITER,
+    # term_iteration is never changed and the loop does not terminate.
+    cdef int m1, m2
+    cdef int i, j
+    cdef double star, residuum, maxresiduum
+    cdef int stat_iteration = 0
+    cdef double stat_accuracy = 0.0
+    cdef int temp
+    if method == METH_JACOBI:
+        m1 = 0
+        m2 = 1
+    else:
+        m1 = 0
+        m2 = 0
+    while term_iteration > 0:
         maxresiduum = 0.0
-        for i in range(1, n):
-            for j in range(1, n):
+        for i in range(1, N):
+            for j in range(1, N):
                 star = 0.25 * (
-                    matrix_in[i - 1, j]
-                    + matrix_in[i, j - 1]
-                    + matrix_in[i, j + 1]
-                    + matrix_in[i + 1, j]
+                    tensor_get(tensor, N, m2, i-1, j) +
+                    tensor_get(tensor, N, m2, i, j-1) +
+                    tensor_get(tensor, N, m2, i, j+1) +
+                    tensor_get(tensor, N, m2, i+1, j)
                 )
-                star += perturbation_matrix[i, j]
-                if (
-                    options.termination == TerminationCondition.ACCURACY
-                    or stat_iteration == options.term_iteration
-                ):
-                    residuum = abs(matrix_in[i, j] - star)
-                    maxresiduum = max(maxresiduum, residuum)
-                matrix_out[i, j] = star
+                star += matrix_get(perturbation_matrix, N, i, j)
+                # The residuum is only tracked when it is needed: on every
+                # sweep in accuracy mode, otherwise only on the sweep where
+                # one iteration remains (term_iteration == 1).
+                if termination == TERM_ACC or term_iteration == 1:
+                    residuum = tensor_get(tensor, N, m2, i, j) - star
+                    residuum = fabs(residuum)
+                    if residuum > maxresiduum:
+                        maxresiduum = residuum
+                tensor_set(tensor, N, m1, i, j, star)
+        stat_iteration += 1
         stat_accuracy = maxresiduum
-        matrix_in, matrix_out = matrix_out, matrix_in
-        if options.termination == TerminationCondition.ACCURACY:
-            if maxresiduum < options.term_accuracy:
-                break
-        else:
-            if stat_iteration == options.term_iteration:
-                break
+        # Swap the read and write slice indices for the next sweep.
+        temp = m1
+        m1 = m2
+        m2 = temp
+        # Accuracy mode stops by zeroing the counter once the residuum is
+        # small enough; iteration mode counts the remaining sweeps down.
+        if termination == TERM_ACC:
+            if maxresiduum < term_accuracy:
+                term_iteration = 0
+        elif termination == TERM_ITER:
+            term_iteration -= 1
+    return m2, stat_iteration, stat_accuracy
+
+def calculate_np(
+    int N,
+    int term_iteration,
+    np.ndarray[np.float64_t, ndim=3, mode="c"] tensor,
+    np.ndarray[np.float64_t, ndim=2, mode="c"] perturbation_matrix,
+    int method,
+    int termination,
+    double term_accuracy
+):
+    """Validate the NumPy inputs and run calculate_inner on their raw buffers.
+
+    Parameters:
+        N: grid parameter; both trailing axes of `tensor` and both axes of
+            `perturbation_matrix` must have length N+1.
+        term_iteration: iteration counter forwarded to calculate_inner.
+        tensor: C-contiguous float64 array with one or two (N+1, N+1)
+            slices. It is modified in place.
+        perturbation_matrix: C-contiguous float64 array of shape (N+1, N+1).
+        method: METH_JACOBI selects the two-slice scheme; any other value
+            is handled by calculate_inner as the single-slice scheme.
+        termination: TERM_ACC or TERM_ITER.
+        term_accuracy: residuum threshold used when termination == TERM_ACC.
+
+    Returns:
+        The tuple (m, stat_iteration, stat_accuracy) produced by
+        calculate_inner, where `m` indexes the slice of `tensor` that holds
+        the final matrix.
+
+    Raises:
+        ValueError: if the array shapes do not match N, or if the Jacobi
+            method is requested with a tensor that has only one slice.
+    """
+    if not (1 <= tensor.shape[0] <= 2) or tensor.shape[1] != N+1 or tensor.shape[2] != N+1:
+        raise ValueError("tensor must have shape (2, N+1, N+1)")
+    # calculate_inner addresses slice 1 through a raw pointer when the method
+    # is Jacobi; with a single-slice tensor that would read and write past
+    # the end of the buffer, so reject it here while bounds are still known.
+    if method == METH_JACOBI and tensor.shape[0] != 2:
+        raise ValueError("Jacobi method requires a tensor of shape (2, N+1, N+1)")
+    if perturbation_matrix.shape[0] != N+1 or perturbation_matrix.shape[1] != N+1:
+        raise ValueError("perturbation_matrix must have shape (N+1, N+1)")
+    cdef double* tensor_ptr = <double*> tensor.data
+    cdef double* matrix_ptr = <double*> perturbation_matrix.data
+    cdef int m
+    cdef int stat_iteration
+    cdef double stat_accuracy
+    m, stat_iteration, stat_accuracy = calculate_inner(
+        N,
+        term_iteration,
+        tensor_ptr,
+        matrix_ptr,
+        method,
+        termination,
+        term_accuracy
+    )
+    return m, stat_iteration, stat_accuracy
+
+def calculate(arguments: CalculationArguments, options: Options) -> CalculationResults:
+    """Run the calculation via calculate_np and package the results.
+
+    Forwards `arguments.n`, `arguments.tensor`,
+    `arguments.perturbation_matrix` and the termination/method settings from
+    `options` to calculate_np, measuring the wall-clock time of that call
+    with time().
+
+    Returns:
+        CalculationResults built from the slice `arguments.tensor[m, :, :]`
+        selected by the index calculate_np returns, the iteration count,
+        the final accuracy and the measured duration.
+
+    NOTE(review): `options.method.value` and `options.termination.value`
+    are passed as plain ints; presumably they match the METH_* / TERM_*
+    constants in this module -- confirm against the enum definitions.
+    """
+    start_time = time()
+    m, stat_iteration, stat_accuracy = calculate_np(
+        arguments.n,
+        options.term_iteration,
+        arguments.tensor,
+        arguments.perturbation_matrix,
+        options.method.value,
+        options.termination.value,
+        options.term_accuracy,
+    )
     end_time = time()
     duration = end_time - start_time
-    final_matrix = matrix_in
+    final_matrix = arguments.tensor[m, :, :]
     return CalculationResults(final_matrix, stat_iteration, stat_accuracy, duration)
diff --git a/cython/setup.py b/cython/setup.py
@@ -7,7 +7,7 @@
         "calculate",
         ["calculate.pyx"],
         include_dirs=[np.get_include()],
-        extra_compile_args=["-O3", "-march=native", "-ffast-math"],
+        extra_compile_args=["-O3", "-march=native"],
     )
 ]
 

Original file line number	Diff line number	Diff line change
`@@ -7,7 +7,7 @@`
`7`	`7`	`"calculate",`
`8`	`8`	`["calculate.pyx"],`
`9`	`9`	`include_dirs=[np.get_include()],`
`10`		`- extra_compile_args=["-O3", "-march=native", "-ffast-math"],`
	`10`	`+ extra_compile_args=["-O3", "-march=native"],`
`11`	`11`	`)`
`12`	`12`	`]`
`13`	`13`