Skip to content

Commit 474e0d6

Browse files
authored
Merge branch 'main' into e2p_opt
2 parents 19531d0 + e1048b0 commit 474e0d6

16 files changed

Lines changed: 377 additions & 232 deletions

File tree

doc/conf.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -18,16 +18,18 @@
1818
intersphinx_mapping = {
1919
"python": ("https://docs.python.org/3/", None),
2020
"numpy": ("https://numpy.org/doc/stable/", None),
21-
"modepy": ("https://documen.tician.de/modepy/", None),
21+
"sympy": ("https://docs.sympy.org/latest/", None),
22+
"matplotlib": ("https://matplotlib.org/stable/", None),
2223
"pyopencl": ("https://documen.tician.de/pyopencl/", None),
24+
"pytools": ("https://documen.tician.de/pytools/", None),
25+
"modepy": ("https://documen.tician.de/modepy/", None),
2326
"pymbolic": ("https://documen.tician.de/pymbolic/", None),
2427
"loopy": ("https://documen.tician.de/loopy/", None),
2528
"pytential": ("https://documen.tician.de/pytential/", None),
2629
"boxtree": ("https://documen.tician.de/boxtree/", None),
27-
"sympy": ("https://docs.sympy.org/latest/", None),
28-
"matplotlib": ("https://matplotlib.org/stable/", None),
30+
"arraycontext": ("https://documen.tician.de/arraycontext/", None),
2931
}
3032

3133
nitpick_ignore_regex = [
32-
["py:class", r"symengine\.(.+)"], # :cry:
33-
]
34+
["py:class", r"symengine\.(.+)"], # :cry:
35+
]

doc/misc.rst

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,9 @@ Misc Tools
55

66
.. automodule:: sumpy.symbolic
77

8+
.. automodule:: sumpy.tools
9+
10+
.. automodule:: sumpy.array_context
811

912
Installation
1013
============

examples/curve-pot.py

Lines changed: 32 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,8 @@ def draw_pot_figure(aspect_ratio,
9191
knl_kwargs = {}
9292

9393
vol_source_knl, vol_target_knl = process_kernel(knl, what_operator)
94-
p2p = P2P(actx.context, source_kernels=(vol_source_knl,),
94+
p2p = P2P(actx.context,
95+
source_kernels=(vol_source_knl,),
9596
target_kernels=(vol_target_knl,),
9697
exclude_self=False,
9798
value_dtypes=np.complex128)
@@ -157,35 +158,40 @@ def map_to_curve(t):
157158
lpot_kwargs = knl_kwargs.copy()
158159

159160
if what_operator == "D":
160-
volpot_kwargs["src_derivative_dir"] = native_curve.normal
161+
volpot_kwargs["src_derivative_dir"] = actx.from_numpy(native_curve.normal)
161162

162163
if what_operator_lpot == "D":
163-
lpot_kwargs["src_derivative_dir"] = ovsmp_curve.normal
164+
lpot_kwargs["src_derivative_dir"] = actx.from_numpy(ovsmp_curve.normal)
164165

165166
if what_operator_lpot == "S'":
166-
lpot_kwargs["tgt_derivative_dir"] = native_curve.normal
167+
lpot_kwargs["tgt_derivative_dir"] = actx.from_numpy(native_curve.normal)
167168

168169
# }}}
169170

171+
targets = actx.from_numpy(fp.points)
172+
sources = actx.from_numpy(native_curve.pos)
173+
ovsmp_sources = actx.from_numpy(ovsmp_curve.pos)
174+
170175
if 0:
171176
# {{{ build matrix
172177

173178
from fourier import make_fourier_interp_matrix
174179
fim = make_fourier_interp_matrix(novsmp, nsrc)
180+
175181
from sumpy.tools import build_matrix
176182
from scipy.sparse.linalg import LinearOperator
177183

178184
def apply_lpot(x):
179185
xovsmp = np.dot(fim, x)
180186
evt, (y,) = lpot(actx.queue,
181-
native_curve.pos,
182-
ovsmp_curve.pos,
183-
centers,
184-
[xovsmp * ovsmp_curve.speed * ovsmp_weights],
185-
expansion_radii=np.ones(centers.shape[1]),
187+
sources,
188+
ovsmp_sources,
189+
actx.from_numpy(centers),
190+
[actx.from_numpy(xovsmp * ovsmp_curve.speed * ovsmp_weights)],
191+
expansion_radii=actx.from_numpy(np.ones(centers.shape[1])),
186192
**lpot_kwargs)
187193

188-
return y
194+
return actx.to_numpy(y)
189195

190196
op = LinearOperator((nsrc, nsrc), apply_lpot)
191197
mat = build_matrix(op, dtype=np.complex128)
@@ -200,19 +206,26 @@ def apply_lpot(x):
200206

201207
mode_nr = 0
202208
density = np.cos(mode_nr*2*np.pi*native_t).astype(np.complex128)
203-
ovsmp_density = np.cos(mode_nr*2*np.pi*ovsmp_t).astype(np.complex128)
209+
strength = actx.from_numpy(native_curve.speed * native_weights * density)
210+
204211
evt, (vol_pot,) = p2p(actx.queue,
205-
fp.points,
206-
native_curve.pos,
207-
[native_curve.speed*native_weights*density], **volpot_kwargs)
212+
targets,
213+
sources,
214+
[strength], **volpot_kwargs)
215+
vol_pot = actx.to_numpy(vol_pot)
216+
217+
ovsmp_density = np.cos(mode_nr*2*np.pi*ovsmp_t).astype(np.complex128)
218+
ovsmp_strength = actx.from_numpy(
219+
ovsmp_curve.speed * ovsmp_weights * ovsmp_density)
208220

209221
evt, (curve_pot,) = lpot(actx.queue,
210-
native_curve.pos,
211-
ovsmp_curve.pos,
212-
centers,
213-
[ovsmp_density * ovsmp_curve.speed * ovsmp_weights],
214-
expansion_radii=np.ones(centers.shape[1]),
222+
sources,
223+
ovsmp_sources,
224+
actx.from_numpy(centers),
225+
[ovsmp_strength],
226+
expansion_radii=actx.from_numpy(np.ones(centers.shape[1])),
215227
**lpot_kwargs)
228+
curve_pot = actx.to_numpy(curve_pot)
216229

217230
# }}}
218231

sumpy/array_context.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,9 @@
2626
register_pytest_array_context_factory)
2727

2828
__doc__ = """
29+
Array Context
30+
-------------
31+
2932
.. autoclass:: PyOpenCLArrayContext
3033
"""
3134

sumpy/e2e.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@
2929

3030
from loopy.version import MOST_RECENT_LANGUAGE_VERSION
3131
from sumpy.tools import KernelCacheMixin, to_complex_dtype
32+
from sumpy.codegen import register_optimization_preambles
3233
from pytools import memoize_method
3334

3435
import logging
@@ -144,7 +145,8 @@ def get_kernel(self):
144145
def get_optimized_kernel(self):
145146
# FIXME
146147
knl = self.get_kernel()
147-
knl = lp.split_iname(knl, "itgt_box", 16, outer_tag="g.0")
148+
knl = lp.split_iname(knl, "itgt_box", 64, outer_tag="g.0", inner_tag="l.0")
149+
knl = register_optimization_preambles(knl, self.device)
148150

149151
return knl
150152

@@ -278,7 +280,8 @@ def get_kernel(self):
278280
def get_optimized_kernel(self):
279281
# FIXME
280282
knl = self.get_kernel()
281-
knl = lp.split_iname(knl, "itgt_box", 16, outer_tag="g.0")
283+
knl = lp.split_iname(knl, "itgt_box", 64, outer_tag="g.0", inner_tag="l.0")
284+
knl = register_optimization_preambles(knl, self.device)
282285

283286
return knl
284287

@@ -518,6 +521,7 @@ def get_optimized_kernel(self, result_dtype):
518521
knl = self.get_kernel(result_dtype)
519522
knl = self.tgt_expansion.m2l_translation.optimize_loopy_kernel(
520523
knl, self.tgt_expansion, self.src_expansion)
524+
knl = register_optimization_preambles(knl, self.device)
521525

522526
return knl
523527

@@ -627,6 +631,7 @@ def get_optimized_kernel(self, result_dtype):
627631
knl = self.get_kernel(result_dtype)
628632
knl = lp.tag_inames(knl, "idim*:unr")
629633
knl = lp.tag_inames(knl, {"itr_class": "g.0"})
634+
knl = register_optimization_preambles(knl, self.device)
630635

631636
return knl
632637

@@ -732,6 +737,7 @@ def get_optimized_kernel(self, result_dtype):
732737
_, optimizations = self.get_inner_knl_and_optimizations(result_dtype)
733738
for optimization in optimizations:
734739
knl = optimization(knl)
740+
knl = register_optimization_preambles(knl, self.device)
735741
return knl
736742

737743
def __call__(self, queue, **kwargs):
@@ -831,6 +837,7 @@ def get_optimized_kernel(self, result_dtype):
831837
for optimization in optimizations:
832838
knl = optimization(knl)
833839
knl = lp.add_inames_for_unused_hw_axes(knl)
840+
knl = register_optimization_preambles(knl, self.device)
834841
return knl
835842

836843
def __call__(self, queue, **kwargs):

sumpy/e2p.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@
2929
import pymbolic.primitives as prim
3030

3131
from sumpy.tools import KernelCacheMixin, gather_loopy_arguments
32+
from sumpy.codegen import register_optimization_preambles
3233
from loopy.version import MOST_RECENT_LANGUAGE_VERSION
3334

3435

@@ -220,6 +221,7 @@ def get_optimized_kernel(self, max_ntargets_in_one_box):
220221
knl = lp.set_temporary_address_space(knl, "coeffs", lp.AddressSpace.LOCAL)
221222
knl = lp.set_options(knl,
222223
enforce_variable_access_ordered="no_check", write_code=False)
224+
knl = register_optimization_preambles(knl, self.device)
223225

224226
for transform in optimizations:
225227
knl = transform(knl)
@@ -400,6 +402,7 @@ def get_optimized_kernel(self, max_ntargets_in_one_box):
400402
enforce_variable_access_ordered="no_check", write_code=False)
401403
for transform in optimizations:
402404
knl = transform(knl)
405+
knl = register_optimization_preambles(knl, self.device)
403406
return knl
404407

405408
def __call__(self, queue, **kwargs):

sumpy/expansion/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -228,7 +228,7 @@ def __len__(self):
228228

229229
def __eq__(self, other):
230230
return (
231-
type(self) == type(other)
231+
type(self) is type(other)
232232
and self.kernel == other.kernel
233233
and self.order == other.order
234234
and self.use_rscale == other.use_rscale)

sumpy/expansion/local.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,7 @@ def update_persistent_hash(self, key_hash, key_builder):
7575

7676
def __eq__(self, other):
7777
return (
78-
type(self) == type(other)
78+
type(self) is type(other)
7979
and self.kernel == other.kernel
8080
and self.order == other.order
8181
and self.use_rscale == other.use_rscale

sumpy/kernel.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -102,9 +102,9 @@ def name(self):
102102
def __eq__(self, other):
103103
if id(self) == id(other):
104104
return True
105-
if not type(self) == KernelArgument:
105+
if type(self) is not KernelArgument:
106106
return NotImplemented
107-
if not type(other) == KernelArgument:
107+
if type(other) is not KernelArgument:
108108
return NotImplemented
109109
return self.loopy_arg == other.loopy_arg
110110

sumpy/p2e.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
from loopy.version import MOST_RECENT_LANGUAGE_VERSION
2626

2727
from sumpy.tools import KernelCacheMixin, KernelComputation
28+
from sumpy.codegen import register_optimization_preambles
2829

2930
import logging
3031
logger = logging.getLogger(__name__)
@@ -118,6 +119,7 @@ def get_optimized_kernel(self, sources_is_obj_array, centers_is_obj_array):
118119
knl = self._allow_redundant_execution_of_knl_scaling(knl)
119120
knl = lp.set_options(knl,
120121
enforce_variable_access_ordered="no_check")
122+
knl = register_optimization_preambles(knl, self.device)
121123
return knl
122124

123125
def __call__(self, queue, **kwargs):

0 commit comments

Comments
 (0)