Skip to content

Commit e1048b0

Browse files
isuruf authored and inducer committed
register_optimization_preambles for all kernels
1 parent 67c35f2 commit e1048b0

4 files changed

Lines changed: 16 additions & 2 deletions

File tree

sumpy/e2e.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@
2929

3030
from loopy.version import MOST_RECENT_LANGUAGE_VERSION
3131
from sumpy.tools import KernelCacheMixin, to_complex_dtype
32+
from sumpy.codegen import register_optimization_preambles
3233
from pytools import memoize_method
3334

3435
import logging
@@ -145,6 +146,7 @@ def get_optimized_kernel(self):
145146
# FIXME
146147
knl = self.get_kernel()
147148
knl = lp.split_iname(knl, "itgt_box", 64, outer_tag="g.0", inner_tag="l.0")
149+
knl = register_optimization_preambles(knl, self.device)
148150

149151
return knl
150152

@@ -279,6 +281,7 @@ def get_optimized_kernel(self):
279281
# FIXME
280282
knl = self.get_kernel()
281283
knl = lp.split_iname(knl, "itgt_box", 64, outer_tag="g.0", inner_tag="l.0")
284+
knl = register_optimization_preambles(knl, self.device)
282285

283286
return knl
284287

@@ -518,6 +521,7 @@ def get_optimized_kernel(self, result_dtype):
518521
knl = self.get_kernel(result_dtype)
519522
knl = self.tgt_expansion.m2l_translation.optimize_loopy_kernel(
520523
knl, self.tgt_expansion, self.src_expansion)
524+
knl = register_optimization_preambles(knl, self.device)
521525

522526
return knl
523527

@@ -627,6 +631,7 @@ def get_optimized_kernel(self, result_dtype):
627631
knl = self.get_kernel(result_dtype)
628632
knl = lp.tag_inames(knl, "idim*:unr")
629633
knl = lp.tag_inames(knl, {"itr_class": "g.0"})
634+
knl = register_optimization_preambles(knl, self.device)
630635

631636
return knl
632637

@@ -732,6 +737,7 @@ def get_optimized_kernel(self, result_dtype):
732737
_, optimizations = self.get_inner_knl_and_optimizations(result_dtype)
733738
for optimization in optimizations:
734739
knl = optimization(knl)
740+
knl = register_optimization_preambles(knl, self.device)
735741
return knl
736742

737743
def __call__(self, queue, **kwargs):
@@ -831,6 +837,7 @@ def get_optimized_kernel(self, result_dtype):
831837
for optimization in optimizations:
832838
knl = optimization(knl)
833839
knl = lp.add_inames_for_unused_hw_axes(knl)
840+
knl = register_optimization_preambles(knl, self.device)
834841
return knl
835842

836843
def __call__(self, queue, **kwargs):

sumpy/e2p.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import loopy as lp
2727

2828
from sumpy.tools import KernelCacheMixin, gather_loopy_arguments
29+
from sumpy.codegen import register_optimization_preambles
2930
from loopy.version import MOST_RECENT_LANGUAGE_VERSION
3031

3132

@@ -198,6 +199,7 @@ def get_optimized_kernel(self):
198199
knl = lp.add_inames_to_insn(knl, "itgt_box", "id:kernel_scaling")
199200
knl = lp.set_options(knl,
200201
enforce_variable_access_ordered="no_check")
202+
knl = register_optimization_preambles(knl, self.device)
201203

202204
return knl
203205

@@ -324,6 +326,8 @@ def get_optimized_kernel(self):
324326
knl = lp.add_inames_to_insn(knl, "itgt_box", "id:kernel_scaling")
325327
knl = lp.set_options(knl,
326328
enforce_variable_access_ordered="no_check")
329+
knl = register_optimization_preambles(knl, self.device)
330+
327331
return knl
328332

329333
def __call__(self, queue, **kwargs):

sumpy/p2e.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from loopy.version import MOST_RECENT_LANGUAGE_VERSION
2626

2727
from sumpy.tools import KernelCacheMixin, KernelComputation
28+
from sumpy.codegen import register_optimization_preambles
2829

2930
import logging
3031
logger = logging.getLogger(__name__)
@@ -118,6 +119,7 @@ def get_optimized_kernel(self, sources_is_obj_array, centers_is_obj_array):
118119
knl = self._allow_redundant_execution_of_knl_scaling(knl)
119120
knl = lp.set_options(knl,
120121
enforce_variable_access_ordered="no_check")
122+
knl = register_optimization_preambles(knl, self.device)
121123
return knl
122124

123125
def __call__(self, queue, **kwargs):

sumpy/p2p.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929

3030
from sumpy.tools import (
3131
KernelComputation, KernelCacheMixin, is_obj_array_like)
32+
from sumpy.codegen import register_optimization_preambles
3233

3334

3435
__doc__ = """
@@ -190,7 +191,6 @@ def get_optimized_kernel(self, targets_is_obj_array, sources_is_obj_array):
190191
knl = lp.set_options(knl,
191192
enforce_variable_access_ordered="no_check")
192193

193-
from sumpy.codegen import register_optimization_preambles
194194
knl = register_optimization_preambles(knl, self.device)
195195

196196
return knl
@@ -411,6 +411,8 @@ def get_optimized_kernel(self, targets_is_obj_array, sources_is_obj_array):
411411
knl = self._allow_redundant_execution_of_knl_scaling(knl)
412412
knl = lp.set_options(knl,
413413
enforce_variable_access_ordered="no_check")
414+
knl = register_optimization_preambles(knl, self.device)
415+
414416
return knl
415417

416418
def __call__(self, queue, targets, sources, tgtindices, srcindices, **kwargs):
@@ -717,7 +719,6 @@ def get_optimized_kernel(self, max_nsources_in_one_box,
717719
knl = lp.set_options(knl,
718720
enforce_variable_access_ordered="no_check")
719721

720-
from sumpy.codegen import register_optimization_preambles
721722
knl = register_optimization_preambles(knl, self.device)
722723

723724
return knl

0 commit comments

Comments
 (0)