|
29 | 29 |
|
30 | 30 | from loopy.version import MOST_RECENT_LANGUAGE_VERSION |
31 | 31 | from sumpy.tools import KernelCacheMixin, to_complex_dtype |
| 32 | +from sumpy.codegen import register_optimization_preambles |
32 | 33 | from pytools import memoize_method |
33 | 34 |
|
34 | 35 | import logging |
@@ -145,6 +146,7 @@ def get_optimized_kernel(self): |
145 | 146 | # FIXME |
146 | 147 | knl = self.get_kernel() |
147 | 148 | knl = lp.split_iname(knl, "itgt_box", 64, outer_tag="g.0", inner_tag="l.0") |
| 149 | + knl = register_optimization_preambles(knl, self.device) |
148 | 150 |
|
149 | 151 | return knl |
150 | 152 |
|
@@ -279,6 +281,7 @@ def get_optimized_kernel(self): |
279 | 281 | # FIXME |
280 | 282 | knl = self.get_kernel() |
281 | 283 | knl = lp.split_iname(knl, "itgt_box", 64, outer_tag="g.0", inner_tag="l.0") |
| 284 | + knl = register_optimization_preambles(knl, self.device) |
282 | 285 |
|
283 | 286 | return knl |
284 | 287 |
|
@@ -518,6 +521,7 @@ def get_optimized_kernel(self, result_dtype): |
518 | 521 | knl = self.get_kernel(result_dtype) |
519 | 522 | knl = self.tgt_expansion.m2l_translation.optimize_loopy_kernel( |
520 | 523 | knl, self.tgt_expansion, self.src_expansion) |
| 524 | + knl = register_optimization_preambles(knl, self.device) |
521 | 525 |
|
522 | 526 | return knl |
523 | 527 |
|
@@ -627,6 +631,7 @@ def get_optimized_kernel(self, result_dtype): |
627 | 631 | knl = self.get_kernel(result_dtype) |
628 | 632 | knl = lp.tag_inames(knl, "idim*:unr") |
629 | 633 | knl = lp.tag_inames(knl, {"itr_class": "g.0"}) |
| 634 | + knl = register_optimization_preambles(knl, self.device) |
630 | 635 |
|
631 | 636 | return knl |
632 | 637 |
|
@@ -732,6 +737,7 @@ def get_optimized_kernel(self, result_dtype): |
732 | 737 | _, optimizations = self.get_inner_knl_and_optimizations(result_dtype) |
733 | 738 | for optimization in optimizations: |
734 | 739 | knl = optimization(knl) |
| 740 | + knl = register_optimization_preambles(knl, self.device) |
735 | 741 | return knl |
736 | 742 |
|
737 | 743 | def __call__(self, queue, **kwargs): |
@@ -831,6 +837,7 @@ def get_optimized_kernel(self, result_dtype): |
831 | 837 | for optimization in optimizations: |
832 | 838 | knl = optimization(knl) |
833 | 839 | knl = lp.add_inames_for_unused_hw_axes(knl) |
| 840 | + knl = register_optimization_preambles(knl, self.device) |
834 | 841 | return knl |
835 | 842 |
|
836 | 843 | def __call__(self, queue, **kwargs): |
|
0 commit comments