Skip to content

Commit d96c350

Browse files
committed
feat: add profiling tracepoints to CPU runtime function implementations (Part 8)
Instrument key CPU runtime function entry points in src/runtime/NEON/functions/* with tracepoints to enable lightweight runtime profiling. These tracepoints leverage the ACL_PROFILE macros and form the basis for collecting execution timing and behavior metrics. This is the first step in integrating end-to-end profiling support.

Partially Resolves: COMPMID-8330
Signed-off-by: Walid Ben Romdhane <Walid.BenRomdhane@arm.com>
Change-Id: I6701dff91eae53bcc96d0666b449c1a5844f4028
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/14780
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Andreas Flöjt <andreas.floejt@arm.com>
1 parent e058e2e commit d96c350

39 files changed

Lines changed: 208 additions & 35 deletions

src/runtime/NEON/functions/NELSTMLayerQuantized.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2019-2021, 2024 Arm Limited.
2+
* Copyright (c) 2019-2021, 2024-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -28,6 +28,7 @@
2828
#include "arm_compute/core/Validate.h"
2929

3030
#include "src/common/utils/Log.h"
31+
#include "src/common/utils/profile/acl_profile.h"
3132
#include "src/core/helpers/AutoConfiguration.h"
3233

3334
#include <cmath>
@@ -127,6 +128,7 @@ void NELSTMLayerQuantized::configure(const ITensor *input,
127128
ITensor *cell_state_out,
128129
ITensor *output_state_out)
129130
{
131+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NELSTMLayerQuantized::configure");
130132
ARM_COMPUTE_ERROR_ON_NULLPTR(input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights,
131133
input_to_output_weights, recurrent_to_input_weights, recurrent_to_forget_weights,
132134
recurrent_to_cell_weights, recurrent_to_output_weights, input_gate_bias,
@@ -363,6 +365,7 @@ Status NELSTMLayerQuantized::validate(const ITensorInfo *input,
363365
const ITensorInfo *cell_state_out,
364366
const ITensorInfo *output_state_out)
365367
{
368+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NELSTMLayerQuantized::validate");
366369
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(
367370
input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
368371
recurrent_to_input_weights, recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
@@ -629,6 +632,7 @@ Status NELSTMLayerQuantized::validate(const ITensorInfo *input,
629632

630633
void NELSTMLayerQuantized::run()
631634
{
635+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NELSTMLayerQuantized::run");
632636
prepare();
633637

634638
// Acquire all the temporaries

src/runtime/NEON/functions/NELogical.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2020-2021, 2024 Arm Limited.
2+
* Copyright (c) 2020-2021, 2024-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -27,6 +27,7 @@
2727
#include "arm_compute/runtime/Tensor.h"
2828

2929
#include "src/common/utils/Log.h"
30+
#include "src/common/utils/profile/acl_profile.h"
3031
#include "src/core/NEON/kernels/NELogicalKernel.h"
3132

3233
namespace arm_compute
@@ -47,6 +48,7 @@ NELogicalAnd::~NELogicalAnd() = default;
4748

4849
void NELogicalAnd::configure(const ITensor *input1, const ITensor *input2, ITensor *output)
4950
{
51+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NELogicalAnd::configure");
5052
ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
5153
ARM_COMPUTE_LOG_PARAMS(input1, input2, output);
5254

@@ -61,12 +63,14 @@ void NELogicalAnd::configure(const ITensor *input1, const ITensor *input2, ITens
6163

6264
Status NELogicalAnd::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
6365
{
66+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NELogicalAnd::validate");
6467
ARM_COMPUTE_RETURN_ERROR_ON_DYNAMIC_SHAPE(input1, input2, output);
6568
return kernels::NELogicalKernel::validate(input1, input2, output, LogicalOperation::And);
6669
}
6770

6871
void NELogicalAnd::run()
6972
{
73+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NELogicalAnd::run");
7074
NEScheduler::get().schedule_op(_impl->kernel.get(), Window::DimY, _impl->kernel->window(), _impl->pack);
7175
}
7276

@@ -80,6 +84,7 @@ NELogicalOr::~NELogicalOr() = default;
8084

8185
void NELogicalOr::configure(const ITensor *input1, const ITensor *input2, ITensor *output)
8286
{
87+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NELogicalOr::configure");
8388
ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
8489
ARM_COMPUTE_LOG_PARAMS(input1, input2, output);
8590

@@ -94,12 +99,14 @@ void NELogicalOr::configure(const ITensor *input1, const ITensor *input2, ITenso
9499

95100
Status NELogicalOr::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
96101
{
102+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NELogicalOr::validate");
97103
ARM_COMPUTE_RETURN_ERROR_ON_DYNAMIC_SHAPE(input1, input2, output);
98104
return kernels::NELogicalKernel::validate(input1, input2, output, LogicalOperation::Or);
99105
}
100106

101107
void NELogicalOr::run()
102108
{
109+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NELogicalOr::run");
103110
NEScheduler::get().schedule_op(_impl->kernel.get(), Window::DimY, _impl->kernel->window(), _impl->pack);
104111
}
105112

@@ -113,6 +120,7 @@ NELogicalNot::~NELogicalNot() = default;
113120

114121
void NELogicalNot::configure(const ITensor *input, ITensor *output)
115122
{
123+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NELogicalNot::configure");
116124
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
117125
ARM_COMPUTE_LOG_PARAMS(input, output);
118126

@@ -126,12 +134,14 @@ void NELogicalNot::configure(const ITensor *input, ITensor *output)
126134

127135
Status NELogicalNot::validate(const ITensorInfo *input, const ITensorInfo *output)
128136
{
137+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NELogicalNot::validate");
129138
ARM_COMPUTE_RETURN_ERROR_ON_DYNAMIC_SHAPE(input, output);
130139
return kernels::NELogicalKernel::validate(input, nullptr, output, LogicalOperation::Not);
131140
}
132141

133142
void NELogicalNot::run()
134143
{
144+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NELogicalNot::run");
135145
NEScheduler::get().schedule_op(_impl->kernel.get(), Window::DimY, _impl->kernel->window(), _impl->pack);
136146
}
137147
} // namespace arm_compute

src/runtime/NEON/functions/NEMatMul.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "arm_compute/runtime/MemoryGroup.h"
2828
#include "arm_compute/runtime/Tensor.h"
2929

30+
#include "src/common/utils/profile/acl_profile.h"
3031
#include "src/core/helpers/MemoryHelpers.h"
3132
#include "src/cpu/operators/CpuMatMul.h"
3233

@@ -56,6 +57,7 @@ void NEMatMul::configure(ITensor *lhs,
5657
const CpuMatMulSettings &settings,
5758
const ActivationLayerInfo &act_info)
5859
{
60+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEMatMul::configure");
5961
_impl->lhs = lhs;
6062
_impl->rhs = rhs;
6163
_impl->output = output;
@@ -74,12 +76,14 @@ Status NEMatMul::validate(const ITensorInfo *lhs,
7476
const CpuMatMulSettings &settings,
7577
const ActivationLayerInfo &act_info)
7678
{
79+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEMatMul::validate");
7780
ARM_COMPUTE_RETURN_ERROR_ON_DYNAMIC_SHAPE(lhs, rhs, output);
7881
return cpu::CpuMatMul::validate(lhs, rhs, output, info, settings, act_info);
7982
}
8083

8184
void NEMatMul::run()
8285
{
86+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEMatMul::run");
8387
MemoryGroupResourceScope scope_mg(_impl->memory_group);
8488
_impl->op->run(_impl->run_pack);
8589
}

src/runtime/NEON/functions/NEMaxUnpoolingLayer.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2020-2022, 2024 Arm Limited.
2+
* Copyright (c) 2020-2022, 2024-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -29,6 +29,7 @@
2929
#include "arm_compute/runtime/NEON/NEScheduler.h"
3030

3131
#include "src/common/utils/Log.h"
32+
#include "src/common/utils/profile/acl_profile.h"
3233
#include "src/cpu/kernels/CpuMaxUnpoolingLayerKernel.h"
3334
#include "src/cpu/operators/CpuMaxUnpooling.h"
3435

@@ -53,6 +54,7 @@ void NEMaxUnpoolingLayer::configure(ITensor *input,
5354
ITensor *output,
5455
const PoolingLayerInfo &pool_info)
5556
{
57+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEMaxUnpoolingLayer::configure");
5658
ARM_COMPUTE_LOG_PARAMS(input, indices, output, pool_info);
5759

5860
const PixelValue zero_value(0.f);
@@ -72,6 +74,7 @@ Status NEMaxUnpoolingLayer::validate(const ITensorInfo *input,
7274
const ITensorInfo *output,
7375
const PoolingLayerInfo &pool_info)
7476
{
77+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEMaxUnpoolingLayer::validate");
7578
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output, indices);
7679
ARM_COMPUTE_RETURN_ERROR_ON_DYNAMIC_SHAPE(input, indices, output);
7780
ARM_COMPUTE_RETURN_ON_ERROR(cpu::CpuMaxUnpooling::validate(input, indices, output, pool_info));
@@ -80,6 +83,7 @@ Status NEMaxUnpoolingLayer::validate(const ITensorInfo *input,
8083

8184
void NEMaxUnpoolingLayer::run()
8285
{
86+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEMaxUnpoolingLayer::run");
8387
ITensorPack pack;
8488
pack.add_tensor(TensorType::ACL_SRC_0, _impl->src);
8589
pack.add_tensor(TensorType::ACL_SRC_1, _impl->indices);

src/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "arm_compute/core/Validate.h"
2727

2828
#include "src/common/utils/Log.h"
29+
#include "src/common/utils/profile/acl_profile.h"
2930
#include "src/cpu/operators/CpuMeanStdDevNormalization.h"
3031

3132
namespace arm_compute
@@ -45,6 +46,8 @@ NEMeanStdDevNormalizationLayer::~NEMeanStdDevNormalizationLayer() = default;
4546

4647
void NEMeanStdDevNormalizationLayer::configure(ITensor *input, ITensor *output, float epsilon)
4748
{
49+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU,
50+
"NEMeanStdDevNormalizationLayer::configure");
4851
_impl->input = input;
4952
_impl->output = (output == nullptr) ? input : output;
5053
_impl->op = std::make_unique<cpu::CpuMeanStdDevNormalization>();
@@ -53,11 +56,14 @@ void NEMeanStdDevNormalizationLayer::configure(ITensor *input, ITensor *output,
5356

5457
Status NEMeanStdDevNormalizationLayer::validate(const ITensorInfo *input, const ITensorInfo *output, float epsilon)
5558
{
59+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU,
60+
"NEMeanStdDevNormalizationLayer::validate");
5661
return cpu::CpuMeanStdDevNormalization::validate(input, output, epsilon);
5762
}
5863

5964
void NEMeanStdDevNormalizationLayer::run()
6065
{
66+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEMeanStdDevNormalizationLayer::run");
6167
ITensorPack pack;
6268
pack.add_tensor(TensorType::ACL_SRC, _impl->input);
6369
pack.add_tensor(TensorType::ACL_DST, _impl->output);

src/runtime/NEON/functions/NENormalizationLayer.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017-2021, 2024 Arm Limited.
2+
* Copyright (c) 2017-2021, 2024-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -31,6 +31,7 @@
3131
#include "arm_compute/runtime/NEON/NEScheduler.h"
3232

3333
#include "src/common/utils/Log.h"
34+
#include "src/common/utils/profile/acl_profile.h"
3435
#include "src/core/NEON/kernels/NENormalizationLayerKernel.h"
3536

3637
namespace arm_compute
@@ -44,6 +45,7 @@ NENormalizationLayer::NENormalizationLayer(std::shared_ptr<IMemoryManager> memor
4445

4546
void NENormalizationLayer::configure(const ITensor *input, ITensor *output, const NormalizationLayerInfo &norm_info)
4647
{
48+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NENormalizationLayer::configure");
4749
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
4850
ARM_COMPUTE_LOG_PARAMS(input, output, norm_info);
4951

@@ -66,6 +68,7 @@ Status NENormalizationLayer::validate(const ITensorInfo *input,
6668
const ITensorInfo *output,
6769
const NormalizationLayerInfo &norm_info)
6870
{
71+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NENormalizationLayer::validate");
6972
// Perform validation step
7073
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
7174
ARM_COMPUTE_RETURN_ERROR_ON_DYNAMIC_SHAPE(input, output);
@@ -79,6 +82,7 @@ Status NENormalizationLayer::validate(const ITensorInfo *input,
7982

8083
void NENormalizationLayer::run()
8184
{
85+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NENormalizationLayer::run");
8286
MemoryGroupResourceScope scope_mg(_memory_group);
8387
_multiply_f.run();
8488
NEScheduler::get().schedule(_norm_kernel.get(), Window::DimY);

src/runtime/NEON/functions/NEPReluLayer.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2019-2021, 2024 Arm Limited.
2+
* Copyright (c) 2019-2021, 2024-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -26,6 +26,7 @@
2626
#include "arm_compute/core/ITensor.h"
2727
#include "arm_compute/core/Validate.h"
2828

29+
#include "src/common/utils/profile/acl_profile.h"
2930
#include "src/cpu/operators/CpuPRelu.h"
3031

3132
namespace arm_compute
@@ -49,6 +50,7 @@ NEPReluLayer::~NEPReluLayer() = default;
4950

5051
void NEPReluLayer::configure(const ITensor *input, const ITensor *alpha, ITensor *output)
5152
{
53+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEPReluLayer::configure");
5254
_impl->src_0 = input;
5355
_impl->src_1 = alpha;
5456
_impl->dst = output;
@@ -58,6 +60,7 @@ void NEPReluLayer::configure(const ITensor *input, const ITensor *alpha, ITensor
5860

5961
void NEPReluLayer::run()
6062
{
63+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEPReluLayer::run");
6164
ITensorPack pack;
6265
pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0);
6366
pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1);
@@ -67,6 +70,7 @@ void NEPReluLayer::run()
6770

6871
Status NEPReluLayer::validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output)
6972
{
73+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEPReluLayer::validate");
7074
ARM_COMPUTE_RETURN_ERROR_ON_DYNAMIC_SHAPE(input, alpha, output);
7175
return OperatorType::validate(input, alpha, output);
7276
}

src/runtime/NEON/functions/NEPadLayer.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2018-2021, 2024 Arm Limited.
2+
* Copyright (c) 2018-2021, 2024-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -28,6 +28,7 @@
2828
#include "arm_compute/runtime/NEON/NEScheduler.h"
2929

3030
#include "src/common/utils/Log.h"
31+
#include "src/common/utils/profile/acl_profile.h"
3132
#include "src/core/helpers/AutoConfiguration.h"
3233
#include "src/core/NEON/kernels/NEPadLayerKernel.h"
3334

@@ -189,6 +190,7 @@ void NEPadLayer::configure(ITensor *input,
189190
const PixelValue constant_value,
190191
const PaddingMode mode)
191192
{
193+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEPadLayer::configure");
192194
ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), padding, constant_value, mode));
193195
ARM_COMPUTE_LOG_PARAMS(input, output, padding, constant_value, mode);
194196

@@ -234,6 +236,7 @@ Status NEPadLayer::validate(const ITensorInfo *input,
234236
const PixelValue constant_value,
235237
const PaddingMode mode)
236238
{
239+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEPadLayer::validate");
237240
ARM_COMPUTE_UNUSED(constant_value);
238241
ARM_COMPUTE_RETURN_ERROR_ON_DYNAMIC_SHAPE(input, output);
239242

@@ -279,6 +282,7 @@ Status NEPadLayer::validate(const ITensorInfo *input,
279282

280283
void NEPadLayer::run()
281284
{
285+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEPadLayer::run");
282286
if (_num_dimensions > 0)
283287
{
284288
switch (_mode)

src/runtime/NEON/functions/NEPermute.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017-2021, 2024 Arm Limited.
2+
* Copyright (c) 2017-2021, 2024-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -25,6 +25,7 @@
2525

2626
#include "arm_compute/core/Validate.h"
2727

28+
#include "src/common/utils/profile/acl_profile.h"
2829
#include "src/cpu/operators/CpuPermute.h"
2930

3031
namespace arm_compute
@@ -44,6 +45,7 @@ NEPermute::~NEPermute() = default;
4445

4546
void NEPermute::configure(const ITensor *input, ITensor *output, const PermutationVector &perm)
4647
{
48+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEPermute::configure");
4749
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
4850

4951
_impl->src = input;
@@ -54,6 +56,7 @@ void NEPermute::configure(const ITensor *input, ITensor *output, const Permutati
5456

5557
Status NEPermute::validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm)
5658
{
59+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEPermute::validate");
5760
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
5861
ARM_COMPUTE_RETURN_ERROR_ON_DYNAMIC_SHAPE(input, output);
5962
ARM_COMPUTE_RETURN_ON_ERROR(cpu::CpuPermute::validate(input, output, perm));
@@ -63,6 +66,7 @@ Status NEPermute::validate(const ITensorInfo *input, const ITensorInfo *output,
6366

6467
void NEPermute::run()
6568
{
69+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEPermute::run");
6670
ITensorPack pack;
6771
pack.add_tensor(TensorType::ACL_SRC, _impl->src);
6872
pack.add_tensor(TensorType::ACL_DST, _impl->dst);

0 commit comments

Comments
 (0)