diff --git a/cpp/modmesh/buffer/SimpleArray.cpp b/cpp/modmesh/buffer/SimpleArray.cpp index 3f7821f2..04e6eb04 100644 --- a/cpp/modmesh/buffer/SimpleArray.cpp +++ b/cpp/modmesh/buffer/SimpleArray.cpp @@ -52,20 +52,20 @@ static inline void copy_one(int8_t * dst, int8_t const * src) template static void tiled_2d_impl( - int8_t * const dst_body, int8_t const * const src_body, size_t const n0, size_t const n1, size_t const ss0, size_t const ss1, size_t const os0, size_t const os1) + int8_t * const dst_body, int8_t const * const src_body, ssize_t const n0, ssize_t const n1, ssize_t const ss0, ssize_t const ss1, ssize_t const os0, ssize_t const os1) { - constexpr size_t BLOCK = 32; - for (size_t i0 = 0; i0 < n0; i0 += BLOCK) + constexpr ssize_t BLOCK = 32; + for (ssize_t i0 = 0; i0 < n0; i0 += BLOCK) { - size_t const i_end = std::min(i0 + BLOCK, n0); - for (size_t j0 = 0; j0 < n1; j0 += BLOCK) + ssize_t const i_end = std::min(i0 + BLOCK, n0); + for (ssize_t j0 = 0; j0 < n1; j0 += BLOCK) { - size_t const j_end = std::min(j0 + BLOCK, n1); - for (size_t i = i0; i < i_end; ++i) + ssize_t const j_end = std::min(j0 + BLOCK, n1); + for (ssize_t i = i0; i < i_end; ++i) { int8_t const * src_row = src_body + i * ss0; int8_t * dst_row = dst_body + i * os0; - for (size_t j = j0; j < j_end; ++j) + for (ssize_t j = j0; j < j_end; ++j) { copy_one(dst_row + j * os1, src_row + j * ss1); } @@ -79,20 +79,20 @@ static void tiled_2d_impl( * itemsizes that are not in the specialized {1, 2, 4, 8, 16} set. */ static inline void tiled_2d_generic( - int8_t * const dst_body, int8_t const * const src_body, size_t const n0, size_t const n1, size_t const ss0, size_t const ss1, size_t const os0, size_t const os1, size_t const itemsize) + int8_t * const dst_body, int8_t const * const src_body, ssize_t const n0, ssize_t const n1, ssize_t const ss0, ssize_t const ss1, ssize_t const os0, ssize_t const os1, size_t const itemsize) { - constexpr size_t BLOCK = 32; - for (size_t i0 = 0; i0 < n0; i0 += BLOCK) + constexpr ssize_t BLOCK = 32; + for (ssize_t i0 = 0; i0 < n0; i0 += BLOCK) { - size_t const i_end = std::min(i0 + BLOCK, n0); - for (size_t j0 = 0; j0 < n1; j0 += BLOCK) + ssize_t const i_end = std::min(i0 + BLOCK, n0); + for (ssize_t j0 = 0; j0 < n1; j0 += BLOCK) { - size_t const j_end = std::min(j0 + BLOCK, n1); - for (size_t i = i0; i < i_end; ++i) + ssize_t const j_end = std::min(j0 + BLOCK, n1); + for (ssize_t i = i0; i < i_end; ++i) { int8_t const * src_row = src_body + i * ss0; int8_t * dst_row = dst_body + i * os0; - for (size_t j = j0; j < j_end; ++j) + for (ssize_t j = j0; j < j_end; ++j) { std::memcpy(dst_row + j * os1, src_row + j * ss1, itemsize); } @@ -103,20 +103,20 @@ static inline void tiled_2d_generic( template static void tiled_nd_inner( - int8_t * const dst_body, int8_t const * const src_body, size_t const n_a, size_t const n_b, size_t const ss_a, size_t const ss_b, size_t const os_a, size_t const os_b) + int8_t * const dst_body, int8_t const * const src_body, ssize_t const n_a, ssize_t const n_b, ssize_t const ss_a, ssize_t const ss_b, ssize_t const os_a, ssize_t const os_b) { - constexpr size_t BLOCK = 32; - for (size_t a0 = 0; a0 < n_a; a0 += BLOCK) + constexpr ssize_t BLOCK = 32; + for (ssize_t a0 = 0; a0 < n_a; a0 += BLOCK) { - size_t const a_end = std::min(a0 + BLOCK, n_a); - for (size_t b0 = 0; b0 < n_b; b0 += BLOCK) + ssize_t const a_end = std::min(a0 + BLOCK, n_a); + for (ssize_t b0 = 0; b0 < n_b; b0 += BLOCK) { - size_t const b_end = std::min(b0 + BLOCK, n_b); - for (size_t i = a0; i < a_end; ++i) + ssize_t const b_end = std::min(b0 + BLOCK, n_b); + for (ssize_t i = a0; i < a_end; ++i) { int8_t const * src_row = src_body + i * ss_a; int8_t * dst_row = dst_body + i * os_a; - for (size_t j = b0; j < b_end; ++j) + for (ssize_t j = b0; j < b_end; ++j) { copy_one(dst_row + j * os_b, src_row + j * ss_b); } @@ -126,20 +126,20 @@ static void tiled_nd_inner( } static inline void tiled_nd_inner_generic( - int8_t * const dst_body, int8_t const * const src_body, size_t const n_a, size_t const n_b, size_t const ss_a, size_t const ss_b, size_t const os_a, size_t const os_b, size_t const itemsize) + int8_t * const dst_body, int8_t const * const src_body, ssize_t const n_a, ssize_t const n_b, ssize_t const ss_a, ssize_t const ss_b, ssize_t const os_a, ssize_t const os_b, size_t const itemsize) { - constexpr size_t BLOCK = 32; - for (size_t a0 = 0; a0 < n_a; a0 += BLOCK) + constexpr ssize_t BLOCK = 32; + for (ssize_t a0 = 0; a0 < n_a; a0 += BLOCK) { - size_t const a_end = std::min(a0 + BLOCK, n_a); - for (size_t b0 = 0; b0 < n_b; b0 += BLOCK) + ssize_t const a_end = std::min(a0 + BLOCK, n_a); + for (ssize_t b0 = 0; b0 < n_b; b0 += BLOCK) { - size_t const b_end = std::min(b0 + BLOCK, n_b); - for (size_t i = a0; i < a_end; ++i) + ssize_t const b_end = std::min(b0 + BLOCK, n_b); + for (ssize_t i = a0; i < a_end; ++i) { int8_t const * src_row = src_body + i * ss_a; int8_t * dst_row = dst_body + i * os_a; - for (size_t j = b0; j < b_end; ++j) + for (ssize_t j = b0; j < b_end; ++j) { std::memcpy(dst_row + j * os_b, src_row + j * ss_b, itemsize); } @@ -153,7 +153,7 @@ static inline void tiled_nd_inner_generic( * dtypes; everything else falls through to the memcpy version. */ static inline void dispatch_tile_inner( - int8_t * const dst_body, int8_t const * const src_body, size_t const n_a, size_t const n_b, size_t const ss_a, size_t const ss_b, size_t const os_a, size_t const os_b, size_t const itemsize) + int8_t * const dst_body, int8_t const * const src_body, ssize_t const n_a, ssize_t const n_b, ssize_t const ss_a, ssize_t const ss_b, ssize_t const os_a, ssize_t const os_b, size_t const itemsize) { switch (itemsize) { @@ -181,11 +181,11 @@ static inline void dispatch_tile_inner( */ SimpleArrayCopier::SimpleArrayCopier( buffer_type const & src_buffer, - size_t const src_body_offset, - shape_type const & src_stride, + ssize_t const src_body_offset, + sshape_type const & src_stride, buffer_type & dst_buffer, - size_t const dst_body_offset, - shape_type const & dst_stride, + ssize_t const dst_body_offset, + sshape_type const & dst_stride, shape_type const & shape, size_t const itemsize) : m_src(src_buffer.data() + src_body_offset) @@ -216,14 +216,15 @@ void SimpleArrayCopier::memcpy() const */ void SimpleArrayCopier::tiled_2d() const { - size_t const n0 = m_shape[0]; - size_t const n1 = m_shape[1]; + auto const n0 = static_cast(m_shape[0]); + auto const n1 = static_cast(m_shape[1]); // Element strides scaled to byte strides once; the inner loop uses byte // arithmetic throughout. - size_t const ss0 = m_src_stride[0] * m_itemsize; - size_t const ss1 = m_src_stride[1] * m_itemsize; - size_t const os0 = m_dst_stride[0] * m_itemsize; - size_t const os1 = m_dst_stride[1] * m_itemsize; + auto const itemsize = static_cast(m_itemsize); + ssize_t const ss0 = m_src_stride[0] * itemsize; + ssize_t const ss1 = m_src_stride[1] * itemsize; + ssize_t const os0 = m_dst_stride[0] * itemsize; + ssize_t const os1 = m_dst_stride[1] * itemsize; switch (m_itemsize) { case 1: tiled_2d_impl<1>(m_dst, m_src, n0, n1, ss0, ss1, os0, os1); break; @@ -242,15 +243,15 @@ void SimpleArrayCopier::tiled_2d() const void SimpleArrayCopier::tiled_nd() const { size_t const ndim = m_shape.size(); - size_t const itemsize = m_itemsize; + auto const itemsize = static_cast(m_itemsize); if (ndim == 1) { - size_t const n = m_shape[0]; - size_t const ss = m_src_stride[0] * itemsize; - size_t const os = m_dst_stride[0] * itemsize; - for (size_t i = 0; i < n; ++i) + auto const n = static_cast(m_shape[0]); + ssize_t const ss = m_src_stride[0] * itemsize; + ssize_t const os = m_dst_stride[0] * itemsize; + for (ssize_t i = 0; i < n; ++i) { - std::memcpy(m_dst + i * os, m_src + i * ss, itemsize); + std::memcpy(m_dst + i * os, m_src + i * ss, m_itemsize); } return; } @@ -258,12 +259,12 @@ void SimpleArrayCopier::tiled_nd() const // See tiled_2d for the rationale behind the block size. size_t const ia = ndim - 2; size_t const ib = ndim - 1; - size_t const n_a = m_shape[ia]; - size_t const n_b = m_shape[ib]; - size_t const ss_a = m_src_stride[ia] * itemsize; - size_t const ss_b = m_src_stride[ib] * itemsize; - size_t const os_a = m_dst_stride[ia] * itemsize; - size_t const os_b = m_dst_stride[ib] * itemsize; + auto const n_a = static_cast(m_shape[ia]); + auto const n_b = static_cast(m_shape[ib]); + ssize_t const ss_a = m_src_stride[ia] * itemsize; + ssize_t const ss_b = m_src_stride[ib] * itemsize; + ssize_t const os_a = m_dst_stride[ia] * itemsize; + ssize_t const os_b = m_dst_stride[ib] * itemsize; size_t outer_total = 1; for (size_t k = 0; k < ia; ++k) @@ -271,23 +272,23 @@ void SimpleArrayCopier::tiled_nd() const outer_total *= m_shape[k]; } - shape_type outer_idx(ia, 0); + detail::sshape_type outer_idx(ia, 0); for (size_t step = 0; step < outer_total; ++step) { // Resolve outer-axis base offsets (in bytes) for this slab. - size_t src_base = 0; - size_t dst_base = 0; + ssize_t src_base = 0; + ssize_t dst_base = 0; for (size_t k = 0; k < ia; ++k) { src_base += m_src_stride[k] * outer_idx[k] * itemsize; dst_base += m_dst_stride[k] * outer_idx[k] * itemsize; } dispatch_tile_inner( - m_dst + dst_base, m_src + src_base, n_a, n_b, ss_a, ss_b, os_a, os_b, itemsize); + m_dst + dst_base, m_src + src_base, n_a, n_b, ss_a, ss_b, os_a, os_b, m_itemsize); // Carry-propagating increment of the outer index. for (size_t i = ia; i-- > 0;) { - if (++outer_idx[i] < m_shape[i]) + if (++outer_idx[i] < static_cast(m_shape[i])) { break; } @@ -319,22 +320,26 @@ void SimpleArrayCopier::naive() const } size_t const ndim = m_shape.size(); size_t const itemsize = m_itemsize; - shape_type idx(ndim, 0); + auto const signed_itemsize = static_cast(itemsize); + detail::sshape_type idx(ndim, 0); for (size_t step = 0; step < total; ++step) { - size_t src_off = 0; - size_t dst_off = 0; + ssize_t src_off = 0; + ssize_t dst_off = 0; for (size_t k = 0; k < ndim; ++k) { src_off += m_src_stride[k] * idx[k]; dst_off += m_dst_stride[k] * idx[k]; } - std::memcpy(m_dst + dst_off * itemsize, m_src + src_off * itemsize, itemsize); + std::memcpy( + m_dst + dst_off * signed_itemsize, + m_src + src_off * signed_itemsize, + itemsize); // Carry-propagating increment: bump the trailing axis; on overflow, // wrap to 0 and carry into the next-most-significant axis. for (size_t i = ndim; i-- > 0;) { - if (++idx[i] < m_shape[i]) + if (++idx[i] < static_cast(m_shape[i])) { break; } @@ -974,6 +979,33 @@ std::string format_shape(shape_type const & shape) return ret; } +std::string format_flat_index(shape_type const & shape, size_t offset) +{ + if (shape.empty()) + { + return "[]"; + } + + shape_type row_major_stride(shape.size(), 1); + for (size_t it = shape.size() - 1; it > 0; --it) + { + row_major_stride[it - 1] = row_major_stride[it] * shape[it]; + } + + std::string ret = "["; + for (size_t it = 0; it < shape.size(); ++it) + { + if (it != 0) + { + ret += ", "; + } + ret += std::to_string(offset / row_major_stride[it]); + offset %= row_major_stride[it]; + } + ret += "]"; + return ret; +} + } /* end namespace detail */ } /* end namespace modmesh */ diff --git a/cpp/modmesh/buffer/SimpleArray.hpp b/cpp/modmesh/buffer/SimpleArray.hpp index 575e4e74..eb0c136b 100644 --- a/cpp/modmesh/buffer/SimpleArray.hpp +++ b/cpp/modmesh/buffer/SimpleArray.hpp @@ -88,32 +88,32 @@ namespace detail { template -size_t buffer_offset_impl(S const &) +ssize_t buffer_offset_impl(S const &) { return 0; } template -size_t buffer_offset_impl(S const & strides, Arg arg, Args... args) +ssize_t buffer_offset_impl(S const & strides, Arg arg, Args... args) { - return arg * strides[D] + buffer_offset_impl(strides, args...); + return static_cast(arg) * strides[D] + buffer_offset_impl(strides, args...); } } /* end namespace detail */ template -size_t buffer_offset(S const & strides, Args... args) +ssize_t buffer_offset(S const & strides, Args... args) { return detail::buffer_offset_impl<0>(strides, args...); } -inline size_t buffer_offset(small_vector const & stride, small_vector const & idx) +inline ssize_t buffer_offset(small_vector const & stride, small_vector const & idx) { if (stride.size() != idx.size()) { throw std::out_of_range(std::format("stride size {} != index size {}", stride.size(), idx.size())); } - size_t offset = 0; + ssize_t offset = 0; for (size_t it = 0; it < stride.size(); ++it) { offset += stride[it] * idx[it]; @@ -126,7 +126,6 @@ namespace detail using shape_type = small_vector; using sshape_type = small_vector; -using slice_type = small_vector; /** * \brief Enumerate SimpleArray indices with nghost included. @@ -214,6 +213,7 @@ class IndexRange }; std::string format_shape(shape_type const & shape); +std::string format_flat_index(shape_type const & shape, size_t offset); template struct SimpleArrayInternalTypes @@ -268,6 +268,8 @@ class SimpleArrayMixinSum public: using value_type = typename internal_types::value_type; + using shape_type = typename internal_types::shape_type; + using sshape_type = typename internal_types::sshape_type; value_type sum() const { @@ -328,24 +330,24 @@ class SimpleArrayMixinSum // This avoids the per-element multi-dimensional index arithmetic that // at(sidx) performs. static value_type sum_strided(value_type const * data, - small_vector const & shape, - small_vector const & stride) + shape_type const & shape, + sshape_type const & stride) { const size_t ndim = shape.size(); - const size_t last_dim = shape[ndim - 1]; - const size_t last_stride = stride[ndim - 1]; + auto const last_dim = static_cast(shape[ndim - 1]); + const ssize_t last_stride = stride[ndim - 1]; value_type acc = zero(); - small_vector prefix(ndim - 1, 0); + sshape_type prefix(ndim - 1, 0); do { - size_t offset = 0; + ssize_t offset = 0; for (size_t i = 0; i + 1 < ndim; ++i) { offset += prefix[i] * stride[i]; } value_type const * row = data + offset; - for (size_t j = 0; j < last_dim; ++j) + for (ssize_t j = 0; j < last_dim; ++j) { accumulate(acc, row[j * last_stride]); } @@ -353,12 +355,12 @@ class SimpleArrayMixinSum return acc; } - static bool next_prefix(small_vector & idx, - small_vector const & shape) + static bool next_prefix(sshape_type & idx, + shape_type const & shape) { for (size_t i = idx.size(); i > 0; --i) { - if (++idx[i - 1] < shape[i - 1]) + if (++idx[i - 1] < static_cast(shape[i - 1])) { return true; } @@ -1611,36 +1613,36 @@ class SimpleArray } // NOLINTNEXTLINE(modernize-pass-by-value) - explicit SimpleArray(small_vector const & shape) + explicit SimpleArray(shape_type const & shape) : m_shape(shape) , m_stride(calc_stride(m_shape)) { if (!m_shape.empty()) { - m_buffer = buffer_type::construct(m_shape[0] * m_stride[0] * ITEMSIZE, 0); + m_buffer = buffer_type::construct(m_shape[0] * static_cast(m_stride[0]) * ITEMSIZE, 0); m_body = m_buffer->template data(); } } // NOLINTNEXTLINE(modernize-pass-by-value) - SimpleArray(small_vector const & shape, size_t alignment, with_alignment_t const & /* unnamed argument for tagging */) + SimpleArray(shape_type const & shape, size_t alignment, with_alignment_t const & /* unnamed argument for tagging */) : m_shape(shape) , m_stride(calc_stride(m_shape)) { if (!m_shape.empty()) { - m_buffer = buffer_type::construct(m_shape[0] * m_stride[0] * ITEMSIZE, alignment); + m_buffer = buffer_type::construct(m_shape[0] * static_cast(m_stride[0]) * ITEMSIZE, alignment); m_body = m_buffer->template data(); } } - SimpleArray(small_vector const & shape, value_type const & value, size_t alignment) + SimpleArray(shape_type const & shape, value_type const & value, size_t alignment) : SimpleArray(shape, alignment, with_alignment_t{}) { std::fill(begin(), end(), value); } - SimpleArray(small_vector const & shape, value_type const & value) + SimpleArray(shape_type const & shape, value_type const & value) : SimpleArray(shape) { std::fill(begin(), end(), value); @@ -1652,7 +1654,7 @@ class SimpleArray { if (!m_shape.empty()) { - m_buffer = buffer_type::construct(m_shape[0] * m_stride[0] * ITEMSIZE, 0); + m_buffer = buffer_type::construct(m_shape[0] * static_cast(m_stride[0]) * ITEMSIZE, 0); m_body = m_buffer->template data(); } } @@ -1663,7 +1665,7 @@ class SimpleArray { if (!m_shape.empty()) { - m_buffer = buffer_type::construct(m_shape[0] * m_stride[0] * ITEMSIZE, alignment); + m_buffer = buffer_type::construct(m_shape[0] * static_cast(m_stride[0]) * ITEMSIZE, alignment); m_body = m_buffer->template data(); } } @@ -1690,7 +1692,7 @@ class SimpleArray throw std::runtime_error("SimpleArray: input buffer size must be divisible"); } m_shape = shape_type{nitem}; - m_stride = shape_type{1}; + m_stride = sshape_type{1}; m_buffer = buffer; m_body = m_buffer->template data(); } @@ -1700,14 +1702,14 @@ class SimpleArray } } - explicit SimpleArray(small_vector const & shape, std::shared_ptr const & buffer) + explicit SimpleArray(shape_type const & shape, std::shared_ptr const & buffer) : SimpleArray(buffer) { if (buffer) { m_shape = shape; m_stride = calc_stride(m_shape); - const size_t nbytes = m_shape[0] * m_stride[0] * ITEMSIZE; + const size_t nbytes = m_shape[0] * static_cast(m_stride[0]) * ITEMSIZE; if (nbytes != buffer->nbytes()) { throw std::runtime_error( @@ -1718,8 +1720,8 @@ class SimpleArray } } - explicit SimpleArray(small_vector const & shape, - small_vector const & stride, + explicit SimpleArray(shape_type const & shape, + sshape_type const & stride, std::shared_ptr const & buffer, bool c_contiguous, bool f_contiguous) @@ -1739,8 +1741,8 @@ class SimpleArray } } - explicit SimpleArray(small_vector const & shape, - small_vector const & stride, + explicit SimpleArray(shape_type const & shape, + sshape_type const & stride, std::shared_ptr const & buffer) : SimpleArray(buffer) { @@ -1815,21 +1817,21 @@ class SimpleArray return *this; } - static shape_type calc_stride(shape_type const & shape) + static sshape_type calc_stride(shape_type const & shape) { - shape_type stride(shape.size()); + sshape_type stride(shape.size()); if (!shape.empty()) { stride[shape.size() - 1] = 1; for (size_t it = shape.size() - 1; it > 0; --it) { - stride[it - 1] = stride[it] * shape[it]; + stride[it - 1] = stride[it] * static_cast(shape[it]); } } return stride; } - static T * calc_body(T * data, shape_type const & stride, size_t nghost) + static T * calc_body(T * data, sshape_type const & stride, size_t nghost) { if (nullptr == data || stride.empty() || 0 == nghost) { @@ -1837,9 +1839,9 @@ class SimpleArray } else { - shape_type shape(stride.size(), 0); - shape[0] = nghost; - data += buffer_offset(stride, shape); + sshape_type idx(stride.size(), 0); + idx[0] = static_cast(nghost); + data += buffer_offset(stride, idx); } return data; } @@ -1881,13 +1883,15 @@ class SimpleArray value_type const & at(ssize_t it) const { - shape_type const idx{normalize_index(it)}; - return data(buffer_offset(m_stride, idx)); + sshape_type const idx{normalize_index(it)}; + ssize_t const offset = buffer_offset(m_stride, idx); + return *(data() + offset); } value_type & at(ssize_t it) { - shape_type const idx{normalize_index(it)}; - return data(buffer_offset(m_stride, idx)); + sshape_type const idx{normalize_index(it)}; + ssize_t const offset = buffer_offset(m_stride, idx); + return *(data() + offset); } value_type const & at(std::vector const & idx) const { return at(sshape_type(idx)); } @@ -1895,24 +1899,24 @@ class SimpleArray value_type const & at(sshape_type const & sidx) const { - shape_type const idx = normalize_index(sidx); - const size_t offset = buffer_offset(m_stride, idx); - return data(offset); + sshape_type const idx = normalize_index(sidx); + ssize_t const offset = buffer_offset(m_stride, idx); + return *(data() + offset); } value_type & at(sshape_type const & sidx) { - shape_type const idx = normalize_index(sidx); - const size_t offset = buffer_offset(m_stride, idx); - return data(offset); + sshape_type const idx = normalize_index(sidx); + ssize_t const offset = buffer_offset(m_stride, idx); + return *(data() + offset); } size_t ndim() const noexcept { return m_shape.size(); } shape_type const & shape() const { return m_shape; } size_t shape(size_t it) const noexcept { return m_shape[it]; } size_t & shape(size_t it) noexcept { return m_shape[it]; } - shape_type const & stride() const { return m_stride; } - size_t stride(size_t it) const noexcept { return m_stride[it]; } - size_t & stride(size_t it) noexcept { return m_stride[it]; } + sshape_type const & stride() const { return m_stride; } + ssize_t stride(size_t it) const noexcept { return m_stride[it]; } + ssize_t & stride(size_t it) noexcept { return m_stride[it]; } size_t nghost() const { return m_nghost; } size_t nbody() const { return m_shape.empty() ? 0 : m_shape[0] - m_nghost; } @@ -2044,7 +2048,14 @@ class SimpleArray std::layout_stride::mapping> make_mdspan_mapping_impl(std::index_sequence) const { std::array strides; - for (size_t i = 0; i < N; ++i) { strides[i] = stride(i); } + for (size_t i = 0; i < N; ++i) + { + if (stride(i) < 0) + { + throw std::runtime_error("SimpleArray::as_mdspan: negative stride is not supported"); + } + strides[i] = static_cast(stride(i)); + } return std::layout_stride::mapping>( std::dextents(shape(I)...), strides); } @@ -2055,8 +2066,8 @@ class SimpleArray return make_mdspan_mapping_impl(std::make_index_sequence{}); } - static bool is_c_contiguous(small_vector const & shape, - small_vector const & stride) + static bool is_c_contiguous(shape_type const & shape, + sshape_type const & stride) { if (stride[stride.size() - 1] != 1) { @@ -2064,7 +2075,7 @@ class SimpleArray } for (size_t it = 0; it < shape.size() - 1; ++it) { - if (stride[it] != shape[it + 1] * stride[it + 1]) + if (stride[it] != static_cast(shape[it + 1]) * stride[it + 1]) { return false; } @@ -2072,8 +2083,8 @@ class SimpleArray return true; } - static bool is_f_contiguous(small_vector const & shape, - small_vector const & stride) + static bool is_f_contiguous(shape_type const & shape, + sshape_type const & stride) { if (stride[0] != 1) { @@ -2081,7 +2092,7 @@ class SimpleArray } for (size_t it = 0; it < shape.size() - 1; ++it) { - if (stride[it + 1] != shape[it] * stride[it]) + if (stride[it + 1] != static_cast(shape[it]) * stride[it]) { return false; } @@ -2089,8 +2100,8 @@ class SimpleArray return true; } - static void check_c_contiguous(small_vector const & shape, - small_vector const & stride) + static void check_c_contiguous(shape_type const & shape, + sshape_type const & stride) { if (!is_c_contiguous(shape, stride)) { @@ -2098,8 +2109,8 @@ class SimpleArray } } - void check_f_contiguous(small_vector const & shape, - small_vector const & stride) const + void check_f_contiguous(shape_type const & shape, + sshape_type const & stride) const { if (!is_f_contiguous(shape, stride)) { @@ -2107,7 +2118,7 @@ class SimpleArray } } - size_t normalize_index(ssize_t it) const + ssize_t normalize_index(ssize_t it) const { if (ndim() != 1) { @@ -2145,7 +2156,7 @@ class SimpleArray return to_nonnegative_index(shifted_index, dim_length); } - shape_type normalize_index(small_vector const & idx) const + sshape_type normalize_index(small_vector const & idx) const { auto index2string = [&idx]() -> std::string { @@ -2169,7 +2180,7 @@ class SimpleArray // Test for the "index shape". if (idx.empty()) { - throw std::out_of_range("SimpleArray::validate_shape(): empty index"); + throw std::out_of_range("SimpleArray::normalize_index(): empty index"); } if (idx.size() != m_shape.size()) { @@ -2179,7 +2190,7 @@ class SimpleArray m_shape.size())); } - shape_type normalized(idx.size()); + sshape_type normalized(idx.size()); for (size_t dim = 0; dim < m_shape.size(); ++dim) { auto const dim_length = static_cast(m_shape[dim]); @@ -2225,13 +2236,13 @@ class SimpleArray return normalized; } - static size_t to_nonnegative_index(ssize_t index, ssize_t length) + static ssize_t to_nonnegative_index(ssize_t index, ssize_t length) { if (index < 0) { index += length; } - return static_cast(index); + return index; } /// Contiguous data buffer for the array. @@ -2241,7 +2252,7 @@ class SimpleArray shape_type m_shape; /// Each element in this vector is the number of elements (not number of /// bytes) to skip for advancing an index in the corresponding dimension. - shape_type m_stride; + sshape_type m_stride; size_t m_nghost = 0; value_type * m_body = nullptr; @@ -2307,15 +2318,16 @@ class SimpleArrayCopier public: using shape_type = small_vector; + using sshape_type = detail::sshape_type; using buffer_type = ConcreteBuffer; SimpleArrayCopier( buffer_type const & src_buffer, - size_t src_body_offset, - shape_type const & src_stride, + ssize_t src_body_offset, + sshape_type const & src_stride, buffer_type & dst_buffer, - size_t dst_body_offset, - shape_type const & dst_stride, + ssize_t dst_body_offset, + sshape_type const & dst_stride, shape_type const & shape, size_t itemsize); @@ -2336,8 +2348,8 @@ class SimpleArrayCopier int8_t const * m_src; int8_t * m_dst; shape_type const & m_shape; - shape_type const & m_src_stride; - shape_type const & m_dst_stride; + sshape_type const & m_src_stride; + sshape_type const & m_dst_stride; size_t m_itemsize; }; /* end class SimpleArrayCopier */ @@ -2384,7 +2396,7 @@ void SimpleArray::transpose(shape_type const & axis, bool copy) throw std::runtime_error("SimpleArray::transpose: axis size mismatch"); } shape_type new_shape(m_shape.size(), -1); - shape_type new_stride(m_stride.size()); + sshape_type new_stride(m_stride.size()); for (size_t it = 0; it < m_shape.size(); ++it) { if (axis[it] >= m_shape.size() || axis[it] < 0) @@ -2488,7 +2500,7 @@ SimpleArray SimpleArray::to_column_major() const } // Compute column-major strides: the fastest-varying axis is the leading // one (stride[0] == 1). - shape_type fstride(m_shape.size()); + sshape_type fstride(m_shape.size()); fstride[0] = 1; for (size_t i = 1; i < m_shape.size(); ++i) { @@ -2535,8 +2547,8 @@ void SimpleArray::copy_logical_into(SimpleArray & out) const } // Subtract on T* so no reinterpret_cast is needed; ITEMSIZE then maps // the element offset to the byte offset the helper expects. - auto const src_body_offset = static_cast(m_body - m_buffer->template data()) * ITEMSIZE; - auto const dst_body_offset = static_cast(out.m_body - out.m_buffer->template data()) * ITEMSIZE; + auto const src_body_offset = static_cast(m_body - m_buffer->template data()) * static_cast(ITEMSIZE); + auto const dst_body_offset = static_cast(out.m_body - out.m_buffer->template data()) * static_cast(ITEMSIZE); SimpleArrayCopier const copier( *m_buffer, src_body_offset, @@ -2612,15 +2624,7 @@ A detail::SimpleArrayMixinSort::take_along_axis(SimpleArray const & ind if (*src < 0 || *src > max_idx) { size_t offset = src - indices.begin(); - shape_type const & stride = indices.stride(); - std::string indices_str = "[" + std::to_string(offset / stride[0]); - offset %= stride[0]; - for (size_t dim = 1; dim < stride.size(); ++dim) - { - indices_str += ", " + std::to_string(offset / stride[dim]); - offset %= stride[dim]; - } - indices_str += "]"; + std::string const indices_str = format_flat_index(indices.shape(), offset); throw std::out_of_range( std::format("SimpleArray::take_along_axis(): " @@ -2697,16 +2701,7 @@ A detail::SimpleArrayMixinSort::take_along_axis_simd(SimpleArray const if (oor_ptr != nullptr) { size_t offset = oor_ptr - indices.begin(); - shape_type const & stride = indices.stride(); - const size_t ndim = stride.size(); - std::string indices_str = "[" + std::to_string(offset / stride[0]); - offset %= stride[0]; - for (size_t dim = 1; dim < ndim; ++dim) - { - indices_str += ", " + std::to_string(offset / stride[dim]); - offset %= stride[dim]; - } - indices_str += "]"; + std::string const indices_str = format_flat_index(indices.shape(), offset); const auto err = std::format("SimpleArray::take_along_axis_simd(): " "indices{} is {}, which is out of range of the array size {}", diff --git a/cpp/modmesh/buffer/pymod/TypeBroadcast.hpp b/cpp/modmesh/buffer/pymod/TypeBroadcast.hpp index 4877e2da..7d5309a7 100644 --- a/cpp/modmesh/buffer/pymod/TypeBroadcast.hpp +++ b/cpp/modmesh/buffer/pymod/TypeBroadcast.hpp @@ -42,11 +42,16 @@ namespace python template struct TypeBroadcastImpl { - using slice_type = modmesh::detail::slice_type; using shape_type = modmesh::detail::shape_type; + using sshape_type = modmesh::detail::sshape_type; // NOLINTNEXTLINE(misc-no-recursion) - static void copy_idx(SimpleArray & arr_out, std::vector const & slices, pybind11::array_t const * arr_in, shape_type left_shape, shape_type sidx, int dim) + static void copy_idx(SimpleArray & arr_out, + std::vector const & slices, + pybind11::array_t const * arr_in, + shape_type const & left_shape, + sshape_type sidx, + ssize_t dim) { using out_type = typename std::remove_reference_t; @@ -55,21 +60,25 @@ struct TypeBroadcastImpl return; } - for (size_t i = 0; i < left_shape[dim]; ++i) + auto const axis = static_cast(dim); + auto const length = static_cast(left_shape[axis]); + for (ssize_t i = 0; i < length; ++i) { - sidx[dim] = i; + sidx[axis] = i; - size_t offset_in = 0; - for (pybind11::ssize_t it = 0; it < arr_in->ndim(); ++it) + ssize_t offset_in = 0; + pybind11::ssize_t const ndim_in = arr_in->ndim(); + for (pybind11::ssize_t py_axis = 0; py_axis < ndim_in; ++py_axis) { - offset_in += arr_in->strides(it) / arr_in->itemsize() * sidx[it]; + auto const axis_in = static_cast(py_axis); + offset_in += arr_in->strides(py_axis) / arr_in->itemsize() * sidx[axis_in]; } const D * ptr_in = arr_in->data() + offset_in; - size_t offset_out = 0; + ssize_t offset_out = 0; for (size_t it = 0; it < arr_out.ndim(); ++it) { - auto step = slices[it][2]; + ssize_t const step = slices[it][2]; offset_out += arr_out.stride(it) * sidx[it] * step; } @@ -77,8 +86,9 @@ struct TypeBroadcastImpl if constexpr (valid_conversion) { + auto * ptr_out = arr_out.data() + offset_out; // FIXME: NOLINTNEXTLINE(bugprone-signed-char-misuse,cert-str34-c) - arr_out.data(offset_out) = static_cast(*ptr_in); + *ptr_out = static_cast(*ptr_in); } else { @@ -90,80 +100,81 @@ struct TypeBroadcastImpl } } - static void broadcast(SimpleArray & arr_out, std::vector const & slices, pybind11::array const & arr_in) + static void broadcast(SimpleArray & arr_out, + std::vector const & slices, + pybind11::array const & arr_in) { // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) auto * arr_new = reinterpret_cast const *>(&arr_in); shape_type left_shape(arr_out.ndim()); - for (size_t i = 0; i < arr_out.ndim(); i++) + for (size_t i = 0; i < arr_out.ndim(); ++i) { - slice_type const & slice = slices[i]; + sshape_type const & slice = slices[i]; if ((slice[1] - slice[0]) % slice[2] == 0) { - left_shape[i] = (slice[1] - slice[0]) / slice[2]; + left_shape[i] = static_cast((slice[1] - slice[0]) / slice[2]); } else { - left_shape[i] = (slice[1] - slice[0]) / slice[2] + 1; + left_shape[i] = static_cast((slice[1] - slice[0]) / slice[2] + 1); } } - shape_type sidx_init(arr_out.ndim()); + sshape_type sidx_init(arr_out.ndim(), 0); - for (size_t i = 0; i < arr_out.ndim(); ++i) - { - sidx_init[i] = 0; - } - - copy_idx(arr_out, slices, arr_new, left_shape, sidx_init, static_cast(arr_out.ndim()) - 1); + copy_idx(arr_out, slices, arr_new, left_shape, sidx_init, static_cast(arr_out.ndim()) - 1); } }; /* end struct TypeBroadcastImpl */ template struct TypeBroadcast { - using slice_type = modmesh::detail::slice_type; using shape_type = modmesh::detail::shape_type; + using sshape_type = modmesh::detail::sshape_type; - static void check_shape(SimpleArray const & arr_out, std::vector const & slices, pybind11::array const & arr_in) + static void check_shape(SimpleArray const & arr_out, + std::vector const & slices, + pybind11::array const & arr_in) { - shape_type right_shape(arr_in.ndim()); - for (pybind11::ssize_t i = 0; i < arr_in.ndim(); i++) + shape_type right_shape(static_cast(arr_in.ndim())); + for (size_t i = 0; i < right_shape.size(); ++i) { - right_shape[i] = arr_in.shape(i); + right_shape[i] = static_cast(arr_in.shape(static_cast(i))); } shape_type left_shape(arr_out.ndim()); // TODO: range check - for (size_t i = 0; i < arr_out.ndim(); i++) + for (size_t i = 0; i < arr_out.ndim(); ++i) { - const slice_type & slice = slices[i]; + sshape_type const & slice = slices[i]; if ((slice[1] - slice[0]) % slice[2] == 0) { - left_shape[i] = (slice[1] - slice[0]) / slice[2]; + left_shape[i] = static_cast((slice[1] - slice[0]) / slice[2]); } else { - left_shape[i] = (slice[1] - slice[0]) / slice[2] + 1; + left_shape[i] = static_cast((slice[1] - slice[0]) / slice[2] + 1); } } - if (arr_out.ndim() != static_cast(arr_in.ndim())) + if (static_cast(arr_out.ndim()) != arr_in.ndim()) { throw_shape_error(left_shape, right_shape); } for (size_t i = 0; i < left_shape.size(); ++i) { - if (left_shape[i] != static_cast(right_shape[i])) + if (left_shape[i] != right_shape[i]) { throw_shape_error(left_shape, right_shape); } } } - static void broadcast(SimpleArray & arr_out, std::vector const & slices, pybind11::array const & arr_in) + static void broadcast(SimpleArray & arr_out, + std::vector const & slices, + pybind11::array const & arr_in) { if (dtype_is_type(arr_in)) { diff --git a/cpp/modmesh/buffer/pymod/array_common.hpp b/cpp/modmesh/buffer/pymod/array_common.hpp index f18c7aa8..b65412cf 100644 --- a/cpp/modmesh/buffer/pymod/array_common.hpp +++ b/cpp/modmesh/buffer/pymod/array_common.hpp @@ -183,7 +183,7 @@ class ArrayPropertyHelper { public: using shape_type = modmesh::detail::shape_type; - using slice_type = modmesh::detail::slice_type; + using sshape_type = modmesh::detail::sshape_type; static void broadcast_array_using_ellipsis(SimpleArray & arr_out, pybind11::array const & arr_in) { @@ -273,10 +273,18 @@ class ArrayPropertyHelper static pybind11::buffer_info get_buffer_info(SimpleArray & array) { - std::vector stride; - for (size_t const i : array.stride()) + std::vector shape; + shape.reserve(array.shape().size()); + for (size_t const i : array.shape()) { - stride.push_back(i * sizeof(T)); + shape.push_back(static_cast(i)); + } + + std::vector stride; + auto const itemsize = static_cast(sizeof(T)); + for (ssize_t const i : array.stride()) + { + stride.push_back(static_cast(i) * itemsize); } // Special handling for Complex types @@ -302,7 +310,7 @@ class ArrayPropertyHelper sizeof(T), /* Size of one scalar */ format, /* Python struct-style format descriptor */ array.ndim(), /* Number of dimensions */ - std::vector(array.shape().begin(), array.shape().end()), /* Buffer dimensions */ + shape, /* Buffer dimensions */ stride /* Strides (in bytes) for each index */ ); } @@ -360,22 +368,22 @@ class ArrayPropertyHelper } } - static std::vector make_default_slices(SimpleArray const & arr) + static std::vector make_default_slices(SimpleArray const & arr) { - std::vector slices; + std::vector slices; slices.reserve(arr.ndim()); for (size_t i = 0; i < arr.ndim(); ++i) { - slice_type default_slice(3); + sshape_type default_slice(3); default_slice[0] = 0; // start - default_slice[1] = static_cast(arr.shape(i)); // stop + default_slice[1] = static_cast(arr.shape(i)); // stop default_slice[2] = 1; // step slices.push_back(std::move(default_slice)); } return slices; } - static void copy_slice(slice_type & slice_out, pybind11::slice const & slice_in) + static void copy_slice(sshape_type & slice_out, pybind11::slice const & slice_in) { auto start = std::string(pybind11::str(slice_in.attr("start"))); auto stop = std::string(pybind11::str(slice_in.attr("stop"))); @@ -421,7 +429,7 @@ class ArrayPropertyHelper } static void process_slices(pybind11::tuple const & tuple, - std::vector & slices, + std::vector & slices, size_t ndim) { namespace py = pybind11; @@ -463,7 +471,7 @@ class ArrayPropertyHelper } static void broadcast_array_using_slice(SimpleArray & arr_out, - std::vector const & slices, + std::vector const & slices, pybind11::array const & arr_in) { TypeBroadcast::check_shape(arr_out, slices, arr_in); diff --git a/cpp/modmesh/buffer/pymod/wrap_SimpleArray.cpp b/cpp/modmesh/buffer/pymod/wrap_SimpleArray.cpp index b61c29ef..ed465548 100644 --- a/cpp/modmesh/buffer/pymod/wrap_SimpleArray.cpp +++ b/cpp/modmesh/buffer/pymod/wrap_SimpleArray.cpp @@ -89,12 +89,12 @@ class MODMESH_PYTHON_WRAPPER_VISIBILITY WrapSimpleArray } modmesh::detail::shape_type shape; - modmesh::detail::shape_type stride; - constexpr size_t itemsize = wrapped_type::itemsize(); + modmesh::detail::sshape_type stride; + constexpr auto itemsize = static_cast(wrapped_type::itemsize()); constexpr size_t span = 0; for (ssize_t i = 0; i < arr_in.ndim(); ++i) { - shape.push_back(arr_in.shape(i)); + shape.push_back(static_cast(arr_in.shape(i))); stride.push_back(arr_in.strides(i) / itemsize); } diff --git a/gtests/test_nopython_mdspan.cpp b/gtests/test_nopython_mdspan.cpp index efc2faa0..73ec11a2 100644 --- a/gtests/test_nopython_mdspan.cpp +++ b/gtests/test_nopython_mdspan.cpp @@ -408,7 +408,7 @@ TEST(SimpleArray, mdspan_non_contiguous) // Build a 3x4 view whose stride differs from the row-major layout, so the // array is neither C- nor F-contiguous over the underlying buffer. mm::small_vector shape{3, 4}; - mm::small_vector stride{8, 1}; + mm::SimpleArray::sshape_type stride{8, 1}; auto buffer = mm::ConcreteBuffer::construct(3 * 8 * sizeof(double)); mm::SimpleArray arr(shape, stride, buffer); for (size_t i = 0; i < 24; ++i) { arr.data(i) = static_cast(i); } diff --git a/tests/test_buffer.py b/tests/test_buffer.py index 49531eb5..9aa48776 100644 --- a/tests/test_buffer.py +++ b/tests/test_buffer.py @@ -304,6 +304,28 @@ def test_SimpleArray(self): self.assertEqual((12, 2), sarr.reshape((12, 2)).shape) self.assertEqual((2, 2, 2, 3), sarr.reshape((2, 2, 2, 3)).shape) + def test_SimpleArray_from_numpy_negative_stride(self): + ndarr = np.arange(2 * 3, dtype='float64').reshape((2, 3)) + view = ndarr[::-1, ::-1] + sarr = modmesh.SimpleArrayFloat64(array=view) + + self.assertEqual((2, 3), sarr.shape) + self.assertEqual((-3, -1), sarr.stride) + np.testing.assert_array_equal(view, sarr.ndarray) + self.assertEqual(np.sum(view), sarr.sum()) + + rhs = np.arange(2 * 3, dtype='float64').reshape((2, 3)) + 100 + sarr[:, :] = rhs + np.testing.assert_array_equal(rhs, view) + np.testing.assert_array_equal(rhs, sarr.ndarray) + + row_major = sarr.to_row_major() + self.assertTrue(row_major.is_c_contiguous) + np.testing.assert_array_equal(rhs, row_major.ndarray) + + sarr.ndarray[0, 0] = 100 + self.assertEqual(100, ndarr[1, 2]) + def test_SimpleArray_clone(self): sarr = modmesh.SimpleArrayFloat64((2, 3, 4)) sarr.fill(2.0) @@ -586,7 +608,7 @@ def test_SimpleArray_ghost_md_out_of_range(self): with self.assertRaisesRegex( IndexError, - r"SimpleArray::validate_shape\(\): empty index" + r"SimpleArray::normalize_index\(\): empty index" ): invalid_empty_idx = () sarr[invalid_empty_idx] @@ -619,7 +641,7 @@ def test_SimpleArray_ghost_md_out_of_range(self): with self.assertRaisesRegex( IndexError, - r"SimpleArray::validate_shape\(\): empty index" + r"SimpleArray::normalize_index\(\): empty index" ): invalid_empty_idx = () sarr[invalid_empty_idx] = 1