diff --git a/cpp/modmesh/buffer/SimpleArray.cpp b/cpp/modmesh/buffer/SimpleArray.cpp
index 3f7821f2..04e6eb04 100644
--- a/cpp/modmesh/buffer/SimpleArray.cpp
+++ b/cpp/modmesh/buffer/SimpleArray.cpp
@@ -52,20 +52,20 @@ static inline void copy_one(int8_t * dst, int8_t const * src)
 
 template <size_t N>
 static void tiled_2d_impl(
-    int8_t * const dst_body, int8_t const * const src_body, size_t const n0, size_t const n1, size_t const ss0, size_t const ss1, size_t const os0, size_t const os1)
+    int8_t * const dst_body, int8_t const * const src_body, ssize_t const n0, ssize_t const n1, ssize_t const ss0, ssize_t const ss1, ssize_t const os0, ssize_t const os1)
 {
-    constexpr size_t BLOCK = 32;
-    for (size_t i0 = 0; i0 < n0; i0 += BLOCK)
+    constexpr ssize_t BLOCK = 32;
+    for (ssize_t i0 = 0; i0 < n0; i0 += BLOCK)
     {
-        size_t const i_end = std::min(i0 + BLOCK, n0);
-        for (size_t j0 = 0; j0 < n1; j0 += BLOCK)
+        ssize_t const i_end = std::min(i0 + BLOCK, n0);
+        for (ssize_t j0 = 0; j0 < n1; j0 += BLOCK)
         {
-            size_t const j_end = std::min(j0 + BLOCK, n1);
-            for (size_t i = i0; i < i_end; ++i)
+            ssize_t const j_end = std::min(j0 + BLOCK, n1);
+            for (ssize_t i = i0; i < i_end; ++i)
             {
                 int8_t const * src_row = src_body + i * ss0;
                 int8_t * dst_row = dst_body + i * os0;
-                for (size_t j = j0; j < j_end; ++j)
+                for (ssize_t j = j0; j < j_end; ++j)
                 {
                     copy_one<N>(dst_row + j * os1, src_row + j * ss1);
                 }
@@ -79,20 +79,20 @@ static void tiled_2d_impl(
  * itemsizes that are not in the specialized {1, 2, 4, 8, 16} set.
  */
 static inline void tiled_2d_generic(
-    int8_t * const dst_body, int8_t const * const src_body, size_t const n0, size_t const n1, size_t const ss0, size_t const ss1, size_t const os0, size_t const os1, size_t const itemsize)
+    int8_t * const dst_body, int8_t const * const src_body, ssize_t const n0, ssize_t const n1, ssize_t const ss0, ssize_t const ss1, ssize_t const os0, ssize_t const os1, size_t const itemsize)
 {
-    constexpr size_t BLOCK = 32;
-    for (size_t i0 = 0; i0 < n0; i0 += BLOCK)
+    constexpr ssize_t BLOCK = 32;
+    for (ssize_t i0 = 0; i0 < n0; i0 += BLOCK)
     {
-        size_t const i_end = std::min(i0 + BLOCK, n0);
-        for (size_t j0 = 0; j0 < n1; j0 += BLOCK)
+        ssize_t const i_end = std::min(i0 + BLOCK, n0);
+        for (ssize_t j0 = 0; j0 < n1; j0 += BLOCK)
         {
-            size_t const j_end = std::min(j0 + BLOCK, n1);
-            for (size_t i = i0; i < i_end; ++i)
+            ssize_t const j_end = std::min(j0 + BLOCK, n1);
+            for (ssize_t i = i0; i < i_end; ++i)
             {
                 int8_t const * src_row = src_body + i * ss0;
                 int8_t * dst_row = dst_body + i * os0;
-                for (size_t j = j0; j < j_end; ++j)
+                for (ssize_t j = j0; j < j_end; ++j)
                 {
                     std::memcpy(dst_row + j * os1, src_row + j * ss1, itemsize);
                 }
@@ -103,20 +103,20 @@ static inline void tiled_2d_generic(
 
 template <size_t N>
 static void tiled_nd_inner(
-    int8_t * const dst_body, int8_t const * const src_body, size_t const n_a, size_t const n_b, size_t const ss_a, size_t const ss_b, size_t const os_a, size_t const os_b)
+    int8_t * const dst_body, int8_t const * const src_body, ssize_t const n_a, ssize_t const n_b, ssize_t const ss_a, ssize_t const ss_b, ssize_t const os_a, ssize_t const os_b)
 {
-    constexpr size_t BLOCK = 32;
-    for (size_t a0 = 0; a0 < n_a; a0 += BLOCK)
+    constexpr ssize_t BLOCK = 32;
+    for (ssize_t a0 = 0; a0 < n_a; a0 += BLOCK)
     {
-        size_t const a_end = std::min(a0 + BLOCK, n_a);
-        for (size_t b0 = 0; b0 < n_b; b0 += BLOCK)
+        ssize_t const a_end = std::min(a0 + BLOCK, n_a);
+        for (ssize_t b0 = 0; b0 < n_b; b0 += BLOCK)
         {
-            size_t const b_end = std::min(b0 + BLOCK, n_b);
-            for (size_t i = a0; i < a_end; ++i)
+            ssize_t const b_end = std::min(b0 + BLOCK, n_b);
+            for (ssize_t i = a0; i < a_end; ++i)
             {
                 int8_t const * src_row = src_body + i * ss_a;
                 int8_t * dst_row = dst_body + i * os_a;
-                for (size_t j = b0; j < b_end; ++j)
+                for (ssize_t j = b0; j < b_end; ++j)
                 {
                     copy_one<N>(dst_row + j * os_b, src_row + j * ss_b);
                 }
@@ -126,20 +126,20 @@ static void tiled_nd_inner(
 }
 
 static inline void tiled_nd_inner_generic(
-    int8_t * const dst_body, int8_t const * const src_body, size_t const n_a, size_t const n_b, size_t const ss_a, size_t const ss_b, size_t const os_a, size_t const os_b, size_t const itemsize)
+    int8_t * const dst_body, int8_t const * const src_body, ssize_t const n_a, ssize_t const n_b, ssize_t const ss_a, ssize_t const ss_b, ssize_t const os_a, ssize_t const os_b, size_t const itemsize)
 {
-    constexpr size_t BLOCK = 32;
-    for (size_t a0 = 0; a0 < n_a; a0 += BLOCK)
+    constexpr ssize_t BLOCK = 32;
+    for (ssize_t a0 = 0; a0 < n_a; a0 += BLOCK)
     {
-        size_t const a_end = std::min(a0 + BLOCK, n_a);
-        for (size_t b0 = 0; b0 < n_b; b0 += BLOCK)
+        ssize_t const a_end = std::min(a0 + BLOCK, n_a);
+        for (ssize_t b0 = 0; b0 < n_b; b0 += BLOCK)
         {
-            size_t const b_end = std::min(b0 + BLOCK, n_b);
-            for (size_t i = a0; i < a_end; ++i)
+            ssize_t const b_end = std::min(b0 + BLOCK, n_b);
+            for (ssize_t i = a0; i < a_end; ++i)
             {
                 int8_t const * src_row = src_body + i * ss_a;
                 int8_t * dst_row = dst_body + i * os_a;
-                for (size_t j = b0; j < b_end; ++j)
+                for (ssize_t j = b0; j < b_end; ++j)
                 {
                     std::memcpy(dst_row + j * os_b, src_row + j * ss_b, itemsize);
                 }
@@ -153,7 +153,7 @@ static inline void tiled_nd_inner_generic(
  * dtypes; everything else falls through to the memcpy version.
  */
 static inline void dispatch_tile_inner(
-    int8_t * const dst_body, int8_t const * const src_body, size_t const n_a, size_t const n_b, size_t const ss_a, size_t const ss_b, size_t const os_a, size_t const os_b, size_t const itemsize)
+    int8_t * const dst_body, int8_t const * const src_body, ssize_t const n_a, ssize_t const n_b, ssize_t const ss_a, ssize_t const ss_b, ssize_t const os_a, ssize_t const os_b, size_t const itemsize)
 {
     switch (itemsize)
     {
@@ -181,11 +181,11 @@ static inline void dispatch_tile_inner(
  */
 SimpleArrayCopier::SimpleArrayCopier(
     buffer_type const & src_buffer,
-    size_t const src_body_offset,
-    shape_type const & src_stride,
+    ssize_t const src_body_offset,
+    sshape_type const & src_stride,
     buffer_type & dst_buffer,
-    size_t const dst_body_offset,
-    shape_type const & dst_stride,
+    ssize_t const dst_body_offset,
+    sshape_type const & dst_stride,
     shape_type const & shape,
     size_t const itemsize)
     : m_src(src_buffer.data<int8_t>() + src_body_offset)
@@ -216,14 +216,15 @@ void SimpleArrayCopier::memcpy() const
  */
 void SimpleArrayCopier::tiled_2d() const
 {
-    size_t const n0 = m_shape[0];
-    size_t const n1 = m_shape[1];
+    auto const n0 = static_cast<ssize_t>(m_shape[0]);
+    auto const n1 = static_cast<ssize_t>(m_shape[1]);
     // Element strides scaled to byte strides once; the inner loop uses byte
     // arithmetic throughout.
-    size_t const ss0 = m_src_stride[0] * m_itemsize;
-    size_t const ss1 = m_src_stride[1] * m_itemsize;
-    size_t const os0 = m_dst_stride[0] * m_itemsize;
-    size_t const os1 = m_dst_stride[1] * m_itemsize;
+    auto const itemsize = static_cast<ssize_t>(m_itemsize);
+    ssize_t const ss0 = m_src_stride[0] * itemsize;
+    ssize_t const ss1 = m_src_stride[1] * itemsize;
+    ssize_t const os0 = m_dst_stride[0] * itemsize;
+    ssize_t const os1 = m_dst_stride[1] * itemsize;
     switch (m_itemsize)
     {
     case 1: tiled_2d_impl<1>(m_dst, m_src, n0, n1, ss0, ss1, os0, os1); break;
@@ -242,15 +243,15 @@ void SimpleArrayCopier::tiled_2d() const
 void SimpleArrayCopier::tiled_nd() const
 {
     size_t const ndim = m_shape.size();
-    size_t const itemsize = m_itemsize;
+    auto const itemsize = static_cast<ssize_t>(m_itemsize);
     if (ndim == 1)
     {
-        size_t const n = m_shape[0];
-        size_t const ss = m_src_stride[0] * itemsize;
-        size_t const os = m_dst_stride[0] * itemsize;
-        for (size_t i = 0; i < n; ++i)
+        auto const n = static_cast<ssize_t>(m_shape[0]);
+        ssize_t const ss = m_src_stride[0] * itemsize;
+        ssize_t const os = m_dst_stride[0] * itemsize;
+        for (ssize_t i = 0; i < n; ++i)
         {
-            std::memcpy(m_dst + i * os, m_src + i * ss, itemsize);
+            std::memcpy(m_dst + i * os, m_src + i * ss, m_itemsize);
         }
         return;
     }
@@ -258,12 +259,12 @@ void SimpleArrayCopier::tiled_nd() const
     // See tiled_2d for the rationale behind the block size.
     size_t const ia = ndim - 2;
     size_t const ib = ndim - 1;
-    size_t const n_a = m_shape[ia];
-    size_t const n_b = m_shape[ib];
-    size_t const ss_a = m_src_stride[ia] * itemsize;
-    size_t const ss_b = m_src_stride[ib] * itemsize;
-    size_t const os_a = m_dst_stride[ia] * itemsize;
-    size_t const os_b = m_dst_stride[ib] * itemsize;
+    auto const n_a = static_cast<ssize_t>(m_shape[ia]);
+    auto const n_b = static_cast<ssize_t>(m_shape[ib]);
+    ssize_t const ss_a = m_src_stride[ia] * itemsize;
+    ssize_t const ss_b = m_src_stride[ib] * itemsize;
+    ssize_t const os_a = m_dst_stride[ia] * itemsize;
+    ssize_t const os_b = m_dst_stride[ib] * itemsize;
 
     size_t outer_total = 1;
     for (size_t k = 0; k < ia; ++k)
@@ -271,23 +272,23 @@ void SimpleArrayCopier::tiled_nd() const
         outer_total *= m_shape[k];
     }
 
-    shape_type outer_idx(ia, 0);
+    detail::sshape_type outer_idx(ia, 0);
     for (size_t step = 0; step < outer_total; ++step)
     {
         // Resolve outer-axis base offsets (in bytes) for this slab.
-        size_t src_base = 0;
-        size_t dst_base = 0;
+        ssize_t src_base = 0;
+        ssize_t dst_base = 0;
         for (size_t k = 0; k < ia; ++k)
         {
             src_base += m_src_stride[k] * outer_idx[k] * itemsize;
             dst_base += m_dst_stride[k] * outer_idx[k] * itemsize;
         }
         dispatch_tile_inner(
-            m_dst + dst_base, m_src + src_base, n_a, n_b, ss_a, ss_b, os_a, os_b, itemsize);
+            m_dst + dst_base, m_src + src_base, n_a, n_b, ss_a, ss_b, os_a, os_b, m_itemsize);
         // Carry-propagating increment of the outer index.
         for (size_t i = ia; i-- > 0;)
         {
-            if (++outer_idx[i] < m_shape[i])
+            if (++outer_idx[i] < static_cast<ssize_t>(m_shape[i]))
             {
                 break;
             }
@@ -319,22 +320,26 @@ void SimpleArrayCopier::naive() const
     }
     size_t const ndim = m_shape.size();
     size_t const itemsize = m_itemsize;
-    shape_type idx(ndim, 0);
+    auto const signed_itemsize = static_cast<ssize_t>(itemsize);
+    detail::sshape_type idx(ndim, 0);
     for (size_t step = 0; step < total; ++step)
     {
-        size_t src_off = 0;
-        size_t dst_off = 0;
+        ssize_t src_off = 0;
+        ssize_t dst_off = 0;
         for (size_t k = 0; k < ndim; ++k)
         {
             src_off += m_src_stride[k] * idx[k];
             dst_off += m_dst_stride[k] * idx[k];
         }
-        std::memcpy(m_dst + dst_off * itemsize, m_src + src_off * itemsize, itemsize);
+        std::memcpy(
+            m_dst + dst_off * signed_itemsize,
+            m_src + src_off * signed_itemsize,
+            itemsize);
         // Carry-propagating increment: bump the trailing axis; on overflow,
         // wrap to 0 and carry into the next-most-significant axis.
         for (size_t i = ndim; i-- > 0;)
         {
-            if (++idx[i] < m_shape[i])
+            if (++idx[i] < static_cast<ssize_t>(m_shape[i]))
             {
                 break;
             }
@@ -974,6 +979,33 @@ std::string format_shape(shape_type const & shape)
     return ret;
 }
 
+/// Format flat row-major \p offset as a per-axis index such as "[1, 2]".
+/// A zero extent is treated as 1 so the helper never divides by zero.
+std::string format_flat_index(shape_type const & shape, size_t offset)
+{
+    if (shape.empty())
+    {
+        return "[]";
+    }
+
+    // Peel indices off from the fastest-varying (last) axis; whatever is
+    // left after the loop belongs to the leading axis.
+    shape_type index(shape.size(), 0);
+    for (size_t it = shape.size() - 1; it > 0; --it)
+    {
+        size_t const extent = (shape[it] == 0) ? 1 : shape[it];
+        index[it] = offset % extent;
+        offset /= extent;
+    }
+    index[0] = offset;
+    std::string ret = "[" + std::to_string(index[0]);
+    for (size_t it = 1; it < index.size(); ++it)
+    {
+        ret += ", " + std::to_string(index[it]);
+    }
+    return ret + "]";
+}
+
 } /* end namespace detail */
 
 } /* end namespace modmesh */
diff --git a/cpp/modmesh/buffer/SimpleArray.hpp b/cpp/modmesh/buffer/SimpleArray.hpp
index 575e4e74..eb0c136b 100644
--- a/cpp/modmesh/buffer/SimpleArray.hpp
+++ b/cpp/modmesh/buffer/SimpleArray.hpp
@@ -88,32 +88,32 @@ namespace detail
 {
 
 template <size_t D, typename S>
-size_t buffer_offset_impl(S const &)
+ssize_t buffer_offset_impl(S const &) // base case: no index arguments remain, so nothing is added
 {
     return 0;
 }
 
 template <size_t D, typename S, typename Arg, typename... Args>
-size_t buffer_offset_impl(S const & strides, Arg arg, Args... args)
+ssize_t buffer_offset_impl(S const & strides, Arg arg, Args... args)
 {
-    return arg * strides[D] + buffer_offset_impl<D + 1>(strides, args...);
+    return static_cast<ssize_t>(arg) * strides[D] + buffer_offset_impl<D + 1>(strides, args...); // arg is converted to ssize_t before it is scaled by the axis-D stride
 }
 
 } /* end namespace detail */
 
 template <typename S, typename... Args>
-size_t buffer_offset(S const & strides, Args... args)
+ssize_t buffer_offset(S const & strides, Args... args) // offset accumulated over the given indices, starting at axis 0
 {
     return detail::buffer_offset_impl<0>(strides, args...);
 }
 
-inline size_t buffer_offset(small_vector<size_t> const & stride, small_vector<size_t> const & idx)
+inline ssize_t buffer_offset(small_vector<ssize_t> const & stride, small_vector<ssize_t> const & idx)
 {
     if (stride.size() != idx.size())
     {
         throw std::out_of_range(std::format("stride size {} != index size {}", stride.size(), idx.size()));
     }
-    size_t offset = 0;
+    ssize_t offset = 0;
     for (size_t it = 0; it < stride.size(); ++it)
     {
         offset += stride[it] * idx[it];
@@ -126,7 +126,6 @@ namespace detail
 
 using shape_type = small_vector<size_t>;
 using sshape_type = small_vector<ssize_t>;
-using slice_type = small_vector<ssize_t>;
 
 /**
  * \brief Enumerate SimpleArray indices with nghost included.
@@ -214,6 +213,7 @@ class IndexRange
 };
 
 std::string format_shape(shape_type const & shape);
+std::string format_flat_index(shape_type const & shape, size_t offset);
 
 template <typename T>
 struct SimpleArrayInternalTypes
@@ -268,6 +268,8 @@ class SimpleArrayMixinSum
 public:
 
     using value_type = typename internal_types::value_type;
+    using shape_type = typename internal_types::shape_type;
+    using sshape_type = typename internal_types::sshape_type;
 
     value_type sum() const
     {
@@ -328,24 +330,24 @@ class SimpleArrayMixinSum
     // This avoids the per-element multi-dimensional index arithmetic that
     // at(sidx) performs.
     static value_type sum_strided(value_type const * data,
-                                  small_vector<size_t> const & shape,
-                                  small_vector<size_t> const & stride)
+                                  shape_type const & shape,
+                                  sshape_type const & stride)
     {
         const size_t ndim = shape.size();
-        const size_t last_dim = shape[ndim - 1];
-        const size_t last_stride = stride[ndim - 1];
+        auto const last_dim = static_cast<ssize_t>(shape[ndim - 1]);
+        const ssize_t last_stride = stride[ndim - 1];
 
         value_type acc = zero();
-        small_vector<size_t> prefix(ndim - 1, 0);
+        sshape_type prefix(ndim - 1, 0);
         do
         {
-            size_t offset = 0;
+            ssize_t offset = 0;
             for (size_t i = 0; i + 1 < ndim; ++i)
             {
                 offset += prefix[i] * stride[i];
             }
             value_type const * row = data + offset;
-            for (size_t j = 0; j < last_dim; ++j)
+            for (ssize_t j = 0; j < last_dim; ++j)
             {
                 accumulate(acc, row[j * last_stride]);
             }
@@ -353,12 +355,12 @@ class SimpleArrayMixinSum
         return acc;
     }
 
-    static bool next_prefix(small_vector<size_t> & idx,
-                            small_vector<size_t> const & shape)
+    static bool next_prefix(sshape_type & idx,
+                            shape_type const & shape)
     {
         for (size_t i = idx.size(); i > 0; --i)
         {
-            if (++idx[i - 1] < shape[i - 1])
+            if (++idx[i - 1] < static_cast<ssize_t>(shape[i - 1]))
             {
                 return true;
             }
@@ -1611,36 +1613,36 @@ class SimpleArray
     }
 
     // NOLINTNEXTLINE(modernize-pass-by-value)
-    explicit SimpleArray(small_vector<size_t> const & shape)
+    explicit SimpleArray(shape_type const & shape)
         : m_shape(shape)
         , m_stride(calc_stride(m_shape))
     {
         if (!m_shape.empty())
         {
-            m_buffer = buffer_type::construct(m_shape[0] * m_stride[0] * ITEMSIZE, 0);
+            m_buffer = buffer_type::construct(m_shape[0] * static_cast<size_t>(m_stride[0]) * ITEMSIZE, 0); // m_stride is signed; cast back to size_t for the byte count
             m_body = m_buffer->template data<T>();
         }
     }
 
     // NOLINTNEXTLINE(modernize-pass-by-value)
-    SimpleArray(small_vector<size_t> const & shape, size_t alignment, with_alignment_t const & /* unnamed argument for tagging */)
+    SimpleArray(shape_type const & shape, size_t alignment, with_alignment_t const & /* unnamed argument for tagging */)
         : m_shape(shape)
         , m_stride(calc_stride(m_shape))
     {
         if (!m_shape.empty())
         {
-            m_buffer = buffer_type::construct(m_shape[0] * m_stride[0] * ITEMSIZE, alignment);
+            m_buffer = buffer_type::construct(m_shape[0] * static_cast<size_t>(m_stride[0]) * ITEMSIZE, alignment); // m_stride is signed; cast back to size_t for the byte count
             m_body = m_buffer->template data<T>();
         }
     }
 
-    SimpleArray(small_vector<size_t> const & shape, value_type const & value, size_t alignment)
+    SimpleArray(shape_type const & shape, value_type const & value, size_t alignment) // delegates to the alignment-tagged constructor, then fills with value
         : SimpleArray(shape, alignment, with_alignment_t{})
     {
         std::fill(begin(), end(), value);
     }
 
-    SimpleArray(small_vector<size_t> const & shape, value_type const & value)
+    SimpleArray(shape_type const & shape, value_type const & value)
         : SimpleArray(shape)
     {
         std::fill(begin(), end(), value);
@@ -1652,7 +1654,7 @@ class SimpleArray
     {
         if (!m_shape.empty())
         {
-            m_buffer = buffer_type::construct(m_shape[0] * m_stride[0] * ITEMSIZE, 0);
+            m_buffer = buffer_type::construct(m_shape[0] * static_cast<size_t>(m_stride[0]) * ITEMSIZE, 0);
             m_body = m_buffer->template data<T>();
         }
     }
@@ -1663,7 +1665,7 @@ class SimpleArray
     {
         if (!m_shape.empty())
         {
-            m_buffer = buffer_type::construct(m_shape[0] * m_stride[0] * ITEMSIZE, alignment);
+            m_buffer = buffer_type::construct(m_shape[0] * static_cast<size_t>(m_stride[0]) * ITEMSIZE, alignment);
             m_body = m_buffer->template data<T>();
         }
     }
@@ -1690,7 +1692,7 @@ class SimpleArray
                 throw std::runtime_error("SimpleArray: input buffer size must be divisible");
             }
             m_shape = shape_type{nitem};
-            m_stride = shape_type{1};
+            m_stride = sshape_type{1};
             m_buffer = buffer;
             m_body = m_buffer->template data<T>();
         }
@@ -1700,14 +1702,14 @@ class SimpleArray
         }
     }
 
-    explicit SimpleArray(small_vector<size_t> const & shape, std::shared_ptr<buffer_type> const & buffer)
+    explicit SimpleArray(shape_type const & shape, std::shared_ptr<buffer_type> const & buffer)
         : SimpleArray(buffer)
     {
         if (buffer)
         {
             m_shape = shape;
             m_stride = calc_stride(m_shape);
-            const size_t nbytes = m_shape[0] * m_stride[0] * ITEMSIZE;
+            const size_t nbytes = m_shape[0] * static_cast<size_t>(m_stride[0]) * ITEMSIZE;
             if (nbytes != buffer->nbytes())
             {
                 throw std::runtime_error(
@@ -1718,8 +1720,8 @@ class SimpleArray
         }
     }
 
-    explicit SimpleArray(small_vector<size_t> const & shape,
-                         small_vector<size_t> const & stride,
+    explicit SimpleArray(shape_type const & shape,
+                         sshape_type const & stride,
                          std::shared_ptr<buffer_type> const & buffer,
                          bool c_contiguous,
                          bool f_contiguous)
@@ -1739,8 +1741,8 @@ class SimpleArray
         }
     }
 
-    explicit SimpleArray(small_vector<size_t> const & shape,
-                         small_vector<size_t> const & stride,
+    explicit SimpleArray(shape_type const & shape,
+                         sshape_type const & stride,
                          std::shared_ptr<buffer_type> const & buffer)
         : SimpleArray(buffer)
     {
@@ -1815,21 +1817,21 @@ class SimpleArray
         return *this;
     }
 
-    static shape_type calc_stride(shape_type const & shape)
+    static sshape_type calc_stride(shape_type const & shape) // strides come back signed even though the extents stay unsigned
     {
-        shape_type stride(shape.size());
+        sshape_type stride(shape.size());
         if (!shape.empty())
         {
             stride[shape.size() - 1] = 1;
             for (size_t it = shape.size() - 1; it > 0; --it)
             {
-                stride[it - 1] = stride[it] * shape[it];
+                stride[it - 1] = stride[it] * static_cast<ssize_t>(shape[it]); // each stride is the following stride times the following axis's extent
             }
         }
         return stride;
     }
 
-    static T * calc_body(T * data, shape_type const & stride, size_t nghost)
+    static T * calc_body(T * data, sshape_type const & stride, size_t nghost)
     {
         if (nullptr == data || stride.empty() || 0 == nghost)
         {
@@ -1837,9 +1839,9 @@ class SimpleArray
         }
         else
         {
-            shape_type shape(stride.size(), 0);
-            shape[0] = nghost;
-            data += buffer_offset(stride, shape);
+            sshape_type idx(stride.size(), 0);
+            idx[0] = static_cast<ssize_t>(nghost);
+            data += buffer_offset(stride, idx);
         }
         return data;
     }
@@ -1881,13 +1883,15 @@ class SimpleArray
 
     value_type const & at(ssize_t it) const
     {
-        shape_type const idx{normalize_index(it)};
-        return data(buffer_offset(m_stride, idx));
+        sshape_type const idx{normalize_index(it)};
+        ssize_t const offset = buffer_offset(m_stride, idx); // signed offset computed from m_stride and the normalized index
+        return *(data() + offset); // dereferences data() advanced by the signed offset
     }
     value_type & at(ssize_t it)
     {
-        shape_type const idx{normalize_index(it)};
-        return data(buffer_offset(m_stride, idx));
+        sshape_type const idx{normalize_index(it)};
+        ssize_t const offset = buffer_offset(m_stride, idx); // signed offset computed from m_stride and the normalized index
+        return *(data() + offset); // dereferences data() advanced by the signed offset
     }
 
     value_type const & at(std::vector<ssize_t> const & idx) const { return at(sshape_type(idx)); }
@@ -1895,24 +1899,24 @@ class SimpleArray
 
     value_type const & at(sshape_type const & sidx) const
     {
-        shape_type const idx = normalize_index(sidx);
-        const size_t offset = buffer_offset(m_stride, idx);
-        return data(offset);
+        sshape_type const idx = normalize_index(sidx); // normalized index is kept in the signed index type
+        ssize_t const offset = buffer_offset(m_stride, idx);
+        return *(data() + offset); // dereferences data() advanced by the signed offset
     }
     value_type & at(sshape_type const & sidx)
     {
-        shape_type const idx = normalize_index(sidx);
-        const size_t offset = buffer_offset(m_stride, idx);
-        return data(offset);
+        sshape_type const idx = normalize_index(sidx); // normalized index is kept in the signed index type
+        ssize_t const offset = buffer_offset(m_stride, idx);
+        return *(data() + offset); // dereferences data() advanced by the signed offset
     }
 
     size_t ndim() const noexcept { return m_shape.size(); }
     shape_type const & shape() const { return m_shape; }
     size_t shape(size_t it) const noexcept { return m_shape[it]; }
     size_t & shape(size_t it) noexcept { return m_shape[it]; }
-    shape_type const & stride() const { return m_stride; }
-    size_t stride(size_t it) const noexcept { return m_stride[it]; }
-    size_t & stride(size_t it) noexcept { return m_stride[it]; }
+    sshape_type const & stride() const { return m_stride; } // whole stride vector; entries are signed
+    ssize_t stride(size_t it) const noexcept { return m_stride[it]; } // stride of axis `it`, no bounds check here
+    ssize_t & stride(size_t it) noexcept { return m_stride[it]; } // mutable reference to the stride of axis `it`
 
     size_t nghost() const { return m_nghost; }
     size_t nbody() const { return m_shape.empty() ? 0 : m_shape[0] - m_nghost; }
@@ -2044,7 +2048,14 @@ class SimpleArray
     std::layout_stride::mapping<std::dextents<size_t, N>> make_mdspan_mapping_impl(std::index_sequence<I...>) const
     {
         std::array<size_t, N> strides;
-        for (size_t i = 0; i < N; ++i) { strides[i] = stride(i); }
+        for (size_t i = 0; i < N; ++i)
+        {
+            if (stride(i) < 0)
+            {
+                throw std::runtime_error("SimpleArray::as_mdspan: negative stride is not supported");
+            }
+            strides[i] = static_cast<size_t>(stride(i));
+        }
         return std::layout_stride::mapping<std::dextents<size_t, N>>(
             std::dextents<size_t, N>(shape(I)...), strides);
     }
@@ -2055,8 +2066,8 @@ class SimpleArray
         return make_mdspan_mapping_impl<N>(std::make_index_sequence<N>{});
     }
 
-    static bool is_c_contiguous(small_vector<size_t> const & shape,
-                                small_vector<size_t> const & stride)
+    static bool is_c_contiguous(shape_type const & shape,
+                                sshape_type const & stride)
     {
         if (stride[stride.size() - 1] != 1)
         {
@@ -2064,7 +2075,7 @@ class SimpleArray
         }
         for (size_t it = 0; it < shape.size() - 1; ++it)
         {
-            if (stride[it] != shape[it + 1] * stride[it + 1])
+            if (stride[it] != static_cast<ssize_t>(shape[it + 1]) * stride[it + 1])
             {
                 return false;
             }
@@ -2072,8 +2083,8 @@ class SimpleArray
         return true;
     }
 
-    static bool is_f_contiguous(small_vector<size_t> const & shape,
-                                small_vector<size_t> const & stride)
+    static bool is_f_contiguous(shape_type const & shape,
+                                sshape_type const & stride)
     {
         if (stride[0] != 1)
         {
@@ -2081,7 +2092,7 @@ class SimpleArray
         }
         for (size_t it = 0; it < shape.size() - 1; ++it)
         {
-            if (stride[it + 1] != shape[it] * stride[it])
+            if (stride[it + 1] != static_cast<ssize_t>(shape[it]) * stride[it])
             {
                 return false;
             }
@@ -2089,8 +2100,8 @@ class SimpleArray
         return true;
     }
 
-    static void check_c_contiguous(small_vector<size_t> const & shape,
-                                   small_vector<size_t> const & stride)
+    static void check_c_contiguous(shape_type const & shape,
+                                   sshape_type const & stride)
     {
         if (!is_c_contiguous(shape, stride))
         {
@@ -2098,8 +2109,8 @@ class SimpleArray
         }
     }
 
-    void check_f_contiguous(small_vector<size_t> const & shape,
-                            small_vector<size_t> const & stride) const
+    void check_f_contiguous(shape_type const & shape,
+                            sshape_type const & stride) const
     {
         if (!is_f_contiguous(shape, stride))
         {
@@ -2107,7 +2118,7 @@ class SimpleArray
         }
     }
 
-    size_t normalize_index(ssize_t it) const
+    ssize_t normalize_index(ssize_t it) const
     {
         if (ndim() != 1)
         {
@@ -2145,7 +2156,7 @@ class SimpleArray
         return to_nonnegative_index(shifted_index, dim_length);
     }
 
-    shape_type normalize_index(small_vector<ssize_t> const & idx) const
+    sshape_type normalize_index(small_vector<ssize_t> const & idx) const
     {
         auto index2string = [&idx]() -> std::string
         {
@@ -2169,7 +2180,7 @@ class SimpleArray
         // Test for the "index shape".
         if (idx.empty())
         {
-            throw std::out_of_range("SimpleArray::validate_shape(): empty index");
+            throw std::out_of_range("SimpleArray::normalize_index(): empty index");
         }
         if (idx.size() != m_shape.size())
         {
@@ -2179,7 +2190,7 @@ class SimpleArray
                             m_shape.size()));
         }
 
-        shape_type normalized(idx.size());
+        sshape_type normalized(idx.size());
         for (size_t dim = 0; dim < m_shape.size(); ++dim)
         {
             auto const dim_length = static_cast<ssize_t>(m_shape[dim]);
@@ -2225,13 +2236,13 @@ class SimpleArray
         return normalized;
     }
 
-    static size_t to_nonnegative_index(ssize_t index, ssize_t length)
+    static ssize_t to_nonnegative_index(ssize_t index, ssize_t length)
     {
         if (index < 0)
         {
             index += length;
         }
-        return static_cast<size_t>(index);
+        return index; // not clamped: still negative when the input index is below -length
     }
 
     /// Contiguous data buffer for the array.
@@ -2241,7 +2252,7 @@ class SimpleArray
     shape_type m_shape;
     /// Each element in this vector is the number of elements (not number of
     /// bytes) to skip for advancing an index in the corresponding dimension.
-    shape_type m_stride;
+    sshape_type m_stride;
 
     size_t m_nghost = 0;
     value_type * m_body = nullptr;
@@ -2307,15 +2318,16 @@ class SimpleArrayCopier
 public:
 
     using shape_type = small_vector<size_t>;
+    using sshape_type = detail::sshape_type;
     using buffer_type = ConcreteBuffer;
 
     SimpleArrayCopier(
         buffer_type const & src_buffer,
-        size_t src_body_offset,
-        shape_type const & src_stride,
+        ssize_t src_body_offset,
+        sshape_type const & src_stride,
         buffer_type & dst_buffer,
-        size_t dst_body_offset,
-        shape_type const & dst_stride,
+        ssize_t dst_body_offset,
+        sshape_type const & dst_stride,
         shape_type const & shape,
         size_t itemsize);
 
@@ -2336,8 +2348,8 @@ class SimpleArrayCopier
     int8_t const * m_src;
     int8_t * m_dst;
     shape_type const & m_shape;
-    shape_type const & m_src_stride;
-    shape_type const & m_dst_stride;
+    sshape_type const & m_src_stride;
+    sshape_type const & m_dst_stride;
     size_t m_itemsize;
 
 }; /* end class SimpleArrayCopier */
@@ -2384,7 +2396,7 @@ void SimpleArray<T>::transpose(shape_type const & axis, bool copy)
         throw std::runtime_error("SimpleArray::transpose: axis size mismatch");
     }
     shape_type new_shape(m_shape.size(), -1);
-    shape_type new_stride(m_stride.size());
+    sshape_type new_stride(m_stride.size());
     for (size_t it = 0; it < m_shape.size(); ++it)
     {
         if (axis[it] >= m_shape.size() || axis[it] < 0)
@@ -2488,7 +2500,7 @@ SimpleArray<T> SimpleArray<T>::to_column_major() const
     }
     // Compute column-major strides: the fastest-varying axis is the leading
     // one (stride[0] == 1).
-    shape_type fstride(m_shape.size());
+    sshape_type fstride(m_shape.size());
     fstride[0] = 1;
     for (size_t i = 1; i < m_shape.size(); ++i)
     {
@@ -2535,8 +2547,8 @@ void SimpleArray<T>::copy_logical_into(SimpleArray & out) const
     }
     // Subtract on T* so no reinterpret_cast is needed; ITEMSIZE then maps
     // the element offset to the byte offset the helper expects.
-    auto const src_body_offset = static_cast<size_t>(m_body - m_buffer->template data<value_type>()) * ITEMSIZE;
-    auto const dst_body_offset = static_cast<size_t>(out.m_body - out.m_buffer->template data<value_type>()) * ITEMSIZE;
+    auto const src_body_offset = static_cast<ssize_t>(m_body - m_buffer->template data<value_type>()) * static_cast<ssize_t>(ITEMSIZE);
+    auto const dst_body_offset = static_cast<ssize_t>(out.m_body - out.m_buffer->template data<value_type>()) * static_cast<ssize_t>(ITEMSIZE);
     SimpleArrayCopier const copier(
         *m_buffer,
         src_body_offset,
@@ -2612,15 +2624,7 @@ A detail::SimpleArrayMixinSort<A, T>::take_along_axis(SimpleArray<I> const & ind
         if (*src < 0 || *src > max_idx)
         {
             size_t offset = src - indices.begin();
-            shape_type const & stride = indices.stride();
-            std::string indices_str = "[" + std::to_string(offset / stride[0]);
-            offset %= stride[0];
-            for (size_t dim = 1; dim < stride.size(); ++dim)
-            {
-                indices_str += ", " + std::to_string(offset / stride[dim]);
-                offset %= stride[dim];
-            }
-            indices_str += "]";
+            std::string const indices_str = format_flat_index(indices.shape(), offset);
 
             throw std::out_of_range(
                 std::format("SimpleArray::take_along_axis(): "
@@ -2697,16 +2701,7 @@ A detail::SimpleArrayMixinSort<A, T>::take_along_axis_simd(SimpleArray<I> const
     if (oor_ptr != nullptr)
     {
         size_t offset = oor_ptr - indices.begin();
-        shape_type const & stride = indices.stride();
-        const size_t ndim = stride.size();
-        std::string indices_str = "[" + std::to_string(offset / stride[0]);
-        offset %= stride[0];
-        for (size_t dim = 1; dim < ndim; ++dim)
-        {
-            indices_str += ", " + std::to_string(offset / stride[dim]);
-            offset %= stride[dim];
-        }
-        indices_str += "]";
+        std::string const indices_str = format_flat_index(indices.shape(), offset);
 
         const auto err = std::format("SimpleArray::take_along_axis_simd(): "
                                      "indices{} is {}, which is out of range of the array size {}",
diff --git a/cpp/modmesh/buffer/pymod/TypeBroadcast.hpp b/cpp/modmesh/buffer/pymod/TypeBroadcast.hpp
index 4877e2da..7d5309a7 100644
--- a/cpp/modmesh/buffer/pymod/TypeBroadcast.hpp
+++ b/cpp/modmesh/buffer/pymod/TypeBroadcast.hpp
@@ -42,11 +42,16 @@ namespace python
 template <typename T /* original type */, typename D /* for destination type */>
 struct TypeBroadcastImpl
 {
-    using slice_type = modmesh::detail::slice_type;
     using shape_type = modmesh::detail::shape_type;
+    using sshape_type = modmesh::detail::sshape_type;
 
     // NOLINTNEXTLINE(misc-no-recursion)
-    static void copy_idx(SimpleArray<T> & arr_out, std::vector<slice_type> const & slices, pybind11::array_t<D> const * arr_in, shape_type left_shape, shape_type sidx, int dim)
+    static void copy_idx(SimpleArray<T> & arr_out,
+                         std::vector<sshape_type> const & slices,
+                         pybind11::array_t<D> const * arr_in,
+                         shape_type const & left_shape,
+                         sshape_type sidx,
+                         ssize_t dim)
     {
         using out_type = typename std::remove_reference_t<decltype(arr_out[0])>;
 
@@ -55,21 +60,25 @@ struct TypeBroadcastImpl
             return;
         }
 
-        for (size_t i = 0; i < left_shape[dim]; ++i)
+        auto const axis = static_cast<size_t>(dim);
+        auto const length = static_cast<ssize_t>(left_shape[axis]);
+        for (ssize_t i = 0; i < length; ++i)
         {
-            sidx[dim] = i;
+            sidx[axis] = i;
 
-            size_t offset_in = 0;
-            for (pybind11::ssize_t it = 0; it < arr_in->ndim(); ++it)
+            ssize_t offset_in = 0;
+            pybind11::ssize_t const ndim_in = arr_in->ndim();
+            for (pybind11::ssize_t py_axis = 0; py_axis < ndim_in; ++py_axis)
             {
-                offset_in += arr_in->strides(it) / arr_in->itemsize() * sidx[it];
+                auto const axis_in = static_cast<size_t>(py_axis);
+                offset_in += arr_in->strides(py_axis) / arr_in->itemsize() * sidx[axis_in];
             }
             const D * ptr_in = arr_in->data() + offset_in;
 
-            size_t offset_out = 0;
+            ssize_t offset_out = 0;
             for (size_t it = 0; it < arr_out.ndim(); ++it)
             {
-                auto step = slices[it][2];
+                ssize_t const step = slices[it][2];
                 offset_out += arr_out.stride(it) * sidx[it] * step;
             }
 
@@ -77,8 +86,9 @@ struct TypeBroadcastImpl
 
             if constexpr (valid_conversion)
             {
+                auto * ptr_out = arr_out.data() + offset_out;
                 // FIXME: NOLINTNEXTLINE(bugprone-signed-char-misuse,cert-str34-c)
-                arr_out.data(offset_out) = static_cast<out_type>(*ptr_in);
+                *ptr_out = static_cast<out_type>(*ptr_in);
             }
             else
             {
@@ -90,80 +100,81 @@ struct TypeBroadcastImpl
         }
     }
 
-    static void broadcast(SimpleArray<T> & arr_out, std::vector<slice_type> const & slices, pybind11::array const & arr_in)
+    static void broadcast(SimpleArray<T> & arr_out,
+                          std::vector<sshape_type> const & slices, // one signed {start, stop, step} triple per axis of arr_out
+                          pybind11::array const & arr_in) // reinterpreted below as array_t<D>
     {
         // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
         auto * arr_new = reinterpret_cast<pybind11::array_t<D> const *>(&arr_in);
 
         shape_type left_shape(arr_out.ndim());
-        for (size_t i = 0; i < arr_out.ndim(); i++)
+        for (size_t i = 0; i < arr_out.ndim(); ++i) // derive the sliced extent of every axis
         {
-            slice_type const & slice = slices[i];
+            sshape_type const & slice = slices[i]; // slice[0] = start, slice[1] = stop, slice[2] = step
             if ((slice[1] - slice[0]) % slice[2] == 0)
             {
-                left_shape[i] = (slice[1] - slice[0]) / slice[2];
+                left_shape[i] = static_cast<size_t>((slice[1] - slice[0]) / slice[2]); // (stop - start) is an exact multiple of step
             }
             else
             {
-                left_shape[i] = (slice[1] - slice[0]) / slice[2] + 1;
+                left_shape[i] = static_cast<size_t>((slice[1] - slice[0]) / slice[2] + 1); // nonzero remainder: count one extra element
             }
         }
 
-        shape_type sidx_init(arr_out.ndim());
+        sshape_type sidx_init(arr_out.ndim(), 0); // signed multi-index, zero on every axis
 
-        for (size_t i = 0; i < arr_out.ndim(); ++i)
-        {
-            sidx_init[i] = 0;
-        }
-
-        copy_idx(arr_out, slices, arr_new, left_shape, sidx_init, static_cast<int>(arr_out.ndim()) - 1);
+        copy_idx(arr_out, slices, arr_new, left_shape, sidx_init, static_cast<ssize_t>(arr_out.ndim()) - 1); // begin at the last axis (ndim - 1)
     }
 }; /* end struct TypeBroadcastImpl */
 
 template <typename T>
 struct TypeBroadcast
 {
-    using slice_type = modmesh::detail::slice_type;
     using shape_type = modmesh::detail::shape_type;
+    using sshape_type = modmesh::detail::sshape_type;
 
-    static void check_shape(SimpleArray<T> const & arr_out, std::vector<slice_type> const & slices, pybind11::array const & arr_in)
+    static void check_shape(SimpleArray<T> const & arr_out,
+                            std::vector<sshape_type> const & slices, // one signed {start, stop, step} triple per axis of arr_out
+                            pybind11::array const & arr_in) // right-hand side whose shape is compared
     {
-        shape_type right_shape(arr_in.ndim());
-        for (pybind11::ssize_t i = 0; i < arr_in.ndim(); i++)
+        shape_type right_shape(static_cast<size_t>(arr_in.ndim())); // one entry per axis of arr_in
+        for (size_t i = 0; i < right_shape.size(); ++i) // unsigned loop index; cast back for the pybind11 call
         {
-            right_shape[i] = arr_in.shape(i);
+            right_shape[i] = static_cast<size_t>(arr_in.shape(static_cast<pybind11::ssize_t>(i))); // store the extent as size_t
         }
 
         shape_type left_shape(arr_out.ndim());
         // TODO: range check
-        for (size_t i = 0; i < arr_out.ndim(); i++)
+        for (size_t i = 0; i < arr_out.ndim(); ++i) // derive the sliced extent of every axis
         {
-            const slice_type & slice = slices[i];
+            sshape_type const & slice = slices[i]; // slice[0] = start, slice[1] = stop, slice[2] = step
             if ((slice[1] - slice[0]) % slice[2] == 0)
             {
-                left_shape[i] = (slice[1] - slice[0]) / slice[2];
+                left_shape[i] = static_cast<size_t>((slice[1] - slice[0]) / slice[2]); // (stop - start) is an exact multiple of step
             }
             else
             {
-                left_shape[i] = (slice[1] - slice[0]) / slice[2] + 1;
+                left_shape[i] = static_cast<size_t>((slice[1] - slice[0]) / slice[2] + 1); // nonzero remainder: count one extra element
             }
         }
 
-        if (arr_out.ndim() != static_cast<size_t>(arr_in.ndim()))
+        if (static_cast<pybind11::ssize_t>(arr_out.ndim()) != arr_in.ndim()) // compare ranks in pybind11's signed type
         {
             throw_shape_error(left_shape, right_shape);
         }
 
         for (size_t i = 0; i < left_shape.size(); ++i)
         {
-            if (left_shape[i] != static_cast<size_t>(right_shape[i]))
+            if (left_shape[i] != right_shape[i]) // both sides were stored through static_cast<size_t> above
             {
                 throw_shape_error(left_shape, right_shape);
             }
         }
     }
 
-    static void broadcast(SimpleArray<T> & arr_out, std::vector<slice_type> const & slices, pybind11::array const & arr_in)
+    static void broadcast(SimpleArray<T> & arr_out,
+                          std::vector<sshape_type> const & slices,
+                          pybind11::array const & arr_in)
     {
         if (dtype_is_type<bool>(arr_in))
         {
diff --git a/cpp/modmesh/buffer/pymod/array_common.hpp b/cpp/modmesh/buffer/pymod/array_common.hpp
index f18c7aa8..b65412cf 100644
--- a/cpp/modmesh/buffer/pymod/array_common.hpp
+++ b/cpp/modmesh/buffer/pymod/array_common.hpp
@@ -183,7 +183,7 @@ class ArrayPropertyHelper
 {
 public:
     using shape_type = modmesh::detail::shape_type;
-    using slice_type = modmesh::detail::slice_type;
+    using sshape_type = modmesh::detail::sshape_type;
 
     static void broadcast_array_using_ellipsis(SimpleArray<T> & arr_out, pybind11::array const & arr_in)
     {
@@ -273,10 +273,18 @@ class ArrayPropertyHelper
 
     static pybind11::buffer_info get_buffer_info(SimpleArray<T> & array)
     {
-        std::vector<size_t> stride;
-        for (size_t const i : array.stride())
+        std::vector<pybind11::ssize_t> shape;
+        shape.reserve(array.shape().size());
+        for (size_t const i : array.shape())
         {
-            stride.push_back(i * sizeof(T));
+            shape.push_back(static_cast<pybind11::ssize_t>(i));
+        }
+
+        std::vector<pybind11::ssize_t> stride;
+        auto const itemsize = static_cast<pybind11::ssize_t>(sizeof(T));
+        for (ssize_t const i : array.stride())
+        {
+            stride.push_back(static_cast<pybind11::ssize_t>(i) * itemsize);
         }
 
         // Special handling for Complex types
@@ -302,7 +310,7 @@ class ArrayPropertyHelper
             sizeof(T), /* Size of one scalar */
             format, /* Python struct-style format descriptor */
             array.ndim(), /* Number of dimensions */
-            std::vector<size_t>(array.shape().begin(), array.shape().end()), /* Buffer dimensions */
+            shape, /* Buffer dimensions */
             stride /* Strides (in bytes) for each index */
         );
     }
@@ -360,22 +368,22 @@ class ArrayPropertyHelper
         }
     }
 
-    static std::vector<slice_type> make_default_slices(SimpleArray<T> const & arr)
+    static std::vector<sshape_type> make_default_slices(SimpleArray<T> const & arr) // returns one full-range slice per dimension of arr
     {
-        std::vector<slice_type> slices;
+        std::vector<sshape_type> slices; // each entry is a signed {start, stop, step} triple
         slices.reserve(arr.ndim());
         for (size_t i = 0; i < arr.ndim(); ++i)
         {
-            slice_type default_slice(3);
+            sshape_type default_slice(3); // three slots, filled in below
             default_slice[0] = 0; // start
-            default_slice[1] = static_cast<int>(arr.shape(i)); // stop
+            default_slice[1] = static_cast<ssize_t>(arr.shape(i)); // stop
             default_slice[2] = 1; // step
             slices.push_back(std::move(default_slice));
         }
         return slices;
     }
 
-    static void copy_slice(slice_type & slice_out, pybind11::slice const & slice_in)
+    static void copy_slice(sshape_type & slice_out, pybind11::slice const & slice_in)
     {
         auto start = std::string(pybind11::str(slice_in.attr("start")));
         auto stop = std::string(pybind11::str(slice_in.attr("stop")));
@@ -421,7 +429,7 @@ class ArrayPropertyHelper
     }
 
     static void process_slices(pybind11::tuple const & tuple,
-                               std::vector<slice_type> & slices,
+                               std::vector<sshape_type> & slices,
                                size_t ndim)
     {
         namespace py = pybind11;
@@ -463,7 +471,7 @@ class ArrayPropertyHelper
     }
 
     static void broadcast_array_using_slice(SimpleArray<T> & arr_out,
-                                            std::vector<slice_type> const & slices,
+                                            std::vector<sshape_type> const & slices,
                                             pybind11::array const & arr_in)
     {
         TypeBroadcast<T>::check_shape(arr_out, slices, arr_in);
diff --git a/cpp/modmesh/buffer/pymod/wrap_SimpleArray.cpp b/cpp/modmesh/buffer/pymod/wrap_SimpleArray.cpp
index b61c29ef..ed465548 100644
--- a/cpp/modmesh/buffer/pymod/wrap_SimpleArray.cpp
+++ b/cpp/modmesh/buffer/pymod/wrap_SimpleArray.cpp
@@ -89,12 +89,12 @@ class MODMESH_PYTHON_WRAPPER_VISIBILITY WrapSimpleArray
                         }
 
                         modmesh::detail::shape_type shape;
-                        modmesh::detail::shape_type stride;
-                        constexpr size_t itemsize = wrapped_type::itemsize();
+                        modmesh::detail::sshape_type stride;
+                        constexpr auto itemsize = static_cast<ssize_t>(wrapped_type::itemsize());
                         constexpr size_t span = 0;
                         for (ssize_t i = 0; i < arr_in.ndim(); ++i)
                         {
-                            shape.push_back(arr_in.shape(i));
+                            shape.push_back(static_cast<size_t>(arr_in.shape(i)));
                             stride.push_back(arr_in.strides(i) / itemsize);
                         }
 
diff --git a/gtests/test_nopython_mdspan.cpp b/gtests/test_nopython_mdspan.cpp
index efc2faa0..73ec11a2 100644
--- a/gtests/test_nopython_mdspan.cpp
+++ b/gtests/test_nopython_mdspan.cpp
@@ -408,7 +408,7 @@ TEST(SimpleArray, mdspan_non_contiguous)
     // Build a 3x4 view whose stride differs from the row-major layout, so the
     // array is neither C- nor F-contiguous over the underlying buffer.
     mm::small_vector<size_t> shape{3, 4};
-    mm::small_vector<size_t> stride{8, 1};
+    mm::SimpleArray<double>::sshape_type stride{8, 1};
     auto buffer = mm::ConcreteBuffer::construct(3 * 8 * sizeof(double));
     mm::SimpleArray<double> arr(shape, stride, buffer);
     for (size_t i = 0; i < 24; ++i) { arr.data(i) = static_cast<double>(i); }
diff --git a/tests/test_buffer.py b/tests/test_buffer.py
index 49531eb5..9aa48776 100644
--- a/tests/test_buffer.py
+++ b/tests/test_buffer.py
@@ -304,6 +304,28 @@ def test_SimpleArray(self):
         self.assertEqual((12, 2), sarr.reshape((12, 2)).shape)
         self.assertEqual((2, 2, 2, 3), sarr.reshape((2, 2, 2, 3)).shape)
 
+    def test_SimpleArray_from_numpy_negative_stride(self):
+        ndarr = np.arange(2 * 3, dtype='float64').reshape((2, 3))
+        view = ndarr[::-1, ::-1]  # reverse both axes of ndarr
+        sarr = modmesh.SimpleArrayFloat64(array=view)
+
+        self.assertEqual((2, 3), sarr.shape)
+        self.assertEqual((-3, -1), sarr.stride)  # negative on both axes
+        np.testing.assert_array_equal(view, sarr.ndarray)
+        self.assertEqual(np.sum(view), sarr.sum())
+
+        rhs = np.arange(2 * 3, dtype='float64').reshape((2, 3)) + 100
+        sarr[:, :] = rhs  # must be visible through the numpy view
+        np.testing.assert_array_equal(rhs, view)
+        np.testing.assert_array_equal(rhs, sarr.ndarray)
+
+        row_major = sarr.to_row_major()
+        self.assertTrue(row_major.is_c_contiguous)
+        np.testing.assert_array_equal(rhs, row_major.ndarray)
+
+        sarr.ndarray[0, 0] = 100  # view[0, 0] is ndarr[1, 2]
+        self.assertEqual(100, ndarr[1, 2])
+
     def test_SimpleArray_clone(self):
         sarr = modmesh.SimpleArrayFloat64((2, 3, 4))
         sarr.fill(2.0)
@@ -586,7 +608,7 @@ def test_SimpleArray_ghost_md_out_of_range(self):
 
         with self.assertRaisesRegex(
                 IndexError,
-                r"SimpleArray::validate_shape\(\): empty index"
+                r"SimpleArray::normalize_index\(\): empty index"
         ):
             invalid_empty_idx = ()
             sarr[invalid_empty_idx]
@@ -619,7 +641,7 @@ def test_SimpleArray_ghost_md_out_of_range(self):
 
         with self.assertRaisesRegex(
                 IndexError,
-                r"SimpleArray::validate_shape\(\): empty index"
+                r"SimpleArray::normalize_index\(\): empty index"
         ):
             invalid_empty_idx = ()
             sarr[invalid_empty_idx] = 1