From 778d112a1d92be6a16bbbef1e6d23fa33cb8da22 Mon Sep 17 00:00:00 2001
From: moneta <lorenzo.moneta@cern.ch>
Date: Tue, 5 May 2026 15:27:14 +0200
Subject: [PATCH 1/8] [tmva][sofie] Fixes for parsing mlpf model

- After the changes for CLAD the mlpf model could not be parsed anymore.
  The variable defining the number of non-zero elements coming from the NonZero operator is now handled correctly.
- Also fix TMVA::SOFIE::Copy for types other than float by making it a template function
- Also add the output shape definition in the generated code, as is already done for the input
---
 tmva/sofie/inc/TMVA/RModel.hxx            |  3 ++
 tmva/sofie/inc/TMVA/ROperator_NonZero.hxx | 22 ++++++-------
 tmva/sofie/inc/TMVA/SOFIE_common.hxx      |  3 +-
 tmva/sofie/src/RModel.cxx                 | 40 ++++++++++++++++++++++-
 4 files changed, 55 insertions(+), 13 deletions(-)
diff --git a/tmva/sofie/inc/TMVA/RModel.hxx b/tmva/sofie/inc/TMVA/RModel.hxx
index ec4e1115b759d..ed7eca59ec844 100644
--- a/tmva/sofie/inc/TMVA/RModel.hxx
+++ b/tmva/sofie/inc/TMVA/RModel.hxx
@@ -45,6 +45,8 @@ private:
    MemoryPoolInfo fIntermediateMemoryInfo;    ///<!  intermediate memory info (transient)
    std::unordered_map<std::string_view, size_t> fIntermediateTensorFrequencyLookup;    ///<!  lookup table for intermediate tensor frequency (transient)
 
+   std::string fExtraCodeForDimShapes; // extra code needed for initialization of dynamic parameters (e.g. number of non zero elements in NonZero operator)
+
 public:
    /**
        Default constructor. Needed to allow serialization of ROOT objects. See
@@ -108,6 +110,7 @@ public:
 
    void AddShapeTensor(const std::string & name, const std::vector<Dim> & shapeValues, bool scalar = false);
 
+   void AddExtraCodeForDimShapes(const std::string & code) { fExtraCodeForDimShapes += code; }
 
    // add and initialize subgraph to the model
    void InitializeSubGraph(std::shared_ptr<RModel>  graph);
diff --git a/tmva/sofie/inc/TMVA/ROperator_NonZero.hxx b/tmva/sofie/inc/TMVA/ROperator_NonZero.hxx
index 8587035f8d44b..065789e8d41d6 100644
--- a/tmva/sofie/inc/TMVA/ROperator_NonZero.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_NonZero.hxx
@@ -19,6 +19,7 @@ private:
 
    std::string fNX;
    std::string fNY;
+   std::string fNonZeroParam; // name of the parameter used to store the number of non zero elements when output is not constant
    std::vector<Dim> fShapeX;
    std::vector<Dim> fShapeY;
 
@@ -93,7 +94,14 @@ public:
          fShapeY[0] = fShapeX.size();
 
          // identify as -1 since we will declare maximum as size of input
-         fShapeY[1] = Dim{std::string("v_NonZero_") + fNX, static_cast<size_t>(-1)};
+         // we will compute at run time the actual number of non zero and rearrange the output vector accordingly
+         fNonZeroParam = "v_NonZero_" + fNX;
+         fShapeY[1] = Dim{fNonZeroParam, static_cast<size_t>(-1)};
+
+         // declare the parameter for number of non zero elements, used when output is not constant
+         auto inputLength = ConvertDimShapeToLength(fShapeX);
+         std::string codeDecl = SP + "size_t " + fNonZeroParam + " = " + inputLength + ";\n";
+         model.AddExtraCodeForDimShapes(codeDecl);
 
          model.AddIntermediateTensor(fNY, ETensorType::INT64, fShapeY);
          if (model.Verbose()) {
@@ -102,14 +110,6 @@ public:
       }
    }
 
-   std::string GenerateSessionMembersCode(std::string /*opName*/) override {
-      if (fIsOutputConstant) return "";
-      // define output value used as max non zero with max size = input shape * N
-      auto inputLength = ConvertDimShapeToLength(fShapeX);
-      std::stringstream out;
-      out << SP << "size_t fV_NonZero_" << fNX << " = " << inputLength << ";\n";
-      return out.str();
-   }
 
    std::string Generate(std::string opName) override {
       if (fIsOutputConstant) {
@@ -127,9 +127,9 @@ public:
          inputLength = ConvertShapeToLength(intShapeX);
 
       size_t dims = fShapeX.size();
-      out << "\n//------ NonZero\n";
+      out << "\n//------ NonZero  -> " << ConvertDimShapeToString(fShapeY) << "\n";
 
-      std::string vnonzero = "v_NonZero_" + fNX;
+      std::string vnonzero = fNonZeroParam;
 
       // loop on input indices
       out << SP << "size_t offset_" << opName << " = 0;\n";
diff --git a/tmva/sofie/inc/TMVA/SOFIE_common.hxx b/tmva/sofie/inc/TMVA/SOFIE_common.hxx
index 9f35cca5f7db3..4b7bb4a29b22d 100644
--- a/tmva/sofie/inc/TMVA/SOFIE_common.hxx
+++ b/tmva/sofie/inc/TMVA/SOFIE_common.hxx
@@ -779,7 +779,8 @@ inline void Fill(float *output, float value, int size)
    std::fill(output, output + size, value);
 }
 
-inline void Copy(float *output, float const *input, int size)
+template <class T>
+inline void Copy(T *output, T const *input, int size)
 {
    std::copy(input, input + size, output);
 }
diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx
index 5c30a42619e55..3098303440e72 100644
--- a/tmva/sofie/src/RModel.cxx
+++ b/tmva/sofie/src/RModel.cxx
@@ -1304,7 +1304,7 @@ void RModel::GenerateSessionCode()
 
    // storing the parameters for future checking to avoid mismatches
    if (!fDimShapeNames.empty()) {
-      fGC += "\n\n";
+      fGC += "\n//   dynamic shape parameters\n";
       std::sort(fDimShapeNames.begin(), fDimShapeNames.end());
       for (const auto &p : fDimShapeNames) {
          fGC += "size_t " + memberNameForDimShape(p) + ";\n";
@@ -1361,6 +1361,8 @@ void RModel::GenerateSessionCode()
             fGC += "   " + memberNameForDimShape(p) + " = " + p + ";\n";
          }
       }
+      // add some extra code needed for initialization of dynamic parameters
+      fGC += fExtraCodeForDimShapes;
 
       if (fUseWeightFile) {
          fGC += "\n//--- reading weights from file\n";
@@ -1759,6 +1761,42 @@ void RModel::GenerateRequiredInputTensorInfo()
 
    fGC +=
       "\nconstexpr bool hasDynamicInputTensors{" + std::string{hasDynamicInputTensors ? "true" : "false"} + "};\n\n";
+
+   fGC += "\n// Output tensor dimensions\n";
+   bool hasDynamicOutputTensors = false;
+   for (std::size_t iOutput = 0; iOutput < fOutputTensorNames.size(); ++iOutput) {
+      auto const &name = fOutputTensorNames[iOutput];
+      if (IsDynamicTensor(name)) {
+         hasDynamicOutputTensors = true;
+      }
+      std::vector<Dim> shape = GetDimTensorShape(name);
+      fGC += "constexpr std::array<SingleDim, " + std::to_string(shape.size()) + "> dim_" + name + "{";
+      for (std::size_t iDim = 0; iDim < shape.size(); ++iDim) {
+         auto const &dim = shape[iDim];
+         if (dim.isParam) {
+            fGC += "SingleDim{\"" + dim.GetVal() + "\"}";
+         } else {
+            fGC += "SingleDim{" + dim.GetVal() + "}";
+         }
+         if (iDim != shape.size() - 1) {
+            fGC += ", ";
+         }
+      }
+      fGC += "};\n";
+   }
+   fGC += "\nconstexpr std::array<TensorDims, " + std::to_string(fOutputTensorNames.size()) + "> outputTensorDims{\n";
+   for (std::size_t iOutput = 0; iOutput < fOutputTensorNames.size(); ++iOutput) {
+      auto const &name = fOutputTensorNames[iOutput];
+      fGC += SP + "makeDims(dim_" + name + ")";
+      if (iOutput == fOutputTensorNames.size() - 1) {
+         fGC += "\n";
+      } else {
+         fGC += ",\n";
+      }
+   }
+   fGC += "};\n";
+   fGC +=
+      "\nconstexpr bool hasDynamicOutputTensors{" + std::string{hasDynamicOutputTensors ? "true" : "false"} + "};\n\n";
 }
 
 void RModel::PrintRequiredInputTensors() const {

From 95761367ccc5a0892ae429f90c6f8f22103f7f3f Mon Sep 17 00:00:00 2001
From: moneta <lorenzo.moneta@cern.ch>
Date: Thu, 7 May 2026 19:43:41 +0200
Subject: [PATCH 2/8] [tmva][sofie] Fix a bug in Cast operator in case of
 casting to bool

The cast to bool was incorrect since it was performed as a cast to uint8.

Fix also the special case of the NonZero dynamic parameter, which is defined by the NonZero operator. Add at the end a Session data member for the parameter, which is then used when creating the output vector.

Fix a bug introduced in softmax generated code in the generic case

Fix the writing of the data in initializer lists for uint8_t types

Set the correct new class version in RModel.hxx (version 4)
---
 tmva/sofie/inc/TMVA/RModel.hxx            |  3 ++-
 tmva/sofie/inc/TMVA/ROperator_Cast.hxx    | 11 ++++++++---
 tmva/sofie/inc/TMVA/ROperator_NonZero.hxx |  8 ++++++++
 tmva/sofie/inc/TMVA/ROperator_Softmax.hxx | 14 +++++++++-----
 tmva/sofie/inc/TMVA/SOFIE_common.hxx      |  3 +--
 tmva/sofie/src/RModel.cxx                 |  1 +
 6 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/tmva/sofie/inc/TMVA/RModel.hxx b/tmva/sofie/inc/TMVA/RModel.hxx
index ed7eca59ec844..08ad14149aacf 100644
--- a/tmva/sofie/inc/TMVA/RModel.hxx
+++ b/tmva/sofie/inc/TMVA/RModel.hxx
@@ -242,7 +242,8 @@ public:
    bool UseVDT() const { return fUseVDT;}
 
    // Use the ClassDef macro to allow definition of custom streaming
-   ClassDefNV(RModel, 3);
+   // Use Version 0 since we don't support for time being ROOT I/O streaming of RModel objects
+   ClassDefNV(RModel, 4);
 };
 
 // need to implement here templated member functions and its specialization
diff --git a/tmva/sofie/inc/TMVA/ROperator_Cast.hxx b/tmva/sofie/inc/TMVA/ROperator_Cast.hxx
index cace65040c772..85f7ac40e6aac 100644
--- a/tmva/sofie/inc/TMVA/ROperator_Cast.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_Cast.hxx
@@ -66,8 +66,9 @@ public:
       if (!fIsOutputConstant)
          model.AddIntermediateTensor(fNY, fType, fShape);
       if (model.Verbose()) {
-         std::cout << "Cast : " << ConvertTypeToString(inputType) << " " << fNX << " -> " << ConvertTypeToString(fType) << " for " << fNY
-                  << " shape " << ConvertDimShapeToString(fShape);
+         std::cout << "Cast : " << ConvertTypeToString(inputType) << " " << fNX << " -> " << ConvertTypeToString(fType);
+         if (fType == ETensorType::BOOL) std::cout << " (converted from BOOL) ";
+         std::cout << " for " << fNY << " shape " << ConvertDimShapeToString(fShape);
          if (fIsOutputConstant) std::cout << " (constant) ";
          std::cout << std::endl;
       }
@@ -87,7 +88,11 @@ public:
 
       out << SP << "for (int id = 0; id < " << length << " ; id++){\n";
 
-      out << SP << SP << "tensor_" << fNY << "[id] = static_cast<"<< ConvertTypeToString(fType) << ">(tensor_" << fNX << "[id]);\n";
+      // need to handle bool case separatly since casting to uint8 will not give right result
+      if (fType == ETensorType::BOOL)
+         out << SP << SP << "tensor_" << fNY << "[id] = (tensor_" << fNX << "[id] != 0) ? 1 : 0;\n";
+      else
+         out << SP << SP << "tensor_" << fNY << "[id] = static_cast<"<< ConvertTypeToString(fType) << ">(tensor_" << fNX << "[id]);\n";
 
       out << SP << "}\n";
       return out.str();
diff --git a/tmva/sofie/inc/TMVA/ROperator_NonZero.hxx b/tmva/sofie/inc/TMVA/ROperator_NonZero.hxx
index 065789e8d41d6..0aebf5b14309b 100644
--- a/tmva/sofie/inc/TMVA/ROperator_NonZero.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_NonZero.hxx
@@ -101,6 +101,7 @@ public:
          // declare the parameter for number of non zero elements, used when output is not constant
          auto inputLength = ConvertDimShapeToLength(fShapeX);
          std::string codeDecl = SP + "size_t " + fNonZeroParam + " = " + inputLength + ";\n";
+         codeDecl += SP + "fV_NonZero_" + fNX + " = " + fNonZeroParam + ";\n";
          model.AddExtraCodeForDimShapes(codeDecl);
 
          model.AddIntermediateTensor(fNY, ETensorType::INT64, fShapeY);
@@ -110,6 +111,13 @@ public:
       }
    }
 
+   std::string GenerateSessionMembersCode(std::string /*opName*/) override {
+      if (fIsOutputConstant) return "";
+      std::stringstream out;
+      out << SP << "size_t fV_NonZero_" << fNX << " = 0;\n";
+      return out.str();
+   }
+
 
    std::string Generate(std::string opName) override {
       if (fIsOutputConstant) {
diff --git a/tmva/sofie/inc/TMVA/ROperator_Softmax.hxx b/tmva/sofie/inc/TMVA/ROperator_Softmax.hxx
index db79c2b6d0f7d..4d4d16d866e64 100644
--- a/tmva/sofie/inc/TMVA/ROperator_Softmax.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_Softmax.hxx
@@ -62,12 +62,14 @@ public:
       }
    }
 
-   std::string Generate(std::string OpName) override {
-      OpName = "op_" + OpName;
+   std::string Generate(std::string opName) override {
+      opName = "op_" + opName;
       if (fShape.empty()) {
          throw std::runtime_error("TMVA SOFIE Operator Softmax called to Generate without being initialized first");
       }
       std::stringstream out;
+       out << "///------- Softmax " << opName << " ---> " << fNY << " "
+           << ConvertDimShapeToString(fShape) << "\n" << std::endl;
       size_t size = fShape.size();
       auto length_str = ConvertDimShapeToLength(fShape);
       size_t axis = fAttrAxis < 0 ? size + fAttrAxis : fAttrAxis;
@@ -85,7 +87,7 @@ public:
             num_rows = "(" + length_str + ") / (" + axis_size + ")";
          }
 
-         out << "\n" << SP << "//------ SOFTMAX - " << size << "  " << length_str << "  " << axis << "\n";
+         out << SP << "//-----  softmax axis is last one - " << axis << "\n";
          out << SP << "for (int i = 0; i < " << num_rows << "; ++i) {\n";
          out << SP << SP << "size_t offset = i * " << axis_size << ";\n";
          out << SP << SP << fType << " const * x_ptr = &tensor_" << fNX << "[offset];\n";
@@ -111,6 +113,7 @@ public:
          out << SP << "}\n";
 
       } else {
+         // generic case for any axis
          auto stride = UTILITY::ComputeStrideFromShape(fShape);
          size_t k = 0;
          std::vector<std::string> l(size);
@@ -118,7 +121,7 @@ public:
             if (i != axis) {
                for (size_t j = 0; j < k; j++) out << SP;
                l[i] = std::string("i") + std::to_string(i);
-               out << "for (int " << l[i] << " = 0; " << l[i] << " < " << fShape[i] << "; " << l[i] << "++) {\n";
+               out << SP << "for (int " << l[i] << " = 0; " << l[i] << " < " << fShape[i] << "; " << l[i] << "++) {\n";
                k++;
             }
          }
@@ -167,7 +170,8 @@ public:
          out << "for (int i = 0; i < " << fShape[axis] << "; i++) {\n";
          for (size_t j = 0; j < size; j++) out << SP;
          out << "size_t id = index + i";
-         if (stride[axis].GetVal() != "1") out << "*(" << stride[axis] << ");\n";
+         if (stride[axis].GetVal() != "1") out << "*(" << stride[axis] << ")";
+         out << ";\n";
          for (size_t j = 0; j < size; j++) out << SP;
          out << "tensor_" << fNY << "[id] /= sum;\n";
          if (fLogSoftmax) {
diff --git a/tmva/sofie/inc/TMVA/SOFIE_common.hxx b/tmva/sofie/inc/TMVA/SOFIE_common.hxx
index 4b7bb4a29b22d..8dd6ced1991d7 100644
--- a/tmva/sofie/inc/TMVA/SOFIE_common.hxx
+++ b/tmva/sofie/inc/TMVA/SOFIE_common.hxx
@@ -231,8 +231,7 @@ std::string ConvertValuesToString(size_t n, const T * data, size_t maxprint = -1
       if (std::is_floating_point_v<T>)
          ret << std::setprecision(std::numeric_limits<T>::max_digits10) << data[i];
       else
-         // cast in case of boolean (int8)
-         ret << data[i];
+         ret << std::to_string(data[i]);
 
       if (i < n-1) ret << ", ";
       if (i < n-1 && i == maxprint-1) ret << "..... ";
diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx
index 3098303440e72..bfe45d0053f51 100644
--- a/tmva/sofie/src/RModel.cxx
+++ b/tmva/sofie/src/RModel.cxx
@@ -1158,6 +1158,7 @@ void RModel::GenerateOutput()
          // Use the session member (fXxx) when any dim is a runtime-computed identifier
          // (e.g. NonZero count). For expression-type dims derived from input shapes
          // (e.g. "((W+-3)/2+1)"), use the expression directly.
+         // for input shape parameters we don't need to use the session member since it is passed as argument to the infer function and it is not a runtime computed value
          bool hasRuntimeParam = false;
          for (auto const &dim : GetDynamicTensorShape(name)) {
             if (dim.isParam && IsIdentifier(dim.param) && !IsInputTensorShapeParam(dim.param))

From 0e52b822cdb6e89687bb4ff7035d83fe1e3f3ada Mon Sep 17 00:00:00 2001
From: moneta <lorenzo.moneta@cern.ch>
Date: Mon, 11 May 2026 10:43:16 +0200
Subject: [PATCH 3/8] [tmva][sofie] Fix some additional bugs parsing complex
 models

- Fix Where for initialized and Shape tensors. The new implementation was not taking into account the Shape tensors. This caused a failure to parse the ATLAS Gnn tracking model

- Fix Slice for trivial copying. Use now std::copy since we cannot use alias tensor anymore after the change of using a free function with a const Session

- Avoid printing tensor names in the comment of the Softmax generated code. There is an issue in the function RModel::CollectTensorMemberNames used to get tensor members from Session. The problem is that if a tensor has the name "tensor_X" and is used as member "tensor_tensor_X", the function assumes that a tensor with name "X" exists. This was causing the Keras parser to crash.

- Fix an issue writing the initialized data when the values are inf or NaN. Use the function from limits in this case
---
 tmva/sofie/inc/TMVA/ROperator_Reshape.hxx |   4 +-
 tmva/sofie/inc/TMVA/ROperator_Slice.hxx   |   7 +-
 tmva/sofie/inc/TMVA/ROperator_Softmax.hxx |   2 +-
 tmva/sofie/inc/TMVA/ROperator_Where.hxx   | 382 ++++++++++++++--------
 tmva/sofie/inc/TMVA/SOFIE_common.hxx      |  15 +-
 tmva/sofie/src/RModel.cxx                 |  22 +-
 tmva/sofie_parsers/src/ParseWhere.cxx     |   5 +
 7 files changed, 289 insertions(+), 148 deletions(-)

diff --git a/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx b/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx
index 4168144f2e708..41946a33085b5 100644
--- a/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx
@@ -332,7 +332,7 @@ public:
    }
 
    std::string Generate(std::string opName) override {
-      if (fIsOutputConstant) return "";  //no op for constant tensors
+
 
       std::stringstream out;
       std::string opType = "Reshape";
@@ -345,6 +345,8 @@ public:
 
       out << SP << "///--------" << opType << " operator " << opName << " --> " << ConvertDimShapeToString(fShapeOutput) << "\n";
 
+      if (fIsOutputConstant) return out.str();  //no op for constant tensors
+
       // in case of dynamic output shape we need to set the shape value from input shape tensor
       // and take case of the zero values
       if (fDynamicShape) {
diff --git a/tmva/sofie/inc/TMVA/ROperator_Slice.hxx b/tmva/sofie/inc/TMVA/ROperator_Slice.hxx
index d119fa3a29ea1..dfdf492893113 100644
--- a/tmva/sofie/inc/TMVA/ROperator_Slice.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_Slice.hxx
@@ -339,7 +339,7 @@ public:
          }
 
          model.AddIntermediateTensor(fNOutput, model.GetTensorType(fNData), fShapeOutput);
-         if (fIdentitySlice)  model.AddAliasTensor(fNOutput, fNData);
+         //if (fIdentitySlice)  model.AddAliasTensor(fNOutput, fNData);
 
          if (model.Verbose()) {
             std::cout << "Slice " << fNData << "  " << ConvertDimShapeToString(fShapeInput)
@@ -366,8 +366,9 @@ public:
       size_t ndim = fShapeInput.size();
 
       if (fIdentitySlice) {
-         out << "/// Slice is just an identity (copy pointers) \n";
-         out << SP << "tensor_" << fNOutput << " = tensor_" << fNData << ";\n";
+         out << "/// Slice is just an identity (copy) \n";
+         //out << SP << "tensor_" << fNOutput << " = const_cast<" << ConvertTypeToString(fOutputType) << " *>(tensor_" << fNData << ");\n";
+         out << SP << "std::copy(tensor_" << fNData << ", tensor_" << fNData << " + " << ConvertDimShapeToLength(fShapeInput) << ", tensor_" << fNOutput << ");\n";
          return out.str();
       }
 
diff --git a/tmva/sofie/inc/TMVA/ROperator_Softmax.hxx b/tmva/sofie/inc/TMVA/ROperator_Softmax.hxx
index 4d4d16d866e64..025d6d678088a 100644
--- a/tmva/sofie/inc/TMVA/ROperator_Softmax.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_Softmax.hxx
@@ -68,7 +68,7 @@ public:
          throw std::runtime_error("TMVA SOFIE Operator Softmax called to Generate without being initialized first");
       }
       std::stringstream out;
-       out << "///------- Softmax " << opName << " ---> " << fNY << " "
+       out << "///------- Softmax " << opName << " ---> "  // << fNY << " "
            << ConvertDimShapeToString(fShape) << "\n" << std::endl;
       size_t size = fShape.size();
       auto length_str = ConvertDimShapeToLength(fShape);
diff --git a/tmva/sofie/inc/TMVA/ROperator_Where.hxx b/tmva/sofie/inc/TMVA/ROperator_Where.hxx
index 4c42ad6d655d9..073c7e1ec19e7 100644
--- a/tmva/sofie/inc/TMVA/ROperator_Where.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_Where.hxx
@@ -7,32 +7,36 @@
 
 #include <sstream>
 
-namespace TMVA {
-namespace Experimental {
-namespace SOFIE {
+namespace TMVA{
+namespace Experimental{
+namespace SOFIE{
 
-template <typename T>
-class ROperator_Where final : public ROperator {
+
+
+template<typename T>
+class ROperator_Where final : public ROperator{
 private:
 
    bool fIsInputBoolTensor = false;
 
-   // Tensor names: C = condition, X = true branch, Y = false branch, Z = output
-   std::string fNC;            // condition (bool)
-   std::string fNX;            // true-branch values
-   std::string fNY;            // false-branch values
-   std::string fNZ;            // output
-   std::string fNBroadcastedC;
+
+   std::string fNX;
+   std::string fNY;
+   std::string fNC;
    std::string fNBroadcastedX;
    std::string fNBroadcastedY;
+   std::string fNBroadcastedC;
+   std::string fNZ;
 
-   // Static shapes (used when all inputs are non-dynamic)
-   std::vector<size_t> fShapeC;
+
+
+   // static shapes (used when tensors are not dynamic) )
    std::vector<size_t> fShapeX;
    std::vector<size_t> fShapeY;
+   std::vector<size_t> fShapeC;
    std::vector<size_t> fShapeZ;
 
-   // Dynamic shapes (Dim-aware, used when any input is dynamic)
+   // Dynamic generic shapes
    std::vector<Dim> fDimShapeC;
    std::vector<Dim> fDimShapeX;
    std::vector<Dim> fDimShapeY;
@@ -46,47 +50,37 @@ private:
    int fBroadcastFlag = 0;
 
 public:
-   ROperator_Where() {}
-   ROperator_Where(const std::string &nameC,
-                   const std::string &nameX,
-                   const std::string &nameY,
-                   const std::string &nameZ)
-      : fNC(UTILITY::Clean_name(nameC)),
-        fNX(UTILITY::Clean_name(nameX)),
-        fNY(UTILITY::Clean_name(nameY)),
-        fNZ(UTILITY::Clean_name(nameZ))
-   {
-      fInputTensorNames  = { fNC, fNX, fNY };
-      fOutputTensorNames = { fNZ };
-   }
+   ROperator_Where(){}
+   ROperator_Where(const std::string & nameC, const std::string & nameX, const std::string & nameY, const std::string & nameZ):
+      fNX(UTILITY::Clean_name(nameX)), fNY(UTILITY::Clean_name(nameY)), fNC(UTILITY::Clean_name(nameC)), fNZ(UTILITY::Clean_name(nameZ)){
+         fInputTensorNames = { fNX, fNY, fNC };
+         fOutputTensorNames = { fNZ };
+      }
 
    // type of output given input
-   std::vector<ETensorType> TypeInference(std::vector<ETensorType> input) override
-   {
-      // output type follows X (and Y), not C (which is bool)
-      return { input[1] };
+   std::vector<ETensorType> TypeInference(std::vector<ETensorType> input) override {
+      return input;
    }
 
    // shape of output tensors given input tensors
-   std::vector<std::vector<size_t>> ShapeInference(std::vector<std::vector<size_t>> input) override
-   {
-      // conservative: assume same shape (broadcasting resolved in Initialize)
-      return { input[1] };
+   std::vector<std::vector<size_t>> ShapeInference(std::vector<std::vector<size_t>> input) override {
+      // assume now inputs have same shape (no broadcasting)
+      auto ret = std::vector<std::vector<size_t>>(1, input[0]); // return vector size 1 with first input
+      return ret;
    }
 
-   void Initialize(RModel &model) override
-   {
-      // ---------------------------------------------------------------- //
-      //  Check all inputs exist
-      // ---------------------------------------------------------------- //
-      if (!model.CheckIfTensorAlreadyExist(fNC))
-         throw std::runtime_error(std::string("TMVA SOFIE Where Op: condition tensor ") + fNC + " not found in model");
-      if (!model.CheckIfTensorAlreadyExist(fNX))
-         throw std::runtime_error(std::string("TMVA SOFIE Where Op: X tensor ") + fNX + " not found in model");
-      if (!model.CheckIfTensorAlreadyExist(fNY))
-         throw std::runtime_error(std::string("TMVA SOFIE Where Op: Y tensor ") + fNY + " not found in model");
-
-      // condition tensor is bool (uint8) - mark if it is a live input tensor
+   void Initialize(RModel& model) override {
+      // input must be a graph input, or already initialized intermediate tensor
+      if (!model.CheckIfTensorAlreadyExist(fNX)){
+         throw std::runtime_error(std::string("TMVA SOFIE Where Op Input Tensor ") + fNX + "is not found in model");
+      }
+      if (!model.CheckIfTensorAlreadyExist(fNY)) {
+         throw std::runtime_error(std::string("TMVA SOFIE Where Op Input Tensor ") + fNY + "is not found in model");
+      }
+      if (!model.CheckIfTensorAlreadyExist(fNC)) {
+         throw std::runtime_error(std::string("TMVA SOFIE Where Op Input Tensor ") + fNC + "is not found in model");
+      }
+      // check if fNC input tensor is boolean
       if (model.IsReadyInputTensor(fNC))
          fIsInputBoolTensor = true;
 
@@ -117,13 +111,14 @@ public:
          fDimShapeY = ConvertShapeToDim(fShapeY);
       }
 
+
       if (model.Verbose()) {
          if (dynamicInputs & 1)
             std::cout << "Where : condition " << fNC << " is dynamic " << ConvertDimShapeToString(fDimShapeC) << "\n";
          if (dynamicInputs & 2)
-            std::cout << "Where : X " << fNX << " is dynamic " << ConvertDimShapeToString(fDimShapeX) << "\n";
+            std::cout << "Where :  " << fNX << " is dynamic " << ConvertDimShapeToString(fDimShapeX) << "\n";
          if (dynamicInputs & 4)
-            std::cout << "Where : Y " << fNY << " is dynamic " << ConvertDimShapeToString(fDimShapeY) << "\n";
+            std::cout << "Where : Y " << fNZ << " is dynamic " << ConvertDimShapeToString(fDimShapeZ) << "\n";
       }
 
       // ---------------------------------------------------------------- //
@@ -131,79 +126,186 @@ public:
       // ---------------------------------------------------------------- //
       if (dynamicInputs == 0) {
 
-         // Multidirectional broadcast over all three tensors
-         auto retXY = UTILITY::MultidirectionalBroadcastShape(fShapeX, fShapeY);
-         fBroadcastFlag = retXY.first;
-         fShapeZ = retXY.second;
-         // also factor in C
-         auto retCZ = UTILITY::MultidirectionalBroadcastShape(fShapeC, fShapeZ);
-         fBroadcastFlag |= retCZ.first;
-         fShapeZ = retCZ.second;
-
-         bool allConstant = model.IsInitializedTensor(fNC) &&
-                            model.IsInitializedTensor(fNX) &&
-                            model.IsInitializedTensor(fNY);
-
-         if (allConstant) {
-            // ----------------------------------------------------------
-            //  Constant folding: evaluate Where at model initialisation
-            // ----------------------------------------------------------
-            auto broadcastIfNeeded = [&](const std::string &name,
-                                         const std::vector<size_t> &shape,
-                                         std::string &bcName,
-                                         const std::string &prefix) {
-               if (shape != fShapeZ) {
-                  bcName = prefix + name + "to" + fNZ;
-                  auto data = model.GetInitializedTensorData(name);
-                  std::shared_ptr<void> bcData(
-                     UTILITY::UnidirectionalBroadcast(static_cast<T *>(data.get()), shape, fShapeZ),
+         bool broadcast = !UTILITY::AreSameShape(fShapeX, fShapeY) || !UTILITY::AreSameShape(fShapeX, fShapeC);
+         if (broadcast) {
+            // find shape to broadcast between X,Y,C looking for max length
+            size_t lengthX = ConvertShapeToLength(fShapeX);
+            size_t lengthY = ConvertShapeToLength(fShapeY);
+            size_t lengthC = ConvertShapeToLength(fShapeC);
+            bool broadcastX = false, broadcastY = false, broadcastC = false;
+            if (lengthX >= lengthY && lengthX >= lengthC) {
+               fShapeZ = fShapeX;
+               // broadcast Y and C if different than X
+               broadcastY = (lengthY != lengthX);
+               broadcastC = (lengthC != lengthX);
+            } else if (lengthY >= lengthX && lengthY >= lengthC) {
+               fShapeZ = fShapeY;
+               // broadcast X and C if different than Y
+               broadcastX = (lengthX != lengthY);
+               broadcastC = (lengthC != lengthY);
+            } else if (lengthC >= lengthX && lengthC >= lengthY) {
+               fShapeZ = fShapeC;
+               // broadcast X and Y if different than C
+               broadcastX = (lengthX != lengthC);
+               broadcastY = (lengthY != lengthC);
+            }
+
+            // Broadcast X to Z
+            if (broadcastX) {
+               fNBroadcastedX = "BC_" + fNX + "_to_" + fNZ;
+               if (model.IsInitializedTensor(fNX)) {
+                  auto data = model.GetInitializedTensorData(fNX);
+                  std::shared_ptr<void> broadcastedData(
+                     UTILITY::UnidirectionalBroadcast(static_cast<T *>(data.get()), fShapeX, fShapeZ),
                      std::default_delete<T[]>());
-                  model.AddConstantTensor(bcName, model.GetTensorType(name), fShapeZ, bcData);
+                  // Update the data and the shape of X
+                  model.AddConstantTensor(fNBroadcastedX, model.GetTensorType(fNX), fShapeZ, broadcastedData);
+                  fShapeX = fShapeZ;
+               } else {
+                  // I need to prepend to shape of X the extra dimensions added for broadcasting to Z
+                  if (fShapeX.size() < fShapeZ.size()) {
+                     size_t nPrepend = fShapeZ.size() - fShapeX.size();
+                     fShapeX.insert(fShapeX.begin(), nPrepend, 1);
+                  }
                }
-            };
-
-            broadcastIfNeeded(fNX, fShapeX, fNBroadcastedX, "BC_");
-            broadcastIfNeeded(fNY, fShapeY, fNBroadcastedY, "BC_");
-            broadcastIfNeeded(fNC, fShapeC, fNBroadcastedC, "BC_");
+            }
+            // Broadcast Y to Z
+            if (broadcastY) {
+               fNBroadcastedY = "BC_" + fNY + "_to_" + fNZ;
+               if (model.IsInitializedTensor(fNY)) {
+                  auto data = model.GetInitializedTensorData(fNY);
+                  std::shared_ptr<void> broadcastedData(
+                     UTILITY::UnidirectionalBroadcast(static_cast<T *>(data.get()), fShapeY, fShapeZ),
+                     std::default_delete<T[]>());
+                  // do not update tensor B but add broadcasted one (since it can be input to some other operators)
+                  model.AddConstantTensor(fNBroadcastedY, model.GetTensorType(fNY), fShapeZ, broadcastedData);
+                  fShapeY = fShapeZ;
+               } else {
+                  // I need to prepend to shape of Y the extra dimensions added for broadcasting to Z
+                  if (fShapeY.size() < fShapeZ.size()) {
+                     size_t nPrepend = fShapeZ.size() - fShapeY.size();
+                     fShapeY.insert(fShapeY.begin(), nPrepend, 1);
+                  }
 
-            const std::string &nameC = fNBroadcastedC.empty() ? fNC : fNBroadcastedC;
-            const std::string &nameX = fNBroadcastedX.empty() ? fNX : fNBroadcastedX;
-            const std::string &nameY = fNBroadcastedY.empty() ? fNY : fNBroadcastedY;
+               }
+            }
+            // Broadcast C to Z
+            if (broadcastC) {
+               fNBroadcastedC = "BC_" + fNC + "_to_" + fNZ;
+               if (model.IsInitializedTensor(fNC)) {
+                  auto data = model.GetInitializedTensorData(fNC);
+                  std::shared_ptr<void> broadcastedData(
+                     UTILITY::UnidirectionalBroadcast(static_cast<T *>(data.get()), fShapeC, fShapeZ),
+                     std::default_delete<T[]>());
+                  // do not update tensor C but add broadcasted one (since it can be input to some other operators)
+                  model.AddConstantTensor(fNBroadcastedC, model.GetTensorType(fNC), fShapeZ, broadcastedData);
+                  fShapeC = fShapeZ;
+               } else {
+                  // I need to prepend to shape of C the extra dimensions added for broadcasting to Z
+                  if (fShapeC.size() < fShapeZ.size()) {
+                     size_t nPrepend = fShapeZ.size() - fShapeC.size();
+                     fShapeC.insert(fShapeC.begin(), nPrepend, 1);
+                  }
+               }
+            }
+         } else {
+            fShapeZ = fShapeX;
+         }
+         // check case of constant  output (if all inputs are defined)
+         if (model.IsInitializedTensor(fNC)) {
 
+            std::cout << "Where op: " << fNC << " is initialized\n";
+            std::string nameC = fNBroadcastedC.empty() ? fNC : fNBroadcastedC;
             auto dataC = static_cast<bool *>(model.GetInitializedTensorData(nameC).get());
-            auto dataX = static_cast<T *>   (model.GetInitializedTensorData(nameX).get());
-            auto dataY = static_cast<T *>   (model.GetInitializedTensorData(nameY).get());
-
-            size_t len = ConvertShapeToLength(fShapeZ);
-            std::vector<T> dataZ(len);
-            for (size_t i = 0; i < len; ++i)
-               dataZ[i] = dataC[i] ? dataX[i] : dataY[i];
-
-            model.AddConstantTensor<T>(fNZ, fShapeZ, dataZ.data());
             model.SetNotWritableInitializedTensor(nameC);
-            model.SetNotWritableInitializedTensor(nameX);
-            model.SetNotWritableInitializedTensor(nameY);
+            T *dataX = nullptr;
+            T *dataY = nullptr;
+            std::vector<Dim> shapeDataX;
+            std::vector<Dim> shapeDataY;
+            if (model.IsInitializedTensor(fNX)) {
+               std::cout << "Where op: " << fNX << " is initialized\n";
+               std::string nameX = fNBroadcastedX.empty() ? fNX : fNBroadcastedX;
+               dataX = static_cast<T *>(model.GetInitializedTensorData(nameX).get());
+               // flag tensors to not be written in a file
+               model.SetNotWritableInitializedTensor(nameX);
+            } else if (model.IsShapeTensor(fNX)) {
+               std::cout << "Where op: " << fNX << " is a shape tensor\n";
+               shapeDataX = model.GetShapeTensorValues(fNX);
+            }
+            if (model.IsInitializedTensor(fNY)) {
+               std::cout << "Where op: " << fNY << " is initialized\n";
+               std::string nameY = fNBroadcastedY.empty() ? fNY : fNBroadcastedY;
+               dataY = static_cast<T *>(model.GetInitializedTensorData(nameY).get());
+               model.SetNotWritableInitializedTensor(nameY);
+            } else if (model.IsShapeTensor(fNY)) {
+               std::cout << "Where op: " << fNY << " is a shape tensor\n";
+               shapeDataY = model.GetShapeTensorValues(fNY);
+            }
+            std::vector<T> dataZ;        // used in case output is constant tensor
+            std::vector<Dim> shapeDataZ; // used in case output is a shape tensor (can be also constant if all
+                                         // dimensions are not parametric)
+            // if fNC (condition) is initialized we know the output is a shape or a constant tensor,
+            // so we can compute it at initialization and add it as a constant tensor to the model
+            // (and not add the operator output as intermediate tensor to the model)
+            bool isOutputConstantTensor = true;
+            if (dataX && dataY) {
+               dataZ.resize(ConvertShapeToLength(fShapeZ));
+               for (size_t i = 0; i < dataZ.size(); i++)
+                  dataZ[i] = (dataC[i]) ? dataX[i] : dataY[i];
+               std::cout << "data A and B : dataZ constant: " << ConvertValuesToString(dataZ) << std::endl;
+            } else if (dataX && shapeDataY.size() > 0) {
+               shapeDataZ.resize(ConvertShapeToLength(fShapeZ));
+               for (size_t i = 0; i < shapeDataZ.size(); i++) {
+                  shapeDataZ[i] = (dataC[i]) ? Dim{size_t(dataX[i])} : shapeDataY[i];
+                  isOutputConstantTensor &= !shapeDataZ[i].isParam;
+               }
+               std::cout << "data A but shapeB " << ConvertDimShapeToString(shapeDataY) << "  "
+                         << isOutputConstantTensor << std::endl;
+            } else if (dataY && shapeDataX.size() > 0) {
+               shapeDataZ.resize(ConvertShapeToLength(fShapeZ));
+               for (size_t i = 0; i < shapeDataZ.size(); i++) {
+                  shapeDataZ[i] = (dataC[i]) ? shapeDataY[i] : Dim{size_t(dataY[i])};
+                  isOutputConstantTensor &= !shapeDataZ[i].isParam;
+               }
+               std::cout << "data B but shapeA " << ConvertDimShapeToString(shapeDataX) << "  "
+                         << isOutputConstantTensor << std::endl;
+            } else if (shapeDataY.size() > 0 && shapeDataX.size() > 0) {
+               shapeDataZ.resize(ConvertShapeToLength(fShapeZ));
+               for (size_t i = 0; i < shapeDataZ.size(); i++) {
+                  shapeDataZ[i] = (dataC[i]) ? shapeDataX[i] : shapeDataY[i];
+                  isOutputConstantTensor &= !shapeDataZ[i].isParam;
+               }
+               std::cout << " shapeA and B " << ConvertDimShapeToString(shapeDataX) << " shapeB "
+                         << ConvertDimShapeToString(shapeDataY) << "  " << isOutputConstantTensor << std::endl;
+            }
             fIsOutputConstant = true;
-            fOutputTensorNames.pop_back();
-
-            if (model.Verbose())
-               std::cout << "Where --> " << fNZ << " " << ConvertShapeToString(fShapeZ)
-                         << " : " << ConvertValuesToString(dataZ) << " (constant)\n";
-         } else {
-            // ----------------------------------------------------------
-            //  Non-constant static tensors - we don't need to broadcast tensors
-            // ----------------------------------------------------------
+            // add as constant or shape tensor depending on the case
+            if (dataZ.size() > 0)
+               model.AddConstantTensor<T>(fNZ, fShapeZ, dataZ.data());
+            else if (shapeDataZ.size() > 0)
+               model.AddShapeTensor(fNZ, shapeDataZ, fShapeZ.size() == 0);
+            else {
+               fIsOutputConstant = false;
+            }
+            if (fIsOutputConstant && model.Verbose())
+               std::cout << "Where op ---> " << fNZ << "  " << ConvertShapeToString(fShapeZ) << " : "
+                         << ((dataZ.size() > 0) ? ConvertValuesToString(dataZ) : ConvertDimShapeToString(shapeDataZ))
+                         << ((dataZ.size() > 0) ? " (constant)" : " (shape)") << std::endl;
+
+            // output is a constant tensor
+            if (fIsOutputConstant)
+               fOutputTensorNames.pop_back();
+         }
+         if (!fIsOutputConstant) {
 
             fDimShapeZ = ConvertShapeToDim(fShapeZ);
             model.AddIntermediateTensor(fNZ, model.GetTensorType(fNX), fShapeZ);
-
             if (model.Verbose())
-               std::cout << "Where : C=" << fNC << " " << ConvertShapeToString(fShapeC)
-                         << "  X=" << fNX << " " << ConvertShapeToString(fShapeX)
-                         << "  Y=" << fNY << " " << ConvertShapeToString(fShapeY)
-                         << " --> Z=" << fNZ << " " << ConvertShapeToString(fShapeZ) << "\n";
+               std::cout << "Where : condition : " << fNC << "  " << ConvertShapeToString(fShapeC) << " X "
+                         << fNX << "  " << ConvertShapeToString(fShapeX) << " Y " << fNY << "  "
+                         << ConvertShapeToString(fShapeY) << " ---> " << fNZ << "  " << ConvertShapeToString(fShapeZ)
+                         << std::endl;
          }
-
       } else {
          // ---------------------------------------------------------------- //
          //  Dynamic path: at least one input has a parametric shape
@@ -227,7 +329,7 @@ public:
             for (size_t i = 0; i < fDimShapeZ.size(); i++) {
                auto &s = fDimShapeZ[i];
                if (s.isParam && s.param.find("std::max") != std::string::npos) {
-                  // prefer X dim over Y dim
+                  // prefer A dim over B dim
                   if (i < fDimShapeX.size() && IsInputDimParam(fDimShapeX[i].param)) {
                      s = (fDimShapeX[i].dim != 1) ? fDimShapeX[i] : fDimShapeY[i];
                   } else if (i < fDimShapeY.size() && IsInputDimParam(fDimShapeY[i].param)) {
@@ -236,35 +338,42 @@ public:
                }
             }
          }
+         // I need to prepend to shape of X,Y,C the extra dimensions added for broadcasting to Z
+         if (fDimShapeX.size() < fDimShapeZ.size()) {
+            size_t nPrepend = fDimShapeZ.size() - fDimShapeX.size();
+            fDimShapeX.insert(fDimShapeX.begin(), nPrepend, Dim{1});
+         }
+         if (fDimShapeY.size() < fDimShapeZ.size()) {
+            size_t nPrepend = fDimShapeZ.size() - fDimShapeY.size();
+            fDimShapeY.insert(fDimShapeY.begin(), nPrepend, Dim{1});
+         }
+         if (fDimShapeC.size() < fDimShapeZ.size()) {
+            size_t nPrepend = fDimShapeZ.size() - fDimShapeC.size();
+            fDimShapeC.insert(fDimShapeC.begin(), nPrepend, Dim{1});
+         }
 
          model.AddIntermediateTensor(fNZ, model.GetTensorType(fNX), fDimShapeZ);
 
          if (model.Verbose())
             std::cout << "Where (dynamic) : C=" << ConvertDimShapeToString(fDimShapeC)
-                      << "  X=" << ConvertDimShapeToString(fDimShapeX)
-                      << "  Y=" << ConvertDimShapeToString(fDimShapeY)
-                      << " --> Z=" << ConvertDimShapeToString(fDimShapeZ) << "\n";
+                      << "  A=" << ConvertDimShapeToString(fDimShapeX)
+                      << "  B=" << ConvertDimShapeToString(fDimShapeY)
+                      << " --> Y=" << ConvertDimShapeToString(fDimShapeZ) << "\n";
       }
    }
 
-   std::string GenerateInitCode() override
-   {
+   std::string GenerateInitCode() override {
       std::stringstream out;
       return out.str();
    }
 
-   std::string Generate(std::string opName) override
-   {
-      if (fIsOutputConstant) return "";
+   std::string Generate(std::string opName) override {
 
       opName = "op_" + opName;
-
-      if (fDimShapeZ.empty()) {
-         throw std::runtime_error("TMVA SOFIE Where Op called to Generate without being initialized first");
-      }
-
       std::stringstream out;
       out << SP << "\n//------ WHERE " << opName << " --> " << ConvertDimShapeToString(fDimShapeZ) << "\n";
+      if (fIsOutputConstant) return out.str();
+
 
       // ---------------------------------------------------------------- //
       //  Runtime broadcast validation (dynamic shapes, flag bit 4)
@@ -281,14 +390,14 @@ public:
                out << SP << SP << "if (" << fDimShapeX[i] << " != 1 && "
                    << fDimShapeX[i] << " != " << fDimShapeZ[i] << ")\n";
                out << SP << SP << SP
-                   << "throw std::runtime_error(\"SOFIE Where: cannot broadcast X dim " << i << " in " << opName << "\");\n";
+                   << "throw std::runtime_error(\"SOFIE Where: cannot broadcast A dim " << i << " in " << opName << "\");\n";
             }
             // validate Y vs Z
             if (i < fDimShapeY.size() && fDimShapeY[i].isParam) {
                out << SP << SP << "if (" << fDimShapeY[i] << " != 1 && "
                    << fDimShapeY[i] << " != " << fDimShapeZ[i] << ")\n";
                out << SP << SP << SP
-                   << "throw std::runtime_error(\"SOFIE Where: cannot broadcast Y dim " << i << " in " << opName << "\");\n";
+                   << "throw std::runtime_error(\"SOFIE Where: cannot broadcast B dim " << i << " in " << opName << "\");\n";
             }
             // validate C vs Z
             if (i < fDimShapeC.size() && fDimShapeC[i].isParam) {
@@ -300,10 +409,8 @@ public:
          }
          out << SP << "}\n";
       }
-
+      // now implement Where using the strides and looping over the different dimensions
       // ---------------------------------------------------------------- //
-      //  Runtime for non-constant, non-initialised tensors
-      //
       //  Generate loop(s) with per-dimension stride-based index arithmetic
       // ---------------------------------------------------------------- //
       auto stridesX = UTILITY::ComputeStrideFromShape(fDimShapeX);
@@ -320,6 +427,7 @@ public:
             return "0";
          std::string expr;
          size_t offset = rankZ - dimShape.size();
+         std::cout << rankZ << "  " << dimShape.size() << "  " << offset << std::endl;
          for (size_t i = 0; i < dimShape.size(); ++i) {
             if (dimShape[i].dim == 1 || dimShape[i].GetVal() == "1") continue;
             expr += "idx_" + std::to_string(i + offset);
@@ -336,9 +444,10 @@ public:
       std::string idxY = buildIdxExpr(fDimShapeY, stridesY, fDimShapeZ.size());
       std::string idxC = buildIdxExpr(fDimShapeC, stridesC, fDimShapeZ.size());
 
-      // Emit nested loops over output shape
+       // Emit nested loops over output shape
       int nloop = 0;
       std::string idxZ;
+      // case Z is a scalar (all dimensions are 1) or Z has no dimension
       if (fDimShapeZ.empty() ||
           std::all_of(fDimShapeZ.begin(), fDimShapeZ.end(),
                       [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) {
@@ -375,10 +484,13 @@ public:
 
       return out.str();
    }
+
+
 };
 
-} // namespace SOFIE
-} // namespace Experimental
-} // namespace TMVA
+}//SOFIE
+}//Experimental
+}//TMVA
+
 
-#endif // TMVA_SOFIE_ROperator_Where
+#endif //TMVA_SOFIE_ROperator_Where
diff --git a/tmva/sofie/inc/TMVA/SOFIE_common.hxx b/tmva/sofie/inc/TMVA/SOFIE_common.hxx
index 8dd6ced1991d7..ff206db95f981 100644
--- a/tmva/sofie/inc/TMVA/SOFIE_common.hxx
+++ b/tmva/sofie/inc/TMVA/SOFIE_common.hxx
@@ -228,11 +228,18 @@ std::string ConvertValuesToString(size_t n, const T * data, size_t maxprint = -1
    std::stringstream ret;
    ret << "{ ";
    for (size_t i = 0; i < std::min(n,maxprint); i++) {
-      if (std::is_floating_point_v<T>)
-         ret << std::setprecision(std::numeric_limits<T>::max_digits10) << data[i];
-      else
+      if (std::is_floating_point_v<T>) {
+         // special case for infinity and Nan
+         if (std::isinf(data[i]))
+            ret << (data[i] > 0 ? "std::numeric_limits<" + TensorType<T>::Name() + ">::infinity()" :
+                                  "-std::numeric_limits<" + TensorType<T>::Name() + ">::infinity()");
+         else if (std::isnan(data[i]))
+            ret << "std::numeric_limits<" + TensorType<T>::Name() + ">::quiet_NaN()";
+         else
+            ret << std::setprecision(std::numeric_limits<T>::max_digits10) << data[i];
+      } else {
          ret << std::to_string(data[i]);
-
+      }
       if (i < n-1) ret << ", ";
       if (i < n-1 && i == maxprint-1) ret << "..... ";
    }
diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx
index bfe45d0053f51..037c9f292fc3b 100644
--- a/tmva/sofie/src/RModel.cxx
+++ b/tmva/sofie/src/RModel.cxx
@@ -763,6 +763,7 @@ std::string GenerateConstantTensorCode(const std::pair<std::string, InitializedT
 
    // and check if all values are the same
    bool sameData = false;
+
    // for non stack allocation check if data are the same
    if (!allocateOnStack && length > 1) {
       size_t idx = 1;
@@ -797,6 +798,19 @@ void RModel::GenerateInitializedTensorInfo()
       size_t length = ConvertShapeToLength(i.second.shape());
       if (!fUseWeightFile || i.second.IsConstantTensor() || !i.second.IsWeightTensor() || i.second.type() != ETensorType::FLOAT ) {
          if (i.second.type() == ETensorType::FLOAT) {
+            // check if NaN or Inf values are present in the tensor data
+            bool hasInfOrNaN = false;
+            const float *data = i.second.data<float>();
+            for (size_t idx = 0; idx < length; idx++) {
+               if (std::is_floating_point<float>::value) {
+                  if (std::isinf(data[idx]) || std::isnan(data[idx])) {
+                     hasInfOrNaN = true;
+                     break;
+                  }
+               }
+            }
+            if (hasInfOrNaN)
+               AddNeededStdLib("limits");
             fGC += GenerateConstantTensorCode<float>(i);
             fConstantTensorSize += length * sizeof(float);
          } else if (i.second.type() == ETensorType::INT64) {
@@ -1306,8 +1320,9 @@ void RModel::GenerateSessionCode()
    // storing the parameters for future checking to avoid mismatches
    if (!fDimShapeNames.empty()) {
       fGC += "\n//   dynamic shape parameters\n";
-      std::sort(fDimShapeNames.begin(), fDimShapeNames.end());
-      for (const auto &p : fDimShapeNames) {
+      auto dimShapeNames = fDimShapeNames;
+      std::sort(dimShapeNames.begin(), dimShapeNames.end());
+      for (const auto &p : dimShapeNames) {
          fGC += "size_t " + memberNameForDimShape(p) + ";\n";
       }
    }
@@ -1345,8 +1360,7 @@ void RModel::GenerateSessionCode()
       // add initialization of shape parameters
       // assume all parameters are of type size_t
       if (!fDimShapeNames.empty()) {
-         // sort first the shape parameters in alphabetical order to avoid a random order
-         std::sort(fDimShapeNames.begin(), fDimShapeNames.end() );
+         // need to use same order as in infer function not alphabetical one
          for (auto &p : fDimShapeNames) {
             fGC += ",\n";
             fGC += "        size_t " + p + " = " + fShapeParams[p];
diff --git a/tmva/sofie_parsers/src/ParseWhere.cxx b/tmva/sofie_parsers/src/ParseWhere.cxx
index 6ebcf161e5012..dc4b436282cab 100644
--- a/tmva/sofie_parsers/src/ParseWhere.cxx
+++ b/tmva/sofie_parsers/src/ParseWhere.cxx
@@ -12,6 +12,10 @@ ParserFuncSignature ParseWhere = [](RModelParser_ONNX &parser, const onnx::NodeP
       throw std::runtime_error("TMVA::SOFIE ONNX Parser Where op has invalid input size");
    }
    // condition boolean vector is input 0
+   if (!parser.IsRegisteredTensorType(nodeproto.input(0))){
+      throw std::runtime_error("TMVA::SOFIE ONNX Parser Where op has input tensor " +  nodeproto.input(0)
+                                + " but its type is not yet registered");
+   }
    if (!parser.IsRegisteredTensorType(nodeproto.input(1))){
       throw std::runtime_error("TMVA::SOFIE ONNX Parser Where op has input tensor " +  nodeproto.input(1)
                                 + " but its type is not yet registered");
@@ -31,6 +35,7 @@ ParserFuncSignature ParseWhere = [](RModelParser_ONNX &parser, const onnx::NodeP
    std::string output_name = nodeproto.output(0);
 
    switch (input_type) {
+   // note: the ROperator_Where signature takes the condition as its first tensor
    case ETensorType::FLOAT:
       op.reset(new ROperator_Where<float>(nodeproto.input(0), nodeproto.input(1), nodeproto.input(2), output_name));
       break;

From 1748d4ab643ffb9cd5d81f84b16208c2e7a80644 Mon Sep 17 00:00:00 2001
From: moneta <lorenzo.moneta@cern.ch>
Date: Mon, 11 May 2026 18:05:37 +0200
Subject: [PATCH 4/8] [tmva][sofie] Fix a bug in Gather operator when output is
 a param shape tensor

When the output is a param shape tensor, the tensor values were not assigned at initialization as they are for a constant tensor; they need to be set at run time in the infer function
because they depend on the provided dynamic shape values.

Also fix an issue on Windows in the new ConvertValuesToString implementation when dealing with inf values: create specialisations for float and double which handle the infinity values via std::numeric_limits.
---
 tmva/sofie/inc/TMVA/ROperator_Gather.hxx  | 27 +++++++++----
 tmva/sofie/inc/TMVA/ROperator_Gemm.hxx    |  4 +-
 tmva/sofie/inc/TMVA/ROperator_Reshape.hxx |  1 -
 tmva/sofie/inc/TMVA/SOFIE_common.hxx      | 49 ++++++++++++++++-------
 4 files changed, 56 insertions(+), 25 deletions(-)

diff --git a/tmva/sofie/inc/TMVA/ROperator_Gather.hxx b/tmva/sofie/inc/TMVA/ROperator_Gather.hxx
index ad91d1256ded1..df72e29fcc85d 100644
--- a/tmva/sofie/inc/TMVA/ROperator_Gather.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_Gather.hxx
@@ -17,6 +17,7 @@ class ROperator_Gather final : public ROperator
 {
 private:
 
+   bool fIsOutputParamShape = false; // for shape outputs
    int64_t fAttrAxis = 0;
 
    std::string fNX;
@@ -26,11 +27,13 @@ private:
    std::vector<Dim> fShapeX;
    std::vector<Dim> fShapeIndices;
    std::vector<Dim> fShapeY;
+   std::vector<Dim> fOutputShapeData;
 
    std::vector<int64_t> fIndices;  // indices vector in case they are known at initialization
 
    std::string fType;
 
+
 public:
    ROperator_Gather(){}
    ROperator_Gather(int64_t attrAxis, std::string nameX, std::string nameIndices, std::string nameY):
@@ -121,17 +124,17 @@ public:
       else if (model.IsShapeTensor(fNX) && q <=1  && fIndices.size() > 0) {
          auto inputData = model.GetShapeTensorValues(fNX);
          // if r == 1 and q<=1 then output length is 1 (is a scalar or tensor of size1)
-         std::vector<Dim> outputData(1);
-         outputData[0] = inputData[fIndices[0]];
-         if (outputData[0].isParam) {
-            fIsOutputConstant = true;
+         fOutputShapeData.resize(1);
+         fOutputShapeData[0] = inputData[fIndices[0]];
+         if (fOutputShapeData[0].isParam) {
+            fIsOutputParamShape = true;
             // shapeY can be scalar or vector of size1
-            model.AddShapeTensor(fNY, outputData, fShapeY.size() == 0);
+            model.AddShapeTensor(fNY, fOutputShapeData, fShapeY.size() == 0);
             if (model.Verbose())
                std::cout << "Gather: " << fNX << " " << ConvertDimShapeToString(fShapeX) << " -> " << fNY << " with shape " << ConvertDimShapeToString(fShapeY)
-                   << " and values " << ConvertDimShapeToString(outputData) << " (shape) " << std::endl;
+                   << " and values " << ConvertDimShapeToString(fOutputShapeData) << " (shape) " << std::endl;
          } else {
-            int64_t value = static_cast<int64_t>(outputData[0].dim);
+            int64_t value = static_cast<int64_t>(fOutputShapeData[0].dim);
             auto shapeY = ConvertShapeToInt(fShapeY);
             model.AddConstantTensor(fNY, shapeY, &value);
             fIsOutputConstant = true;
@@ -140,7 +143,7 @@ public:
                    << " and values {" << value <<  "} (constant) " << std::endl;
          }
       }
-      if (!fIsOutputConstant) {
+      if (!fIsOutputConstant && !fIsOutputParamShape) {
          // Add output tensor
          model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShapeY);
          fType = ConvertTypeToString(model.GetTensorType(fNX));
@@ -159,6 +162,14 @@ public:
          out << "//--------------------(constant)----------\n";
          return out.str();
       }
+      if (fIsOutputParamShape) {
+         // param shape output: the values depend on the dynamic shape parameters, so they are assigned here at run time
+         out << "//--------------------(shape)----------\n";
+         for (int i = 0; i < static_cast<int>(fOutputShapeData.size()); i++) {
+            out << SP << "tensor_" << fNY << "[" << i << " ] = " << fOutputShapeData[i].GetVal() << ";\n";
+         }
+         return out.str();
+      }
       // The shape of the output is q + r - 1
       size_t r = fShapeX.size();
       // Indices of shape q
diff --git a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx
index c8ea219f4e228..e3fe93e7a8184 100644
--- a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx
@@ -344,7 +344,9 @@ namespace SOFIE{
             //in this case fAttrBeta needs to be equal to zero otherwise second time we run we will use
             // the previous result
             if (fAttrBeta != 0) {
-               throw std::runtime_error("TMVA SOFIE Gemm Op " + opName + " Bias tensor is not present but beta value in Gemm is not zero");
+               // some models don't have a bias but Beta is not zero - force it to zero
+               fAttrBeta = 0;
+               std::cout << "WARNING: TMVA SOFIE Gemm Op " + opName + " Bias tensor is not present but beta value in Gemm is not zero - force it to zero";
             }
          }
 
diff --git a/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx b/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx
index 41946a33085b5..bbb55252300e2 100644
--- a/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx
@@ -293,7 +293,6 @@ public:
          }
       } else if (!fAttrAxes.empty()) {
          // case fNShape is empty and axes are provided as attributes (e.g. for Unsqueeze)
-         std::cout << "attribute axes exists\n";
          fShapeOutput = ShapeInference({fShapeInput})[0];
       } else if (fOpMode == Flatten || fOpMode == Squeeze) {
          fShapeOutput = ShapeInference({fShapeInput})[0];
diff --git a/tmva/sofie/inc/TMVA/SOFIE_common.hxx b/tmva/sofie/inc/TMVA/SOFIE_common.hxx
index ff206db95f981..d921d5ae1f772 100644
--- a/tmva/sofie/inc/TMVA/SOFIE_common.hxx
+++ b/tmva/sofie/inc/TMVA/SOFIE_common.hxx
@@ -215,9 +215,39 @@ std::string ConvertDimShapeToLength(const std::vector<Dim> & shape);
 template<class T>
 std::string ConvertValToString(T value) {
    std::stringstream ret;
-   if (std::is_floating_point_v<T>)
-      ret << std::setprecision(std::numeric_limits<T>::max_digits10);
-   ret << value;
+   ret << std::to_string(value);
+   return ret.str();
+}
+// float specialization
+template<>
+inline std::string ConvertValToString<float>(float value) {
+   std::stringstream ret;
+   // special case for infinity and Nan
+   if (std::isinf(value))
+         ret << (value > 0 ? "std::numeric_limits<float>::infinity()" :
+                                  "-std::numeric_limits<float>::infinity()");
+   else if (std::isnan(value))
+         ret << "std::numeric_limits<float>::quiet_NaN()";
+   else {
+      ret << std::setprecision(std::numeric_limits<float>::max_digits10);
+      ret << value;
+   }
+   return ret.str();
+}
+// double specialization
+template<>
+inline std::string ConvertValToString<double>(double value) {
+   std::stringstream ret;
+   // special case for infinity and Nan
+   if (std::isinf(value))
+         ret << (value > 0 ? "std::numeric_limits<double>::infinity()" :
+                                  "-std::numeric_limits<double>::infinity()");
+   else if (std::isnan(value))
+         ret << "std::numeric_limits<double>::quiet_NaN()";
+   else {
+      ret << std::setprecision(std::numeric_limits<double>::max_digits10);
+      ret << value;
+   }
    return ret.str();
 }
 
@@ -228,18 +258,7 @@ std::string ConvertValuesToString(size_t n, const T * data, size_t maxprint = -1
    std::stringstream ret;
    ret << "{ ";
    for (size_t i = 0; i < std::min(n,maxprint); i++) {
-      if (std::is_floating_point_v<T>) {
-         // special case for infinity and Nan
-         if (std::isinf(data[i]))
-            ret << (data[i] > 0 ? "std::numeric_limits<" + TensorType<T>::Name() + ">::infinity()" :
-                                  "-std::numeric_limits<" + TensorType<T>::Name() + ">::infinity()");
-         else if (std::isnan(data[i]))
-            ret << "std::numeric_limits<" + TensorType<T>::Name() + ">::quiet_NaN()";
-         else
-            ret << std::setprecision(std::numeric_limits<T>::max_digits10) << data[i];
-      } else {
-         ret << std::to_string(data[i]);
-      }
+      ret << ConvertValToString(data[i]);
       if (i < n-1) ret << ", ";
       if (i < n-1 && i == maxprint-1) ret << "..... ";
    }

From 84c2715807307331f436e27b11980a5b4ac75ce2 Mon Sep 17 00:00:00 2001
From: moneta <lorenzo.moneta@cern.ch>
Date: Fri, 15 May 2026 23:17:56 +0200
Subject: [PATCH 5/8] [tmva][sofie] Apply fixes for shape tensors

Fix some operators when the input and/or output is a shape tensor

Fix also in Gemm when broadcasting dynamic shape for the bias
---
 tmva/sofie/inc/TMVA/ROperator.hxx             |   2 +-
 tmva/sofie/inc/TMVA/ROperator_BasicBinary.hxx |   8 +-
 tmva/sofie/inc/TMVA/ROperator_Concat.hxx      | 174 +++++++++++-------
 tmva/sofie/inc/TMVA/ROperator_Gather.hxx      |  11 +-
 tmva/sofie/inc/TMVA/ROperator_Gemm.hxx        |  72 +++++++-
 tmva/sofie/inc/TMVA/ROperator_Reshape.hxx     | 117 +++++++-----
 tmva/sofie/inc/TMVA/ROperator_Slice.hxx       |  49 ++++-
 tmva/sofie/inc/TMVA/ROperator_Where.hxx       |  19 +-
 tmva/sofie/inc/TMVA/SOFIE_common.hxx          |  10 +
 9 files changed, 319 insertions(+), 143 deletions(-)

diff --git a/tmva/sofie/inc/TMVA/ROperator.hxx b/tmva/sofie/inc/TMVA/ROperator.hxx
index f23be2f3ac057..9a1464f6d449a 100644
--- a/tmva/sofie/inc/TMVA/ROperator.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator.hxx
@@ -45,7 +45,7 @@ protected:
    const std::string SP = "   ";    ///< space used to correctly indent the generated C++ code
    bool fUseSession = false;        ///< flag to identify if using the session class
    bool fIsOutputConstant = false;  ///< flag to identify if operator has a constant output (no need to generate code)
-   bool fIsOutputParamShape = false;     ///< flag to identify of the output represents a parametric shape (can be knwon at compile time)
+   bool fIsOutputParamShape = false;     ///< flag to identify if the output represents a parametric shape (can be known at compile time)
 
    mutable std::vector<std::string_view> fInputTensorNames;
    mutable std::vector<std::string_view> fOutputTensorNames;
diff --git a/tmva/sofie/inc/TMVA/ROperator_BasicBinary.hxx b/tmva/sofie/inc/TMVA/ROperator_BasicBinary.hxx
index da18363e50e81..e6f0dccbf81ba 100644
--- a/tmva/sofie/inc/TMVA/ROperator_BasicBinary.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_BasicBinary.hxx
@@ -209,8 +209,8 @@ public:
                          << ConvertShapeToString(fShapeY) << " : " << ConvertValuesToString(dataY) << std::endl;
             }
          } else if (((model.IsShapeTensor(fNA) && model.IsShapeTensor(fNB)) ||
-                    (model.IsShapeTensor(fNA) && model.IsConstantTensor(fNB)) ||
-                    (model.IsShapeTensor(fNB) && model.IsConstantTensor(fNA)))
+                    (model.IsShapeTensor(fNA) && model.IsInitializedTensor(fNB)) ||
+                    (model.IsShapeTensor(fNB) && model.IsInitializedTensor(fNA)))
                      && (fShapeA.size() <=1 && fShapeB.size() <=1 &&  model.GetTensorType(fNA) == ETensorType::INT64)) {
             // case of shape tensors ( tensors are of rank 0 or 1  )
             std::vector<Dim> dimValA;
@@ -235,9 +235,9 @@ public:
                      dimValues[i] = Dim{ static_cast<size_t>(data[0])};
                }
             };
-            if (model.IsConstantTensor(fNA)) {
+            if (model.IsInitializedTensor(fNA)) {
                convertDataToDim(fNA,fShapeA,dimValA);
-            } else if (model.IsConstantTensor(fNB)) {
+            } else if (model.IsInitializedTensor(fNB)) {
                convertDataToDim(fNB,fShapeB,dimValB);
             }
 
diff --git a/tmva/sofie/inc/TMVA/ROperator_Concat.hxx b/tmva/sofie/inc/TMVA/ROperator_Concat.hxx
index aaef31eff98f3..75b764c3294b3 100644
--- a/tmva/sofie/inc/TMVA/ROperator_Concat.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_Concat.hxx
@@ -24,6 +24,7 @@
          std::vector<std::string> fInputs;
          std::string fOutput;
          std::vector<Dim>fOutputShape;
+         std::vector<Dim> fOutputShapeData; // in case output is a shape tensor we store here the output shape value data (can be parametric)
          std::vector<std::vector<Dim>> fInputShapes;
 
      public:
@@ -170,82 +171,125 @@
          }
 
          void Initialize(RModel& model) override {
+            std::vector<std::vector<size_t>> inputIntShapes;
             for (auto &it : fInputs) {
                if (model.CheckIfTensorAlreadyExist(it) == false) {
                   throw std::runtime_error("TMVA SOFIE Concat Op Input Tensor " + it + " is not found in model");
                }
                fInputShapes.push_back(model.GetDimTensorShape(it));
+               if (!model.IsDynamicTensor(it)) {
+                  inputIntShapes.push_back(ConvertShapeToInt(fInputShapes.back()));
+               }
+            }
+            if (inputIntShapes.size() == fInputs.size()) {
+               // if all input shapes are static we can compute output shape at initialization time
+               auto outputIntShape = ShapeInference(inputIntShapes)[0];
+               fOutputShape = ConvertShapeToDim(outputIntShape);
+               if (model.Verbose())
+                  std::cout << "Initialize Concat operator with defined inputs shapes, "
+                           << "output has shape " << ConvertShapeToString(outputIntShape) << std::endl;
+
+            } else {
+               // if at least one input shape is dynamic we need to compute output shape using the symbolic expression for the dimensions
+               fOutputShape = ShapeInference(fInputShapes, model);
+               if (model.Verbose())
+                  std::cout << "Initialize Concat operator with dynamic inputs shapes, "
+                           << "output has shape " << ConvertDimShapeToString(fOutputShape) << std::endl;
             }
-            fOutputShape = ShapeInference(fInputShapes, model);
-            if (model.Verbose())
-               std::cout << "Output of concat operator has shape " << ConvertDimShapeToString(fOutputShape) << std::endl;
 
             // check if concat has constant inputs , axis 0(concat contigous memory and type is integer)
             bool isOutputShape = false;
-            if (model.GetTensorType(fInputs[0]) == ETensorType::INT64 && fAxis == 0) {
-               fIsOutputConstant = true;
-               isOutputShape = true;
 
-               for ( auto & input : fInputs) {
-                  if (!model.IsInitializedTensor(input)) {
-                     fIsOutputConstant = false;
-                     if (!model.IsShapeTensor(input)) {
-                        isOutputShape = false;
-                        break;
-                     }
-                  }
+            // if (model.GetTensorType(fInputs[0]) == ETensorType::INT64 && fAxis == 0) {
+            fIsOutputConstant = true;
+            isOutputShape = true;
+
+            for (auto &input : fInputs) {
+               if (model.IsDynamicTensor(input)) {
+                  fIsOutputConstant = false;
+                  isOutputShape = false;
+                  break;
                }
-               if (fIsOutputConstant) {
-                  auto outputShape = ConvertShapeToInt(fOutputShape);  // conversion must be possible
-                  std::vector<int64_t> outputData(ConvertShapeToLength(outputShape));
-                  size_t offset = 0;
-                  for ( auto & input : fInputs) {
-                     auto inputData = static_cast<int64_t*>(model.GetInitializedTensorData(input).get());
-                     auto inputShape = model.GetTensorShape(input); // shape is not dynamic if it is constant
-                     size_t inputLength = ConvertShapeToLength(inputShape);
-                     std::copy(inputData, inputData + inputLength, outputData.begin() + offset );
-                     offset += inputLength;
-                     // the data of the input tensor don't need to be written in the generated code and data file
-                     model.SetNotWritableInitializedTensor(input);
-                  }
-                  model.AddConstantTensor<int64_t>(fOutput, outputShape, outputData.data());
-                  if (model.Verbose()) {
-                     std::cout << "output of Concat is a constant tensor " << ConvertShapeToString(outputShape) << " : "
-                     << ConvertValuesToString(outputData) << " (constant)" << std::endl;
-                  }
-               } else if (isOutputShape) {
-                  auto outputShape = ConvertShapeToInt(fOutputShape);  // conversion must be possible
-                  std::vector<Dim> outputData(ConvertShapeToLength(outputShape));
-                  size_t offset = 0;
-                  for ( auto & input : fInputs) {
-                     std::vector<Dim> inputData;
-                     auto inputShape = model.GetTensorShape(input); // shape is not dynamic
-                     size_t inputLength = ConvertShapeToLength(inputShape); // shape can be a scalar
-                     if (model.IsShapeTensor(input)) {
-                        inputData = model.GetShapeTensorValues(input);
-                     } else if (model.IsInitializedTensor(input)) {
-                        inputData.resize(inputLength);
-                        auto intData = static_cast<int64_t*>(model.GetInitializedTensorData(input).get());
-                        for (size_t i = 0; i < inputData.size(); i++)
-                           inputData[i] = Dim{ static_cast<size_t>(intData[i])};
-                     }
-                     else {
-                        // this should not happen
-                        throw std::runtime_error("TMVA SOFIE Concat Operator- invalid input type for shape output type");
+               if (!model.IsInitializedTensor(input)) {
+                  if (model.IsShapeTensor(input)) {
+                     // if it is a shape tensor we can have constant output if the shapes are defined
+                     auto shapeData = model.GetShapeTensorValues(input);
+                     bool isShapeFullyDefined = ConvertShapeToInt(shapeData).size() == shapeData.size();
+                     if (!isShapeFullyDefined) {
+                        fIsOutputConstant = false;
+                     } else {
+                        // if shape is fully defined we can consider output as constant and we can compute the output
+                        // shape at initialization time
+                        fIsOutputConstant = fIsOutputConstant && true;
                      }
-                     std::copy(inputData.begin(), inputData.end(), outputData.begin() + offset );
-                     offset += inputLength;
+                     // inputs are then shape tensors and output is a shape tensor
+                     isOutputShape = true;
+                  } else {
+                     // case of standard intermediate tensor
+                     fIsOutputConstant = false;
+                     isOutputShape = false;
+                     break;
                   }
-                  // add output tensor
-                  model.AddShapeTensor(fOutput,outputData, false); // cannot be a  scalar
-                  if (model.Verbose()) {
-                     std::cout << "output of Concat is a shape tensor " << ConvertShapeToString(outputShape) << " : "
-                     << ConvertDimShapeToString(outputData) << " (shape)" <<  std::endl;
+               } else {
+                  fIsOutputConstant = fIsOutputConstant && true;
+               }
+            }
+            //}
+
+            if (fIsOutputConstant) {
+               auto outputShape = ConvertShapeToInt(fOutputShape); // conversion must be possible
+               std::vector<int64_t> outputData(ConvertShapeToLength(outputShape));
+               size_t offset = 0;
+               for (auto &input : fInputs) {
+                  auto inputData = static_cast<int64_t *>(model.GetInitializedTensorData(input).get());
+                  auto inputShape = model.GetTensorShape(input); // shape is not dynamic if it is constant
+                  size_t inputLength = ConvertShapeToLength(inputShape);
+                  std::copy(inputData, inputData + inputLength, outputData.begin() + offset);
+                  offset += inputLength;
+                  // the data of the input tensor don't need to be written in the generated code and data file
+                  model.SetNotWritableInitializedTensor(input);
+               }
+               model.AddConstantTensor<int64_t>(fOutput, outputShape, outputData.data());
+               if (model.Verbose()) {
+                  std::cout << "output of Concat is a constant tensor " << ConvertShapeToString(outputShape) << " : "
+                            << ConvertValuesToString(outputData) << " (constant)" << std::endl;
+               }
+            } else if (isOutputShape) {
+               auto outputShape = ConvertShapeToInt(fOutputShape); // conversion must be possible
+               if (outputShape.size() != 1)
+                  throw std::runtime_error("TMVA SOFIE Concat Op - output shape for shape tensor must have rank 1");
+               // output shape is a rank 1 tensor with size equal to the output rank
+               std::vector<Dim> outputData(outputShape[0]);
+               size_t offset = 0;
+               for (auto &input : fInputs) {
+                  std::vector<Dim> inputData;
+                  auto inputShape = model.GetTensorShape(input);         // shape is not dynamic
+                  size_t inputLength = ConvertShapeToLength(inputShape); // shape can be a scalar
+                  if (model.IsShapeTensor(input)) {
+                     inputData = model.GetShapeTensorValues(input);
+                  } else if (model.IsInitializedTensor(input)) {
+                     inputData.resize(inputLength);
+                     auto intData = static_cast<int64_t *>(model.GetInitializedTensorData(input).get());
+                     for (size_t i = 0; i < inputData.size(); i++)
+                        inputData[i] = Dim{static_cast<size_t>(intData[i])};
+                  } else {
+                     // this should not happen
+                     throw std::runtime_error("TMVA SOFIE Concat Operator- invalid tensor input " + input +
+                                              " for shape output type");
                   }
-                  fIsOutputConstant = true;
+                  std::copy(inputData.begin(), inputData.end(), outputData.begin() + offset);
+                  offset += inputLength;
                }
+               // add output tensor
+               model.AddShapeTensor(fOutput, outputData, false); // cannot be a  scalar
+               fOutputShapeData = outputData;
+               if (model.Verbose()) {
+                  std::cout << "output of Concat is a shape tensor " << ConvertShapeToString(outputShape) << " : "
+                            << ConvertDimShapeToString(outputData) << " (shape)" << std::endl;
+               }
+               fIsOutputParamShape = true;
             }
-            if (!fIsOutputConstant) {
+            if (!fIsOutputConstant && !fIsOutputParamShape) {
                model.AddIntermediateTensor(fOutput, model.GetTensorType(fInputs[0]), fOutputShape);
                if (model.Verbose()) {
                   std::cout << "Concat ---> " << fOutput << " " <<  ConvertDimShapeToString(fOutputShape) << std::endl;
@@ -260,8 +304,14 @@
 
             if (fIsOutputConstant) return out.str();
 
-            if(fOutputShape.empty()){
-                  throw std::runtime_error("TMVA SOFIE Concat called to Generate without being initialized first");
+            if (fIsOutputParamShape) {
+               // output is a shape tensor defined by the concatenation of the input shapes
+               out << "// output is a shape tensor defined by the concatenation of the input shapes\n";
+               for (int i = 0; i < static_cast<int>(fOutputShape
+                  [0].dim); i++) {
+                  out << SP << "tensor_" << fOutput << "[" << i << "] = " << fOutputShapeData[i] << ";\n";
+               }
+               return out.str();
             }
             // special case when memory is contiguous
             bool hasShapeOnes = true;
diff --git a/tmva/sofie/inc/TMVA/ROperator_Gather.hxx b/tmva/sofie/inc/TMVA/ROperator_Gather.hxx
index df72e29fcc85d..3287d25af977f 100644
--- a/tmva/sofie/inc/TMVA/ROperator_Gather.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_Gather.hxx
@@ -17,7 +17,6 @@ class ROperator_Gather final : public ROperator
 {
 private:
 
-   bool fIsOutputParamShape = false; // for shape outputs
    int64_t fAttrAxis = 0;
 
    std::string fNX;
@@ -74,18 +73,18 @@ public:
       if (model.IsInitializedTensor(fNIndices)) {
           // empty shape Indices is a scalar value for the indices
          size_t indicesLength = ConvertShapeToLength(model.GetTensorShape(fNIndices));
-         int64_t* indicesData = static_cast<int64_t*>(model.GetInitializedTensorData(fNIndices).get());
+         int64_t* data = static_cast<int64_t*>(model.GetInitializedTensorData(fNIndices).get());
+         // copy in a vector since we may need to update the values in case of negative indices
+         fIndices =std::vector<int64_t>(data, data + indicesLength);
          // update indices data in case of negative dim values
          for (size_t i = 0; i < indicesLength; i++) {
             // move this at generation time?
             if (!fShapeX[fAttrAxis].isParam) {
-               if (indicesData[i] < 0) {
-                  indicesData[i] += fShapeX[fAttrAxis].dim;
+               if (fIndices[i] < 0) {
+                  fIndices[i] += fShapeX[fAttrAxis].dim;
                }
             }
          }
-         // Save in a vector gather Indices of size q
-         fIndices = std::vector<int64_t>(indicesData, indicesData + indicesLength);
       }
       // Output shape
       if (model.Verbose())
diff --git a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx
index e3fe93e7a8184..7b8a2e989427a 100644
--- a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx
@@ -25,6 +25,7 @@ namespace SOFIE{
    private:
       bool fIsDynamic = false;
       bool fBroadcastBias = false;
+      bool fCheckBiasShapeAtRuntime = false; // flag to identify the need to do a run time check of bias shape compatibility in case of dynamic shapes and uni-directional broadcasting
 
       float fAttrAlpha = 1.0;
       float fAttrBeta = 1.0;
@@ -232,9 +233,13 @@ namespace SOFIE{
                fBroadcastBias = true;
                // check if broadcasting is compatible and note that prepend 1 to shapeC
                auto r = UTILITY::MultidirectionalBroadcastShape(fShapeY, fDimShapeC);
-               // return flag must be equal to 1 since this is a unidirectional broadcast of C->Y
-               if (r.first > 1) {
+               // return flag must not have bit equal to 2 since this is a unidirectional broadcast of C->Y
+               //
+               if ((r.first & 2) == 2) {
                   throw std::runtime_error("TMVA SOFIE Gemm Op - bias tensor of shape " + ConvertDimShapeToString(fDimShapeC) + " cannot be uni-directional broadcasted to " + ConvertDimShapeToString(fShapeY));
+               } else if (r.first  == 4) {
+                  // we need to do a run time check of bias shape if it is compatible
+                  fCheckBiasShapeAtRuntime = true;
                }
                fShapeC = ConvertShapeToInt(fDimShapeC);
             }
@@ -273,9 +278,9 @@ namespace SOFIE{
       std::string Generate(std::string opName) override {
          opName = "op_" + opName;
 
-         if (fShapeA.empty() || fShapeB.empty() || fShapeY.empty() || (fNC != "" && fShapeC.empty())) {
-            throw std::runtime_error("TMVA SOFIE Gemm Op called to Generate without being initialized first");
-         }
+         // if (fShapeA.empty() || fShapeB.empty() || fShapeY.empty() || (fNC != "" && fShapeC.empty())) {
+         //    throw std::runtime_error("TMVA SOFIE Gemm Op called to Generate without being initialized first");
+         // }
          std::stringstream out;
          out << "\n//--------- Gemm " << opName << " " << ConvertDimShapeToString(fShapeA) << " * " << ConvertDimShapeToString(fShapeB)
              << " -> " << ConvertDimShapeToString(fShapeY) << "\n";
@@ -346,7 +351,7 @@ namespace SOFIE{
             if (fAttrBeta != 0) {
                // some model don't have bias but Beta is not zero - force it to zero
                fAttrBeta = 0;
-               std::cout << "WARNING: TMVA SOFIE Gemm Op " + opName + " Bias tensor is not present but beta value in Gemm is not zero - force it to zero";
+               std::cout << "WARNING: TMVA SOFIE Gemm Op " + opName + " Bias tensor is not present but beta value in Gemm is not zero - force it to zero\n";
             }
          }
 
@@ -373,6 +378,47 @@ namespace SOFIE{
          bool extraA = (doStackMul && lengthExtra_A != "1");
          bool extraB = (doStackMul && lengthExtra_B != "1");
          bool extraC = (doStackMul && haveExtraC && !fBroadcastBias);
+         // run time check for bias broadcasting
+         std::string biasShapeType = opName + "_biasShapeType";
+         if (fBroadcastBias && fCheckBiasShapeAtRuntime) {
+            // create a flag according to bias shape:
+            // = 1 for (1,Y2)
+            // = 2 for (Y1,1)
+            // = 3 for a scalar
+            out << SP << "int " << biasShapeType << " = 0;\n";
+            // case vector of columns
+            if (sC[0].GetVal() != "1" && sC[1].GetVal() != sY[1].GetVal())
+               out << SP << "if (" << sC[0] << " == 1 && " << sC[1] << " == " << sY[1] << ")\n";
+            else if (sC[0].GetVal() == "1")
+               out << SP << "if (" << sC[1] << " == " << sY[1] << ")\n";
+            else if (sC[1].GetVal() == sY[1].GetVal())
+               out << SP << "if (" << sC[0] << " == 1)\n";
+
+            out << SP << SP << biasShapeType << " = 1;\n";
+
+            // case vector of rows
+            if (sC[1].GetVal() != "1" && sC[0].GetVal() != sY[0].GetVal())
+               out << SP << "else if (" << sC[1] << " == 1 && " << sC[0] << " == " << sY[0] << ")\n";
+            else if (sC[1].GetVal() == "1")
+                out << SP << "else if (" << sC[0] << " == " << sY[0] << ")\n";
+            else if (sC[0].GetVal() == sY[0].GetVal())
+               out << SP << "else if (" << sC[1] << " == 1)\n";
+
+            out << SP << SP << biasShapeType << " = 2;\n";
+
+            // case scalar
+            if (sC[0].GetVal() != "1" && sC[1].GetVal() != "1")
+               out << SP << "else if (" << sC[0] << " == 1 && " << sC[1] << " == 1 )\n";
+            else if (sC[0].GetVal() == "1")
+               out << SP << "else if (" << sC[1] << " == 1)\n";
+            else if (sC[1].GetVal() == "1")
+               out << SP << "else if (" << sC[0] << " == 1)\n";
+            out << SP << SP << biasShapeType << " = 3;\n";
+            out << SP << "else\n";
+            out << SP << SP << "throw std::runtime_error(\"TMVA SOFIE Gemm Op - bias tensor "
+                                 << ConvertDimShapeToString(fDimShapeC) << " cannot be broadcasted to "
+                                 << ConvertDimShapeToString(fShapeY) << "\");\n";
+         }
          auto SP2 = SP;
          if (doStackMul) {
             out << SP << "size_t " << opName << "_y_offset = 0;\n"; // needed if we stack the gemm operations
@@ -390,9 +436,11 @@ namespace SOFIE{
          if (fBroadcastBias) {
 
             fAttrBeta = 1.;
+
+            // loop on first output dimension
             out << SP2 << "for (size_t j = 0; j < " << sY[0] << "; j++) { \n";
             out << SP2 << SP << "size_t y_index = ";
-            if (doStackMul) // add offset in caseof stack multiplications (not sure if bias is present in these cases)
+            if (doStackMul) // add offset in case of stack multiplications (not sure if bias is present in these cases)
                out <<  opName << "_y_offset + ";
             if (sY[1].GetVal() != "1")
                out << sY[1] << " * j;\n";
@@ -410,6 +458,16 @@ namespace SOFIE{
             } else if (sC[0].GetVal() == "1" && sC[1].GetVal() == "1") {
                // scalar case
                out << prefix << "Fill(" << target << " + y_index, tensor_" << fNC << "[0], " << sY[1] << ");\n";
+            } else if (fCheckBiasShapeAtRuntime) {
+               // in the generic dynamic case we check at run time that bias is compatible
+               // we check that bias[0] = 1 or equal to SY[0] and that bias[1] = 1 or equal to SY[1]
+               // tbd: this run-time check could be moved outside the loop for better run time efficiency
+               out << SP2 << SP << "if (" << biasShapeType << " == 1)\n";   // case vector of columns
+               out << SP << prefix << "Copy(" << target << " + y_index, tensor_" << fNC << ", " << sY[1] << ");\n";
+               out << SP2 << SP << "else if (" << biasShapeType << " == 2)\n";  // case vector of rows
+               out << SP << prefix << "Fill(" << target << " + y_index, tensor_" << fNC << "[j], " << sY[1] << ");\n";
+               out << SP2 << SP << "else \n";  // scalar case
+               out << SP << prefix << "Fill(" << target << " + y_index, tensor_" << fNC << "[0], " << sY[1] << ");\n";
             } else {
                throw std::runtime_error("TMVA SOFIE Gemm Op - invalid shape for bias tensor " + ConvertDimShapeToString(fDimShapeC));
             }
diff --git a/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx b/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx
index bbb55252300e2..dee859978e76a 100644
--- a/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx
@@ -32,11 +32,12 @@ private:
 
    std::string fNData;        // input data tensor name
    std::string fNInput2;       // reshape or axes tensor name depending on operator
-   std::string fNOutput;               // output tensor name
-   std::vector<Dim> fShapeInput;     // input shape data
+   std::string fNOutput;            // output tensor name
+   std::vector<Dim> fShapeInput;    // input shape data
    std::vector<Dim> fShapeOutput;   // output shape data
-   std::vector<int64_t> fAttrAxes;         // axes attributes (provided for all version of Squeeze/Unsqueeze)
-   std::vector<int64_t> fShape;     // shape tensor values provided for Reshape
+   std::vector<Dim> fOutputShapeData; // in case output is a shape tensor we store here the shape value data (can be parametric)
+   std::vector<int64_t> fAttrAxes;  // axes attributes (provided for all version of Squeeze/Unsqueeze)
+   std::vector<int64_t> fShape;     // shape tensor values provided for Reshape for int shapes
 
 public:
 
@@ -74,34 +75,44 @@ public:
       fOutputTensorNames = { fNOutput };
    }
 
-   // output type is same as input
-   std::vector<ETensorType> TypeInference(std::vector<ETensorType> input) override {
-      auto ret = std::vector<ETensorType>(1, input[0]);
-      return ret;
-   }
-   std::vector<std::vector<size_t>> ShapeInference(std::vector<std::vector<size_t>> input) override {
-      return input;
-   }
 
    // output shape
-   std::vector<std::vector<Dim>> ShapeInference(const std::vector<std::vector<Dim>> & input)  {
-      std::vector<std::vector<Dim>> ret;
-      auto & input_shape = input[0];
+   std::vector<Dim> DoShapeInference(const std::vector<Dim> & input_shape, const std::vector<Dim> & target_shape)  {
       if (fOpMode == Reshape) {
          // correct the provided shape (here we have the value) for 0 or -1
-         std::vector<Dim> output_shape(fShape.size());
-         assert(!fShape.empty() && !fDynamicShape);
+         // the target_shape can be a scalar in case of not present shape input tensor
+         std::vector<Dim> output_shape = target_shape;
+         bool hasMinusOne = false;
+         bool hasZero = false;
          for (size_t i = 0; i < output_shape.size(); i++) {
-            if (fShape[i] > 0 || (fAllowZero && fShape[i] >= 0))
-               output_shape[i] = Dim{ static_cast<size_t>(fShape[i]) };
-            else if (!fAllowZero && fShape[i] == 0)
-               output_shape[i] = input_shape[i];
+            // case for zero values in given shape: in this case we take the corresponding value from input shape
+            if (!output_shape[i].isParam) {
+               if (output_shape[i].dim == 0) {
+                  hasZero = true;
+                  if (fAllowZero)
+                     output_shape[i] = Dim{0};
+                  else {
+                     if (i > 0 && output_shape.size() != input_shape.size())
+                        std::cout << "WARNING: TMVA Reshape Op : output shape has zero value at index " << i <<
+                                  " but input shape has a different rank than output shape" << std::endl;
+                     if (i >= input_shape.size())
+                        throw std::runtime_error("TMVA Reshape Op : output shape has zero value at index " + std::to_string(i) +
+                              " but input shape does not have corresponding index");
+                     }
+                     output_shape[i] = input_shape[i];
+               } else if (output_shape[i].dim == static_cast<size_t>(-1)) {
+                  hasMinusOne = true;
+               }
+            }
+         }
+         if (hasZero && hasMinusOne) {
+            throw std::runtime_error("TMVA Reshape Op : zero value in shape is not allowed when there is also a -1 in shape");
          }
-         // now case of -1 in shape
+         // now case of -1 in shape - we can infer the value of -1 from all other values
          for (size_t i = 0; i < output_shape.size(); i++) {
-            if (fShape[i] == -1) {
+            if (output_shape[i] == static_cast<size_t>(-1) && !output_shape[i].isParam) {
                auto tmp = output_shape;
-               tmp.erase(tmp.begin() + i);
+               tmp.erase(tmp.begin() + i); // erase -1 value to compute the length of the other dimensions
                auto tmp_length = ConvertDimShapeToLength(tmp);
                auto input_length = ConvertDimShapeToLength(input_shape);
                if (fVerbose)
@@ -174,7 +185,7 @@ public:
             throw std::runtime_error("TMVA Reshape Op : Invalid  shapes : " + ConvertDimShapeToString(input_shape) +
                                      ConvertDimShapeToString(output_shape));
          }
-         ret.push_back(output_shape);
+         return output_shape;
 
       } else if (fOpMode == Flatten) {
          // flatten case
@@ -185,7 +196,7 @@ public:
          auto l1 = ConvertDimShapeToLength(s1);
          auto l2 = ConvertDimShapeToLength(s2);
          std::vector<Dim> newShape = {Dim{l1}, Dim{l2}};
-         ret.push_back(newShape);
+         return newShape;
       } else if (fOpMode == Squeeze) {
          // squeeze
          // assume no axis is provided - remove all axes with value equal to 1
@@ -200,10 +211,8 @@ public:
                }
             }
          } else {
-            std::cout << "getting shape for Squeeze...from attribute\n";
             auto axes = fAttrAxes;
             for (size_t i = 0; i < axes.size(); i++) {
-               std::cout << i << "  " << axes[i] << std::endl;
                if (axes[i] < 0)
                   axes[i] += input_shape.size();
                if (!(output_shape[axes[i]] == Dim{1}))
@@ -213,13 +222,10 @@ public:
             // for calling vector::erase we must sort axes in decreasing order to avoid
             std::sort(axes.begin(), axes.end(), std::greater<int>());
             for (auto & axis : axes) {
-               std::cout << "erase give axis " << axis << "  -> ";
-               for (auto & o : output_shape) std::cout << o << " , ";
-               std::cout << std::endl;
                output_shape.erase(output_shape.begin() + axis);
             }
          }
-         ret.push_back(output_shape);
+         return output_shape;
       }
       else if (fOpMode == Unsqueeze) {
          // unsqueeze
@@ -227,7 +233,7 @@ public:
          auto output_shape = input_shape;
          auto &axes = fAttrAxes;
          // output rank
-         int64_t r = input[0].size() + axes.size();
+         int64_t r = input_shape.size() + axes.size();
          for (auto &a : axes) {
             int64_t i = static_cast<int64_t>(a);
             if (i < -r || i > r - 1)
@@ -238,16 +244,18 @@ public:
                // negative axes
                output_shape.insert(output_shape.end() + i + 1, Dim{1});
          }
-         ret.push_back(output_shape);
+         return output_shape;
       }
-      return ret;
+      throw std::runtime_error("TMVA Reshape Op : Invalid ReshapeOpMode");
+      return {Dim{}};
    }
 
    void Initialize(RModel& model) override {
 
       fVerbose = model.Verbose();
       if (fVerbose)
-         std::cout << "initialize reshape op type " << fOpMode << " - " << fNInput2 << " " << fNData << std::endl;
+         std::cout << "initialize reshape op type " << fOpMode << " -  for input " << fNData
+                   << " to shape given by " << fNInput2 << std::endl;
 
       if (model.CheckIfTensorAlreadyExist(fNData) == false) {
           // input must be a graph input, or already initialized intermediate tensor
@@ -272,16 +280,22 @@ public:
                else
                   fAttrAxes = std::vector<int64_t>(values, values + n);
 
-               fShapeOutput = ShapeInference({fShapeInput})[0];
+               std::vector<Dim> targetShape(fShape.begin(),fShape.end());
+               fShapeOutput = DoShapeInference(fShapeInput, targetShape);
                // set flag to not write tensor in weight file. Its data will be hard-coded in way model is constructed
                model.SetNotWritableInitializedTensor(fNInput2);
             } else if (model.IsShapeTensor(fNInput2)) {
                auto shapeData = model.GetShapeTensorValues(fNInput2);
-               fShapeOutput = shapeData;
+               fShapeOutput = DoShapeInference(fShapeInput, shapeData);
+               if (model.Verbose())
+                  std::cout << "Reshape op - get output shape from shape tensor " << fNInput2 << " with value " << ConvertDimShapeToString(shapeData) << std::endl;
             } else {
                // we cannot get shape at initialization time but at run-time
                fDynamicShape = true;
                // size of shape output us given by size of shape input tensor
+               if (model.IsDynamicTensor(fNInput2)) {
+                  throw std::runtime_error("TMVA Reshape Op 2nd input Tensor " + fNInput2 + " cannot have dynamic shape");
+               }
                auto shapeInput2 = model.GetTensorShape(fNInput2);
                fShapeOutput.resize(shapeInput2[0]);
                for (size_t i = 0; i < fShapeOutput.size(); i++) {
@@ -293,9 +307,9 @@ public:
          }
       } else if (!fAttrAxes.empty()) {
          // case fNShape is empty and axes are provided as attributes (e.g. for Unsqueeze)
-         fShapeOutput = ShapeInference({fShapeInput})[0];
+         fShapeOutput = DoShapeInference(fShapeInput, std::vector<Dim>{});
       } else if (fOpMode == Flatten || fOpMode == Squeeze) {
-         fShapeOutput = ShapeInference({fShapeInput})[0];
+         fShapeOutput = DoShapeInference(fShapeInput, std::vector<Dim>{});
       } else {
          throw std::runtime_error("TMVA Reshape Op : Invalid Input/Attribute data");
       }
@@ -312,14 +326,15 @@ public:
             ConvertValuesToString(ConvertShapeToLength(o_shape), inputData) << std::endl;
          }
       }
-      // for shape tensors we can have it if output shape is size==1 or a scalar
+      // for input shape tensors we can have it if output shape is size==1 or a scalar
       else if (model.IsShapeTensor(fNData) && fShapeOutput.size() <=1) {
-         fIsOutputConstant = true;
-         auto inputData = model.GetShapeTensorValues(fNData);
-         model.AddShapeTensor(fNOutput, inputData);
+         // not sure if we ever end-up here - maybe reshaping from scalar to vector or viceversa
+         fIsOutputParamShape = true;
+         fOutputShapeData = model.GetShapeTensorValues(fNData);
+         model.AddShapeTensor(fNOutput, fOutputShapeData);
          if (model.Verbose()) {
             std::cout << Name() << " : " << fNData << " " << ConvertDimShapeToString(fShapeInput) << " -->  " << fNOutput << " (shape) " << ConvertDimShapeToString(fShapeOutput)  << " : " <<
-            ConvertDimShapeToString(inputData) << std::endl;
+            ConvertDimShapeToString(fOutputShapeData) << std::endl;
          }
       }
       else {
@@ -346,6 +361,15 @@ public:
 
       if (fIsOutputConstant) return out.str();  //no op for constant tensors
 
+      if (fIsOutputParamShape) {
+          // no code to generate here for param shape output. Tensor output is defined in Session constructor
+         out << "//----------------output is a shape tensor----------\n";
+         for (int i = 0; i < static_cast<int>(fShapeOutput[0].dim); i++) {
+            out << SP << "tensor_" << fNOutput << "[" << i << " ] = " << fOutputShapeData[i].GetVal() << ";\n";
+         }
+         return out.str();
+      }
+
       // in case of dynamic output shape we need to set the shape value from input shape tensor
       // and take case of the zero values
       if (fDynamicShape) {
@@ -364,7 +388,8 @@ public:
       if (lengthOut != lengthIn) {
          // check needs to be done at run-time
          out << SP << "if (" << lengthOut << "!=" << lengthIn << ")\n";
-         out << "throw std::runtime_error(\"TMVA SOFIE Reshape Op : output lengths is different than input one\");\n";
+         out << SP << SP << "throw std::runtime_error(\"TMVA SOFIE Reshape " << opName << " output length "
+             << lengthOut << " is different than input one " << lengthIn << "\");\n";
       }
 
 
diff --git a/tmva/sofie/inc/TMVA/ROperator_Slice.hxx b/tmva/sofie/inc/TMVA/ROperator_Slice.hxx
index dfdf492893113..eb90c6ddb01d6 100644
--- a/tmva/sofie/inc/TMVA/ROperator_Slice.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_Slice.hxx
@@ -29,8 +29,10 @@ private:
    std::string fNData;        // input data tensor name
    std::string fNOutput;      // output data name
    std::vector<std::string> fNames;       // tensor names for meta(axis) information
-   std::vector<Dim> fShapeInput;     // input shape data
-   std::vector<Dim> fShapeOutput;   // output shape data
+   std::vector<Dim> fShapeInput;     // input shape
+   std::vector<Dim> fShapeOutput;   // output shape
+   std::vector<Dim> fOutputShapeData;   // output shape data in case output is a shape param tensor
+
    // saved Start/End.Steps are corrected from initial ONNX for negative/default values
    // and are available for each axis
    std::vector<Dim> fStart;         // starting values of slices for all axes
@@ -287,8 +289,11 @@ public:
          size_t outputSize = ConvertShapeToLength(ConvertShapeToInt(fShapeOutput));
          std::vector<int64_t> outputData(outputSize);
          std::vector<size_t> inputStride = UTILITY::ComputeStrideFromShape(ConvertShapeToInt(fShapeInput));
-         for (size_t ii = 0; ii< fStart.size(); ii++)
-            std::cout << fStart[ii] << "  " << fEnd[ii] << "  " << fSteps[ii] << std::endl;
+         if (model.Verbose()) {
+            std::cout << "Do slice for initialized input ..(start, end, step)\n";
+            for (size_t ii = 0; ii< fStart.size(); ii++)
+               std::cout << fStart [ii] << "  " << fEnd[ii] << "  " << fSteps[ii] << std::endl;
+         }
           // perform slice using a recursive function- need to use two lambda functions for this
          auto sliceRecursive = [&](size_t iaxis, size_t & outIdx, size_t & inOffset) {
             auto slice_impl = [&](size_t iax, size_t & outputIdx, size_t & inputOffset, auto & sliceRecImpl) {
@@ -300,14 +305,12 @@ public:
                   indices.push_back(i);
                if (iax == dim-1) { // last axis
                   for (size_t i = 0; i < indices.size(); i++) {
-                     std::cout << outputIdx << " , " << indices[i] << " " << inputOffset << " ; ";
                      outputData[outputIdx] = inputData[inputOffset + indices[i]];
                      outputIdx++;
                   }
                   return;
                } else {
                   for (size_t i = 0; i < indices.size(); i++) {
-                     std::cout << inputStride[iax] << " , " << indices[i] << " " << inputOffset << "  ";
                      size_t offset = inputOffset + inputStride[iax]*indices[i];
                      sliceRecImpl(iax+1, outputIdx, offset,sliceRecImpl);
                   }
@@ -325,6 +328,32 @@ public:
                      << ConvertValuesToString(outputData) << std::endl;
          }
       }
+      else if (model.IsShapeTensor(fNData) && !fStart[0].isParam && !fEnd[0].isParam) {
+         // case of input is a shape tensor. In this case rank=1 always, axis =0 and Slice is trivial
+         auto inputData = model.GetShapeTensorValues(fNData);
+         fOutputShapeData = std::vector<Dim>(inputData.begin() + fStart[0].dim, inputData.begin() + fEnd[0].dim);
+         // try to convert to integer values if possible
+         auto outputData = ConvertShapeToInt(fOutputShapeData);
+         fShapeOutput = { Dim{fOutputShapeData.size()}};
+         if (outputData.empty()) {
+            // is a param shape tensor
+            model.AddShapeTensor(fNOutput, fOutputShapeData);
+            fIsOutputParamShape = true;
+            if (model.Verbose()) {
+               std::cout << "Slice: output is a shape tensor -> " << fNOutput << "  " << ConvertDimShapeToString(fShapeOutput) << " with values "
+                        << ConvertDimShapeToString(fOutputShapeData) << " (shape)" << std::endl;
+            }
+         } else {
+            fIsOutputConstant = true;
+            std::vector<int64_t> data(outputData.size());
+            std::copy(outputData.begin(), outputData.end(), data.begin());
+            model.AddConstantTensor<int64_t>(fNOutput, {data.size()}, data.data());
+            if (model.Verbose()) {
+               std::cout << "Slice: output is a constant tensor -> " << fNOutput << "  " << ConvertDimShapeToString(fShapeOutput) << " with values "
+                        << ConvertDimShapeToString(fOutputShapeData) << " constant " << std::endl;
+            }
+         }
+      }
       else {
          // check if Slice is just an Identity operator in case start = 0, end = input_shape and step=1
          size_t ndim = fShapeInput.size();
@@ -362,6 +391,14 @@ public:
       out << "///------- Slice operator " << opName << "---> " << fNOutput << " "
           << ConvertDimShapeToString(fShapeOutput) << "\n" << std::endl;
       if (fIsOutputConstant) return out.str();  //no op for constant tensors
+      if (fIsOutputParamShape) {
+         out << "/// Slice output is a shape tensor with values : " << ConvertDimShapeToString(fShapeOutput) << "\n";
+         // need to generate code assigning values to shape tensors
+         for (int i = 0; i < static_cast<int>(fShapeOutput[0].dim); i++) {
+                  out << SP << "tensor_" << fNOutput << "[" << i << "] = " << fOutputShapeData[i] << ";\n";
+         }
+         return out.str();
+      }
 
       size_t ndim = fShapeInput.size();
 
diff --git a/tmva/sofie/inc/TMVA/ROperator_Where.hxx b/tmva/sofie/inc/TMVA/ROperator_Where.hxx
index 073c7e1ec19e7..fd498074df513 100644
--- a/tmva/sofie/inc/TMVA/ROperator_Where.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_Where.hxx
@@ -213,8 +213,6 @@ public:
          }
          // check case of constant  output (if all inputs are defined)
          if (model.IsInitializedTensor(fNC)) {
-
-            std::cout << "Where op: " << fNC << " is initialized\n";
             std::string nameC = fNBroadcastedC.empty() ? fNC : fNBroadcastedC;
             auto dataC = static_cast<bool *>(model.GetInitializedTensorData(nameC).get());
             model.SetNotWritableInitializedTensor(nameC);
@@ -223,22 +221,18 @@ public:
             std::vector<Dim> shapeDataX;
             std::vector<Dim> shapeDataY;
             if (model.IsInitializedTensor(fNX)) {
-               std::cout << "Where op: " << fNX << " is initialized\n";
                std::string nameX = fNBroadcastedX.empty() ? fNX : fNBroadcastedX;
                dataX = static_cast<T *>(model.GetInitializedTensorData(nameX).get());
                // flag tensors to not be written in a file
                model.SetNotWritableInitializedTensor(nameX);
             } else if (model.IsShapeTensor(fNX)) {
-               std::cout << "Where op: " << fNX << " is a shape tensor\n";
                shapeDataX = model.GetShapeTensorValues(fNX);
             }
             if (model.IsInitializedTensor(fNY)) {
-               std::cout << "Where op: " << fNY << " is initialized\n";
                std::string nameY = fNBroadcastedY.empty() ? fNY : fNBroadcastedY;
                dataY = static_cast<T *>(model.GetInitializedTensorData(nameY).get());
                model.SetNotWritableInitializedTensor(nameY);
             } else if (model.IsShapeTensor(fNY)) {
-               std::cout << "Where op: " << fNY << " is a shape tensor\n";
                shapeDataY = model.GetShapeTensorValues(fNY);
             }
             std::vector<T> dataZ;        // used in case output is constant tensor
@@ -252,14 +246,16 @@ public:
                dataZ.resize(ConvertShapeToLength(fShapeZ));
                for (size_t i = 0; i < dataZ.size(); i++)
                   dataZ[i] = (dataC[i]) ? dataX[i] : dataY[i];
-               std::cout << "data A and B : dataZ constant: " << ConvertValuesToString(dataZ) << std::endl;
+               if (model.Verbose())
+                  std::cout << "data A and B : dataZ constant: " << ConvertValuesToString(dataZ) << std::endl;
             } else if (dataX && shapeDataY.size() > 0) {
                shapeDataZ.resize(ConvertShapeToLength(fShapeZ));
                for (size_t i = 0; i < shapeDataZ.size(); i++) {
                   shapeDataZ[i] = (dataC[i]) ? Dim{size_t(dataX[i])} : shapeDataY[i];
                   isOutputConstantTensor &= !shapeDataZ[i].isParam;
                }
-               std::cout << "data A but shapeB " << ConvertDimShapeToString(shapeDataY) << "  "
+               if (model.Verbose())
+                  std::cout << "data A but shapeB " << ConvertDimShapeToString(shapeDataY) << "  "
                          << isOutputConstantTensor << std::endl;
             } else if (dataY && shapeDataX.size() > 0) {
                shapeDataZ.resize(ConvertShapeToLength(fShapeZ));
@@ -267,7 +263,8 @@ public:
                   shapeDataZ[i] = (dataC[i]) ? shapeDataY[i] : Dim{size_t(dataY[i])};
                   isOutputConstantTensor &= !shapeDataZ[i].isParam;
                }
-               std::cout << "data B but shapeA " << ConvertDimShapeToString(shapeDataX) << "  "
+               if (model.Verbose())
+                  std::cout << "data B but shapeA " << ConvertDimShapeToString(shapeDataX) << "  "
                          << isOutputConstantTensor << std::endl;
             } else if (shapeDataY.size() > 0 && shapeDataX.size() > 0) {
                shapeDataZ.resize(ConvertShapeToLength(fShapeZ));
@@ -275,7 +272,8 @@ public:
                   shapeDataZ[i] = (dataC[i]) ? shapeDataX[i] : shapeDataY[i];
                   isOutputConstantTensor &= !shapeDataZ[i].isParam;
                }
-               std::cout << " shapeA and B " << ConvertDimShapeToString(shapeDataX) << " shapeB "
+               if (model.Verbose())
+                  std::cout << " shapeA and B " << ConvertDimShapeToString(shapeDataX) << " shapeB "
                          << ConvertDimShapeToString(shapeDataY) << "  " << isOutputConstantTensor << std::endl;
             }
             fIsOutputConstant = true;
@@ -427,7 +425,6 @@ public:
             return "0";
          std::string expr;
          size_t offset = rankZ - dimShape.size();
-         std::cout << rankZ << "  " << dimShape.size() << "  " << offset << std::endl;
          for (size_t i = 0; i < dimShape.size(); ++i) {
             if (dimShape[i].dim == 1 || dimShape[i].GetVal() == "1") continue;
             expr += "idx_" + std::to_string(i + offset);
diff --git a/tmva/sofie/inc/TMVA/SOFIE_common.hxx b/tmva/sofie/inc/TMVA/SOFIE_common.hxx
index d921d5ae1f772..e8fbc6ee82720 100644
--- a/tmva/sofie/inc/TMVA/SOFIE_common.hxx
+++ b/tmva/sofie/inc/TMVA/SOFIE_common.hxx
@@ -250,6 +250,16 @@ inline std::string ConvertValToString<double>(double value) {
    }
    return ret.str();
 }
+// int64_t specialization: INT64_MIN is written out by name (INT64_MIN) instead of as decimal digits
+template<>
+inline std::string ConvertValToString<int64_t>(int64_t value) {
+   std::stringstream ret;
+   if (value == INT64_MIN)
+      ret << "INT64_MIN";
+   else
+      ret << std::to_string(value);
+   return ret.str();
+}
 
 
 // convert list of values in a string taking into account the precision

From 6cce19fe791eba783c419e846670574f646a3664 Mon Sep 17 00:00:00 2001
From: moneta <lorenzo.moneta@cern.ch>
Date: Fri, 15 May 2026 23:21:22 +0200
Subject: [PATCH 6/8] [tmva][sofie] Fix a bug in computing the end of life of a
 tensor

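When computing the last usage of tensors, the loop over the input tensors
of the added operator was not performed correctly. The exclusion tests were
part of the loop condition, so the loop stopped at the first input that was
an initialized tensor or a model input, and any inputs after it were never
recorded. The tests are now applied to each input inside the loop body.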
---
 tmva/sofie/src/RModel.cxx | 52 +++++++++++++++++++++------------------
 1 file changed, 28 insertions(+), 24 deletions(-)

diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx
index 037c9f292fc3b..a6e2d7432a678 100644
--- a/tmva/sofie/src/RModel.cxx
+++ b/tmva/sofie/src/RModel.cxx
@@ -188,30 +188,34 @@ void RModel::AddInputTensorName(std::string input_name) {
     fInputTensorNames.emplace_back(UTILITY::Clean_name(input_name));
 }
 
-void RModel::AddOperator(std::unique_ptr<ROperator> op, int order_execution) {
-    AddBlasRoutines(op->GetBlasRoutines());
-    auto libs = op->GetStdLibs();
-    auto op_input_tensors = op->GetOpInputTensors();
-    for (auto& stdlib : libs) {
-        AddNeededStdLib(stdlib);
-    }
-    if (order_execution >= 0) {
-        fOperators.insert(fOperators.begin() + order_execution, std::move(op));
-    } else {
-        fOperators.push_back(std::move(op));
-        order_execution = fOperators.size()-1;
-    }
-
-    // storing the last usage of tensors which are input to the operator
-    // (excluding tensors which are inputs to the model or the initialized (weights) tensors)
-    // We call this function during parsing so we don't have yet initialized the operators
-   for(size_t index = 0; index<op_input_tensors.size() &&
-            fInitializedTensors.find(UTILITY::Clean_name(std::string(op_input_tensors[index]))) == fInitializedTensors.end() &&
-            std::find(fInputTensorNames.begin(), fInputTensorNames.end(),
-                      UTILITY::Clean_name(std::string(op_input_tensors[index]))) == fInputTensorNames.end();
-            ++index)
-   {
-      fIntermediateTensorFrequencyLookup[op_input_tensors[index]] = order_execution;
+void RModel::AddOperator(std::unique_ptr<ROperator> op, int order_execution)
+{
+   AddBlasRoutines(op->GetBlasRoutines());
+   auto libs = op->GetStdLibs();
+   auto op_input_tensors = op->GetOpInputTensors();
+   for (auto &stdlib : libs) {
+      AddNeededStdLib(stdlib);
+   }
+   if (order_execution >= 0) {
+      fOperators.insert(fOperators.begin() + order_execution, std::move(op));
+   } else {
+      fOperators.push_back(std::move(op));
+      order_execution = fOperators.size() - 1;
+   }
+
+   // Record this operator's position as the last usage of each of its input tensors.
+   // Inputs that are model inputs or initialized (weights) tensors are skipped one by one.
+   // This function is called during parsing, i.e. before the operators have been initialized.
+   for (size_t index = 0; index < op_input_tensors.size(); index++) {
+      if (!IsInitializedTensor(UTILITY::Clean_name(std::string(op_input_tensors[index]))) &&
+          std::find(fInputTensorNames.begin(), fInputTensorNames.end(),
+                    UTILITY::Clean_name(std::string(op_input_tensors[index]))) == fInputTensorNames.end()) {
+
+         fIntermediateTensorFrequencyLookup[op_input_tensors[index]] = order_execution;
+         if (Verbose())
+            std::cout << "adding order execution for " << op_input_tensors[index] << " order " << order_execution
+                      << std::endl;
+      }
    }
 }
 

From 86ad783ffef1c10511bbe9bccd2324797282b203 Mon Sep 17 00:00:00 2001
From: moneta <lorenzo.moneta@cern.ch>
Date: Sun, 17 May 2026 18:07:25 +0200
Subject: [PATCH 7/8] [tmva][sofie] Fix Gather for negative indices in
 initialized tensors

---
 tmva/sofie/inc/TMVA/RModel.hxx           | 1 -
 tmva/sofie/inc/TMVA/ROperator_Gather.hxx | 8 ++++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/tmva/sofie/inc/TMVA/RModel.hxx b/tmva/sofie/inc/TMVA/RModel.hxx
index 08ad14149aacf..a9ff87cace2be 100644
--- a/tmva/sofie/inc/TMVA/RModel.hxx
+++ b/tmva/sofie/inc/TMVA/RModel.hxx
@@ -242,7 +242,6 @@ public:
    bool UseVDT() const { return fUseVDT;}
 
    // Use the ClassDef macro to allow definition of custom streaming
-   // Use Version 0 since we don't support for time being ROOT I/O streaming of RModel objects
    ClassDefNV(RModel, 4);
 };
 
diff --git a/tmva/sofie/inc/TMVA/ROperator_Gather.hxx b/tmva/sofie/inc/TMVA/ROperator_Gather.hxx
index 3287d25af977f..fafe0dd63ae92 100644
--- a/tmva/sofie/inc/TMVA/ROperator_Gather.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_Gather.hxx
@@ -72,6 +72,7 @@ public:
       // case indices tensor is initialized
       if (model.IsInitializedTensor(fNIndices)) {
           // empty shape Indices is a scalar value for the indices
+         bool hasNegativeIndex = false;
          size_t indicesLength = ConvertShapeToLength(model.GetTensorShape(fNIndices));
          int64_t* data = static_cast<int64_t*>(model.GetInitializedTensorData(fNIndices).get());
          // copy in a vector since we may need to update the values in case of negative indices
@@ -81,10 +82,17 @@ public:
             // move this at generation time?
             if (!fShapeX[fAttrAxis].isParam) {
                if (fIndices[i] < 0) {
+                  hasNegativeIndex = true;
                   fIndices[i] += fShapeX[fAttrAxis].dim;
                }
             }
          }
+         // for negative indices we need to add an extra constant tensor
+         if (hasNegativeIndex) {
+            std::string nameIndicesUpdated = fNIndices + "_updated";
+            model.AddConstantTensor(nameIndicesUpdated, model.GetTensorShape(fNIndices), fIndices.data());
+            fNIndices = nameIndicesUpdated;
+         }
       }
       // Output shape
       if (model.Verbose())

From 9e9a991e9182756d2839a5871db32c63a0068ccb Mon Sep 17 00:00:00 2001
From: moneta <lorenzo.moneta@cern.ch>
Date: Tue, 19 May 2026 10:28:16 +0200
Subject: [PATCH 8/8] [tmva][sofie] Fix adding standard library header

Remove the check that added a standard library header only if it was in a
list of allowed ones. The check is not needed, and it prevented adding
extra headers such as chrono.
---
 tmva/sofie/inc/TMVA/RModel_Base.hxx | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tmva/sofie/inc/TMVA/RModel_Base.hxx b/tmva/sofie/inc/TMVA/RModel_Base.hxx
index 29d4eaf8d7b76..891f67044b405 100644
--- a/tmva/sofie/inc/TMVA/RModel_Base.hxx
+++ b/tmva/sofie/inc/TMVA/RModel_Base.hxx
@@ -82,10 +82,8 @@ public:
    }
    void AddNeededStdLib(std::string libname)
    {
-      static const std::unordered_set<std::string> allowedStdLib = {"vector", "algorithm", "cmath", "memory", "span"};
-      if (allowedStdLib.find(libname) != allowedStdLib.end()) {
-         fNeededStdLib.insert(libname);
-      }
+      // any header name is accepted; inserting one that is already in the set does nothing, so no check is needed
+      fNeededStdLib.insert(libname);
    }
    void AddNeededCustomHeader(std::string filename)
    {