diff --git a/tmva/sofie/inc/TMVA/RModel.hxx b/tmva/sofie/inc/TMVA/RModel.hxx
index ec4e1115b759d..a9ff87cace2be 100644
--- a/tmva/sofie/inc/TMVA/RModel.hxx
+++ b/tmva/sofie/inc/TMVA/RModel.hxx
@@ -45,6 +45,8 @@ private:
    MemoryPoolInfo fIntermediateMemoryInfo;    ///<!  intermediate memory info (transient)
    std::unordered_map<std::string_view, size_t> fIntermediateTensorFrequencyLookup;    ///<!  lookup table for intermediate tensor frequency (transient)
 
+   std::string fExtraCodeForDimShapes; // extra code needed for initialization of dynamic parameters (e.g. number of non zero elements in NonZero operator)
+
 public:
    /**
        Default constructor. Needed to allow serialization of ROOT objects. See
@@ -108,6 +110,7 @@ public:
 
    void AddShapeTensor(const std::string & name, const std::vector<Dim> & shapeValues, bool scalar = false);
 
+   void AddExtraCodeForDimShapes(const std::string & code) { fExtraCodeForDimShapes += code; }
 
    // add and initialize subgraph to the model
    void InitializeSubGraph(std::shared_ptr<RModel>  graph);
@@ -239,7 +242,7 @@ public:
    bool UseVDT() const { return fUseVDT;}
 
    // Use the ClassDef macro to allow definition of custom streaming
-   ClassDefNV(RModel, 3);
+   ClassDefNV(RModel, 4);
 };
 
 // need to implement here templated member functions and its specialization
diff --git a/tmva/sofie/inc/TMVA/RModel_Base.hxx b/tmva/sofie/inc/TMVA/RModel_Base.hxx
index 29d4eaf8d7b76..891f67044b405 100644
--- a/tmva/sofie/inc/TMVA/RModel_Base.hxx
+++ b/tmva/sofie/inc/TMVA/RModel_Base.hxx
@@ -82,10 +82,8 @@ public:
    }
    void AddNeededStdLib(std::string libname)
    {
-      static const std::unordered_set<std::string> allowedStdLib = {"vector", "algorithm", "cmath", "memory", "span"};
-      if (allowedStdLib.find(libname) != allowedStdLib.end()) {
-         fNeededStdLib.insert(libname);
-      }
+      // if the library is already in the set, insert does nothing, so we don't need to check before inserting
+      fNeededStdLib.insert(libname);
    }
    void AddNeededCustomHeader(std::string filename)
    {
diff --git a/tmva/sofie/inc/TMVA/ROperator.hxx b/tmva/sofie/inc/TMVA/ROperator.hxx
index f23be2f3ac057..9a1464f6d449a 100644
--- a/tmva/sofie/inc/TMVA/ROperator.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator.hxx
@@ -45,7 +45,7 @@ protected:
    const std::string SP = "   ";    ///< space used to correctly indent the generated C++ code
    bool fUseSession = false;        ///< flag to identify if using the session class
    bool fIsOutputConstant = false;  ///< flag to identify if operator has a constant output (no need to generate code)
-   bool fIsOutputParamShape = false;     ///< flag to identify of the output represents a parametric shape (can be knwon at compile time)
+   bool fIsOutputParamShape = false;     ///< flag to identify if the output represents a parametric shape (can be known at compile time)
 
    mutable std::vector<std::string_view> fInputTensorNames;
    mutable std::vector<std::string_view> fOutputTensorNames;
diff --git a/tmva/sofie/inc/TMVA/ROperator_BasicBinary.hxx b/tmva/sofie/inc/TMVA/ROperator_BasicBinary.hxx
index da18363e50e81..e6f0dccbf81ba 100644
--- a/tmva/sofie/inc/TMVA/ROperator_BasicBinary.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_BasicBinary.hxx
@@ -209,8 +209,8 @@ public:
                          << ConvertShapeToString(fShapeY) << " : " << ConvertValuesToString(dataY) << std::endl;
             }
          } else if (((model.IsShapeTensor(fNA) && model.IsShapeTensor(fNB)) ||
-                    (model.IsShapeTensor(fNA) && model.IsConstantTensor(fNB)) ||
-                    (model.IsShapeTensor(fNB) && model.IsConstantTensor(fNA)))
+                    (model.IsShapeTensor(fNA) && model.IsInitializedTensor(fNB)) ||
+                    (model.IsShapeTensor(fNB) && model.IsInitializedTensor(fNA)))
                      && (fShapeA.size() <=1 && fShapeB.size() <=1 &&  model.GetTensorType(fNA) == ETensorType::INT64)) {
             // case of shape tensors ( tensors are of rank 0 or 1  )
             std::vector<Dim> dimValA;
@@ -235,9 +235,9 @@ public:
                      dimValues[i] = Dim{ static_cast<size_t>(data[0])};
                }
             };
-            if (model.IsConstantTensor(fNA)) {
+            if (model.IsInitializedTensor(fNA)) {
                convertDataToDim(fNA,fShapeA,dimValA);
-            } else if (model.IsConstantTensor(fNB)) {
+            } else if (model.IsInitializedTensor(fNB)) {
                convertDataToDim(fNB,fShapeB,dimValB);
             }
 
diff --git a/tmva/sofie/inc/TMVA/ROperator_Cast.hxx b/tmva/sofie/inc/TMVA/ROperator_Cast.hxx
index cace65040c772..85f7ac40e6aac 100644
--- a/tmva/sofie/inc/TMVA/ROperator_Cast.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_Cast.hxx
@@ -66,8 +66,9 @@ public:
       if (!fIsOutputConstant)
          model.AddIntermediateTensor(fNY, fType, fShape);
       if (model.Verbose()) {
-         std::cout << "Cast : " << ConvertTypeToString(inputType) << " " << fNX << " -> " << ConvertTypeToString(fType) << " for " << fNY
-                  << " shape " << ConvertDimShapeToString(fShape);
+         std::cout << "Cast : " << ConvertTypeToString(inputType) << " " << fNX << " -> " << ConvertTypeToString(fType);
+         if (fType == ETensorType::BOOL) std::cout << " (converted from BOOL) ";
+         std::cout << " for " << fNY << " shape " << ConvertDimShapeToString(fShape);
          if (fIsOutputConstant) std::cout << " (constant) ";
          std::cout << std::endl;
       }
@@ -87,7 +88,11 @@ public:
 
       out << SP << "for (int id = 0; id < " << length << " ; id++){\n";
 
-      out << SP << SP << "tensor_" << fNY << "[id] = static_cast<"<< ConvertTypeToString(fType) << ">(tensor_" << fNX << "[id]);\n";
+      // need to handle the bool case separately since a plain cast to uint8 will not give the right result
+      if (fType == ETensorType::BOOL)
+         out << SP << SP << "tensor_" << fNY << "[id] = (tensor_" << fNX << "[id] != 0) ? 1 : 0;\n";
+      else
+         out << SP << SP << "tensor_" << fNY << "[id] = static_cast<"<< ConvertTypeToString(fType) << ">(tensor_" << fNX << "[id]);\n";
 
       out << SP << "}\n";
       return out.str();
diff --git a/tmva/sofie/inc/TMVA/ROperator_Concat.hxx b/tmva/sofie/inc/TMVA/ROperator_Concat.hxx
index aaef31eff98f3..75b764c3294b3 100644
--- a/tmva/sofie/inc/TMVA/ROperator_Concat.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_Concat.hxx
@@ -24,6 +24,7 @@
          std::vector<std::string> fInputs;
          std::string fOutput;
          std::vector<Dim>fOutputShape;
+         std::vector<Dim> fOutputShapeData; // in case output is a shape tensor we store here the output shape value data (can be parametric)
          std::vector<std::vector<Dim>> fInputShapes;
 
      public:
@@ -170,82 +171,125 @@
          }
 
          void Initialize(RModel& model) override {
+            std::vector<std::vector<size_t>> inputIntShapes;
             for (auto &it : fInputs) {
                if (model.CheckIfTensorAlreadyExist(it) == false) {
                   throw std::runtime_error("TMVA SOFIE Concat Op Input Tensor " + it + " is not found in model");
                }
                fInputShapes.push_back(model.GetDimTensorShape(it));
+               if (!model.IsDynamicTensor(it)) {
+                  inputIntShapes.push_back(ConvertShapeToInt(fInputShapes.back()));
+               }
+            }
+            if (inputIntShapes.size() == fInputs.size()) {
+               // if all input shapes are static we can compute output shape at initialization time
+               auto outputIntShape = ShapeInference(inputIntShapes)[0];
+               fOutputShape = ConvertShapeToDim(outputIntShape);
+               if (model.Verbose())
+                  std::cout << "Initialize Concat operator with defined inputs shapes, "
+                           << "output has shape " << ConvertShapeToString(outputIntShape) << std::endl;
+
+            } else {
+               // if at least one input shape is dynamic we need to compute output shape using the symbolic expression for the dimensions
+               fOutputShape = ShapeInference(fInputShapes, model);
+               if (model.Verbose())
+                  std::cout << "Initialize Concat operator with dynamic inputs shapes, "
+                           << "output has shape " << ConvertDimShapeToString(fOutputShape) << std::endl;
             }
-            fOutputShape = ShapeInference(fInputShapes, model);
-            if (model.Verbose())
-               std::cout << "Output of concat operator has shape " << ConvertDimShapeToString(fOutputShape) << std::endl;
 
             // check if concat has constant inputs , axis 0(concat contigous memory and type is integer)
             bool isOutputShape = false;
-            if (model.GetTensorType(fInputs[0]) == ETensorType::INT64 && fAxis == 0) {
-               fIsOutputConstant = true;
-               isOutputShape = true;
 
-               for ( auto & input : fInputs) {
-                  if (!model.IsInitializedTensor(input)) {
-                     fIsOutputConstant = false;
-                     if (!model.IsShapeTensor(input)) {
-                        isOutputShape = false;
-                        break;
-                     }
-                  }
+            // the folding below copies int64_t data contiguously, so it is only valid for INT64 inputs joined on axis 0
+            fIsOutputConstant = (model.GetTensorType(fInputs[0]) == ETensorType::INT64 && fAxis == 0);
+            isOutputShape = fIsOutputConstant;
+
+            for (auto &input : fInputs) {
+               if (model.IsDynamicTensor(input)) {
+                  fIsOutputConstant = false;
+                  isOutputShape = false;
+                  break;
                }
-               if (fIsOutputConstant) {
-                  auto outputShape = ConvertShapeToInt(fOutputShape);  // conversion must be possible
-                  std::vector<int64_t> outputData(ConvertShapeToLength(outputShape));
-                  size_t offset = 0;
-                  for ( auto & input : fInputs) {
-                     auto inputData = static_cast<int64_t*>(model.GetInitializedTensorData(input).get());
-                     auto inputShape = model.GetTensorShape(input); // shape is not dynamic if it is constant
-                     size_t inputLength = ConvertShapeToLength(inputShape);
-                     std::copy(inputData, inputData + inputLength, outputData.begin() + offset );
-                     offset += inputLength;
-                     // the data of the input tensor don't need to be written in the generated code and data file
-                     model.SetNotWritableInitializedTensor(input);
-                  }
-                  model.AddConstantTensor<int64_t>(fOutput, outputShape, outputData.data());
-                  if (model.Verbose()) {
-                     std::cout << "output of Concat is a constant tensor " << ConvertShapeToString(outputShape) << " : "
-                     << ConvertValuesToString(outputData) << " (constant)" << std::endl;
-                  }
-               } else if (isOutputShape) {
-                  auto outputShape = ConvertShapeToInt(fOutputShape);  // conversion must be possible
-                  std::vector<Dim> outputData(ConvertShapeToLength(outputShape));
-                  size_t offset = 0;
-                  for ( auto & input : fInputs) {
-                     std::vector<Dim> inputData;
-                     auto inputShape = model.GetTensorShape(input); // shape is not dynamic
-                     size_t inputLength = ConvertShapeToLength(inputShape); // shape can be a scalar
-                     if (model.IsShapeTensor(input)) {
-                        inputData = model.GetShapeTensorValues(input);
-                     } else if (model.IsInitializedTensor(input)) {
-                        inputData.resize(inputLength);
-                        auto intData = static_cast<int64_t*>(model.GetInitializedTensorData(input).get());
-                        for (size_t i = 0; i < inputData.size(); i++)
-                           inputData[i] = Dim{ static_cast<size_t>(intData[i])};
-                     }
-                     else {
-                        // this should not happen
-                        throw std::runtime_error("TMVA SOFIE Concat Operator- invalid input type for shape output type");
+               if (!model.IsInitializedTensor(input)) {
+                  if (model.IsShapeTensor(input)) {
+                     // if it is a shape tensor we can have constant output if the shapes are defined
+                     auto shapeData = model.GetShapeTensorValues(input);
+                     bool isShapeFullyDefined = ConvertShapeToInt(shapeData).size() == shapeData.size();
+                     if (!isShapeFullyDefined) {
+                        fIsOutputConstant = false;
+                     } else {
+                        // fully defined values still live in the shape-tensor table, while the constant branch below
+                        // reads every input through GetInitializedTensorData: keep the output a shape tensor
+                        fIsOutputConstant = false;
                      }
-                     std::copy(inputData.begin(), inputData.end(), outputData.begin() + offset );
-                     offset += inputLength;
+                     // inputs are then shape tensors and output is a shape tensor
+                     isOutputShape = true;
+                  } else {
+                     // case of standard intermediate tensor
+                     fIsOutputConstant = false;
+                     isOutputShape = false;
+                     break;
                   }
-                  // add output tensor
-                  model.AddShapeTensor(fOutput,outputData, false); // cannot be a  scalar
-                  if (model.Verbose()) {
-                     std::cout << "output of Concat is a shape tensor " << ConvertShapeToString(outputShape) << " : "
-                     << ConvertDimShapeToString(outputData) << " (shape)" <<  std::endl;
+               } else {
+                  fIsOutputConstant = fIsOutputConstant && true;
+               }
+            }
+            //}
+
+            if (fIsOutputConstant) {
+               auto outputShape = ConvertShapeToInt(fOutputShape); // conversion must be possible
+               std::vector<int64_t> outputData(ConvertShapeToLength(outputShape));
+               size_t offset = 0;
+               for (auto &input : fInputs) {
+                  auto inputData = static_cast<int64_t *>(model.GetInitializedTensorData(input).get());
+                  auto inputShape = model.GetTensorShape(input); // shape is not dynamic if it is constant
+                  size_t inputLength = ConvertShapeToLength(inputShape);
+                  std::copy(inputData, inputData + inputLength, outputData.begin() + offset);
+                  offset += inputLength;
+                  // the data of the input tensor don't need to be written in the generated code and data file
+                  model.SetNotWritableInitializedTensor(input);
+               }
+               model.AddConstantTensor<int64_t>(fOutput, outputShape, outputData.data());
+               if (model.Verbose()) {
+                  std::cout << "output of Concat is a constant tensor " << ConvertShapeToString(outputShape) << " : "
+                            << ConvertValuesToString(outputData) << " (constant)" << std::endl;
+               }
+            } else if (isOutputShape) {
+               auto outputShape = ConvertShapeToInt(fOutputShape); // conversion must be possible
+               if (outputShape.size() != 1)
+                  throw std::runtime_error("TMVA SOFIE Concat Op - output shape for shape tensor must have rank 1");
+               // output shape is a rank 1 tensor with size equal to the output rank
+               std::vector<Dim> outputData(outputShape[0]);
+               size_t offset = 0;
+               for (auto &input : fInputs) {
+                  std::vector<Dim> inputData;
+                  auto inputShape = model.GetTensorShape(input);         // shape is not dynamic
+                  size_t inputLength = ConvertShapeToLength(inputShape); // shape can be a scalar
+                  if (model.IsShapeTensor(input)) {
+                     inputData = model.GetShapeTensorValues(input);
+                  } else if (model.IsInitializedTensor(input)) {
+                     inputData.resize(inputLength);
+                     auto intData = static_cast<int64_t *>(model.GetInitializedTensorData(input).get());
+                     for (size_t i = 0; i < inputData.size(); i++)
+                        inputData[i] = Dim{static_cast<size_t>(intData[i])};
+                  } else {
+                     // this should not happen
+                     throw std::runtime_error("TMVA SOFIE Concat Operator- invalid tensor input " + input +
+                                              " for shape output type");
                   }
-                  fIsOutputConstant = true;
+                  std::copy(inputData.begin(), inputData.end(), outputData.begin() + offset);
+                  offset += inputLength;
                }
+               // add output tensor
+               model.AddShapeTensor(fOutput, outputData, false); // cannot be a  scalar
+               fOutputShapeData = outputData;
+               if (model.Verbose()) {
+                  std::cout << "output of Concat is a shape tensor " << ConvertShapeToString(outputShape) << " : "
+                            << ConvertDimShapeToString(outputData) << " (shape)" << std::endl;
+               }
+               fIsOutputParamShape = true;
             }
-            if (!fIsOutputConstant) {
+            if (!fIsOutputConstant && !fIsOutputParamShape) {
                model.AddIntermediateTensor(fOutput, model.GetTensorType(fInputs[0]), fOutputShape);
                if (model.Verbose()) {
                   std::cout << "Concat ---> " << fOutput << " " <<  ConvertDimShapeToString(fOutputShape) << std::endl;
@@ -260,8 +304,14 @@
 
             if (fIsOutputConstant) return out.str();
 
-            if(fOutputShape.empty()){
-                  throw std::runtime_error("TMVA SOFIE Concat called to Generate without being initialized first");
+            if (fIsOutputParamShape) {
+               // shape-tensor output: emit one assignment per element of the concatenated shape values
+               out << "// output is a shape tensor defined by the concatenation of the input shapes\n";
+               const int nShapeValues = static_cast<int>(fOutputShape[0].dim);
+               for (int i = 0; i < nShapeValues; i++) {
+                  out << SP << "tensor_" << fOutput << "[" << i << "] = " << fOutputShapeData[i] << ";\n";
+               }
+               return out.str();
             }
             // special case when memory is contiguous
             bool hasShapeOnes = true;
diff --git a/tmva/sofie/inc/TMVA/ROperator_Gather.hxx b/tmva/sofie/inc/TMVA/ROperator_Gather.hxx
index ad91d1256ded1..fafe0dd63ae92 100644
--- a/tmva/sofie/inc/TMVA/ROperator_Gather.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_Gather.hxx
@@ -26,11 +26,13 @@ private:
    std::vector<Dim> fShapeX;
    std::vector<Dim> fShapeIndices;
    std::vector<Dim> fShapeY;
+   std::vector<Dim> fOutputShapeData;
 
    std::vector<int64_t> fIndices;  // indices vector in case they are known at initialization
 
    std::string fType;
 
+
 public:
    ROperator_Gather(){}
    ROperator_Gather(int64_t attrAxis, std::string nameX, std::string nameIndices, std::string nameY):
@@ -70,19 +72,27 @@ public:
       // case indices tensor is initialized
       if (model.IsInitializedTensor(fNIndices)) {
           // empty shape Indices is a scalar value for the indices
+         bool hasNegativeIndex = false;
          size_t indicesLength = ConvertShapeToLength(model.GetTensorShape(fNIndices));
-         int64_t* indicesData = static_cast<int64_t*>(model.GetInitializedTensorData(fNIndices).get());
+         int64_t* data = static_cast<int64_t*>(model.GetInitializedTensorData(fNIndices).get());
+         // copy in a vector since we may need to update the values in case of negative indices
+         fIndices =std::vector<int64_t>(data, data + indicesLength);
          // update indices data in case of negative dim values
          for (size_t i = 0; i < indicesLength; i++) {
             // move this at generation time?
             if (!fShapeX[fAttrAxis].isParam) {
-               if (indicesData[i] < 0) {
-                  indicesData[i] += fShapeX[fAttrAxis].dim;
+               if (fIndices[i] < 0) {
+                  hasNegativeIndex = true;
+                  fIndices[i] += fShapeX[fAttrAxis].dim;
                }
             }
          }
-         // Save in a vector gather Indices of size q
-         fIndices = std::vector<int64_t>(indicesData, indicesData + indicesLength);
+         // for negative indices we need to add an extra constant tensor
+         if (hasNegativeIndex) {
+            std::string nameIndicesUpdated = fNIndices + "_updated";
+            model.AddConstantTensor(nameIndicesUpdated, model.GetTensorShape(fNIndices), fIndices.data());
+            fNIndices = nameIndicesUpdated;
+         }
       }
       // Output shape
       if (model.Verbose())
@@ -121,17 +131,17 @@ public:
       else if (model.IsShapeTensor(fNX) && q <=1  && fIndices.size() > 0) {
          auto inputData = model.GetShapeTensorValues(fNX);
          // if r == 1 and q<=1 then output length is 1 (is a scalar or tensor of size1)
-         std::vector<Dim> outputData(1);
-         outputData[0] = inputData[fIndices[0]];
-         if (outputData[0].isParam) {
-            fIsOutputConstant = true;
+         fOutputShapeData.resize(1);
+         fOutputShapeData[0] = inputData[fIndices[0]];
+         if (fOutputShapeData[0].isParam) {
+            fIsOutputParamShape = true;
             // shapeY can be scalar or vector of size1
-            model.AddShapeTensor(fNY, outputData, fShapeY.size() == 0);
+            model.AddShapeTensor(fNY, fOutputShapeData, fShapeY.size() == 0);
             if (model.Verbose())
                std::cout << "Gather: " << fNX << " " << ConvertDimShapeToString(fShapeX) << " -> " << fNY << " with shape " << ConvertDimShapeToString(fShapeY)
-                   << " and values " << ConvertDimShapeToString(outputData) << " (shape) " << std::endl;
+                   << " and values " << ConvertDimShapeToString(fOutputShapeData) << " (shape) " << std::endl;
          } else {
-            int64_t value = static_cast<int64_t>(outputData[0].dim);
+            int64_t value = static_cast<int64_t>(fOutputShapeData[0].dim);
             auto shapeY = ConvertShapeToInt(fShapeY);
             model.AddConstantTensor(fNY, shapeY, &value);
             fIsOutputConstant = true;
@@ -140,7 +150,7 @@ public:
                    << " and values {" << value <<  "} (constant) " << std::endl;
          }
       }
-      if (!fIsOutputConstant) {
+      if (!fIsOutputConstant && !fIsOutputParamShape) {
          // Add output tensor
          model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShapeY);
          fType = ConvertTypeToString(model.GetTensorType(fNX));
@@ -159,6 +169,14 @@ public:
          out << "//--------------------(constant)----------\n";
          return out.str();
       }
+      if (fIsOutputParamShape) {
+         // parametric shape output: no gather loop is needed, only one assignment per stored shape value
+         out << "//--------------------(shape)----------\n";
+         for (size_t i = 0; i < fOutputShapeData.size(); i++) {
+            out << SP << "tensor_" << fNY << "[" << i << "] = " << fOutputShapeData[i].GetVal() << ";\n";
+         }
+         return out.str();
+      }
       // The shape of the output is q + r - 1
       size_t r = fShapeX.size();
       // Indices of shape q
diff --git a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx
index c8ea219f4e228..7b8a2e989427a 100644
--- a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx
@@ -25,6 +25,7 @@ namespace SOFIE{
    private:
       bool fIsDynamic = false;
       bool fBroadcastBias = false;
+      bool fCheckBiasShapeAtRuntime = false; // flag to identify the need to do a run time check of bias shape compatibility in case of dynamic shapes and uni-directional broadcasting
 
       float fAttrAlpha = 1.0;
       float fAttrBeta = 1.0;
@@ -232,9 +233,13 @@ namespace SOFIE{
                fBroadcastBias = true;
                // check if broadcasting is compatible and note that prepend 1 to shapeC
                auto r = UTILITY::MultidirectionalBroadcastShape(fShapeY, fDimShapeC);
-               // return flag must be equal to 1 since this is a unidirectional broadcast of C->Y
-               if (r.first > 1) {
+               // return flag must not have bit equal to 2 since this is a unidirectional broadcast of C->Y
+               //
+               if ((r.first & 2) == 2) {
                   throw std::runtime_error("TMVA SOFIE Gemm Op - bias tensor of shape " + ConvertDimShapeToString(fDimShapeC) + " cannot be uni-directional broadcasted to " + ConvertDimShapeToString(fShapeY));
+               } else if (r.first  == 4) {
+                  // we need to do a run time check of bias shape if it is compatible
+                  fCheckBiasShapeAtRuntime = true;
                }
                fShapeC = ConvertShapeToInt(fDimShapeC);
             }
@@ -273,9 +278,9 @@ namespace SOFIE{
       std::string Generate(std::string opName) override {
          opName = "op_" + opName;
 
-         if (fShapeA.empty() || fShapeB.empty() || fShapeY.empty() || (fNC != "" && fShapeC.empty())) {
-            throw std::runtime_error("TMVA SOFIE Gemm Op called to Generate without being initialized first");
-         }
+         // if (fShapeA.empty() || fShapeB.empty() || fShapeY.empty() || (fNC != "" && fShapeC.empty())) {
+         //    throw std::runtime_error("TMVA SOFIE Gemm Op called to Generate without being initialized first");
+         // }
          std::stringstream out;
          out << "\n//--------- Gemm " << opName << " " << ConvertDimShapeToString(fShapeA) << " * " << ConvertDimShapeToString(fShapeB)
              << " -> " << ConvertDimShapeToString(fShapeY) << "\n";
@@ -344,7 +349,9 @@ namespace SOFIE{
             //in this case fAttrBeta needs to be equal to zero otherwise second time we run we will use
             // the previous result
             if (fAttrBeta != 0) {
-               throw std::runtime_error("TMVA SOFIE Gemm Op " + opName + " Bias tensor is not present but beta value in Gemm is not zero");
+               // some models have no bias tensor but a non-zero beta: force beta to zero
+               fAttrBeta = 0;
+               std::cout << "WARNING: TMVA SOFIE Gemm Op " + opName + " Bias tensor is not present but beta value in Gemm is not zero - force it to zero\n";
             }
          }
 
@@ -371,6 +378,47 @@ namespace SOFIE{
          bool extraA = (doStackMul && lengthExtra_A != "1");
          bool extraB = (doStackMul && lengthExtra_B != "1");
          bool extraC = (doStackMul && haveExtraC && !fBroadcastBias);
+         // run time check for bias broadcasting
+         std::string biasShapeType = opName + "_biasShapeType";
+         if (fBroadcastBias && fCheckBiasShapeAtRuntime) {
+            // create a flag according to bias shape:
+            // = 1 for (1,Y2)
+            // = 2 for (Y1,1)
+            // = 3 for a scalar
+            out << SP << "int " << biasShapeType << " = 0;\n";
+            // case vector of columns
+            if (sC[0].GetVal() != "1" && sC[1].GetVal() != sY[1].GetVal())
+               out << SP << "if (" << sC[0] << " == 1 && " << sC[1] << " == " << sY[1] << ")\n";
+            else if (sC[0].GetVal() == "1")
+               out << SP << "if (" << sC[1] << " == " << sY[1] << ")\n";
+            else if (sC[1].GetVal() == sY[1].GetVal())
+               out << SP << "if (" << sC[0] << " == 1)\n";
+
+            out << SP << SP << biasShapeType << " = 1;\n";
+
+            // case vector of rows
+            if (sC[1].GetVal() != "1" && sC[0].GetVal() != sY[0].GetVal())
+               out << SP << "else if (" << sC[1] << " == 1 && " << sC[0] << " == " << sY[0] << ")\n";
+            else if (sC[1].GetVal() == "1")
+                out << SP << "else if (" << sC[0] << " == " << sY[0] << ")\n";
+            else if (sC[0].GetVal() == sY[0].GetVal())
+               out << SP << "else if (" << sC[1] << " == 1)\n";
+
+            out << SP << SP << biasShapeType << " = 2;\n";
+
+            // case scalar
+            if (sC[0].GetVal() != "1" && sC[1].GetVal() != "1")
+               out << SP << "else if (" << sC[0] << " == 1 && " << sC[1] << " == 1 )\n";
+            else if (sC[0].GetVal() == "1")
+               out << SP << "else if (" << sC[1] << " == 1)\n";
+            else if (sC[1].GetVal() == "1")
+               out << SP << "else if (" << sC[0] << " == 1)\n";
+            out << SP << SP << biasShapeType << " = 3;\n";
+            out << SP << "else\n";
+            out << SP << SP << "throw std::runtime_error(\"TMVA SOFIE Gemm Op - bias tensor "
+                                 << ConvertDimShapeToString(fDimShapeC) << " cannot be broadcasted to "
+                                 << ConvertDimShapeToString(fShapeY) << "\");\n";
+         }
          auto SP2 = SP;
          if (doStackMul) {
             out << SP << "size_t " << opName << "_y_offset = 0;\n"; // needed if we stack the gemm operations
@@ -388,9 +436,11 @@ namespace SOFIE{
          if (fBroadcastBias) {
 
             fAttrBeta = 1.;
+
+            // loop on first output dimension
             out << SP2 << "for (size_t j = 0; j < " << sY[0] << "; j++) { \n";
             out << SP2 << SP << "size_t y_index = ";
-            if (doStackMul) // add offset in caseof stack multiplications (not sure if bias is present in these cases)
+            if (doStackMul) // add offset in case of stack multiplications (not sure if bias is present in these cases)
                out <<  opName << "_y_offset + ";
             if (sY[1].GetVal() != "1")
                out << sY[1] << " * j;\n";
@@ -408,6 +458,16 @@ namespace SOFIE{
             } else if (sC[0].GetVal() == "1" && sC[1].GetVal() == "1") {
                // scalar case
                out << prefix << "Fill(" << target << " + y_index, tensor_" << fNC << "[0], " << sY[1] << ");\n";
+            } else if (fCheckBiasShapeAtRuntime) {
+               // in the generic dynamic case we check at run time that bias is compatible
+               // we check that bias[0] = 1 or equal to SY[0] and that bias[1] = 1 or equal to SY[1]
+               // tbd: this run-time check could be moved outside the loop for better run-time efficiency
+               out << SP2 << SP << "if (" << biasShapeType << " == 1)\n";   // case vector of columns
+               out << SP << prefix << "Copy(" << target << " + y_index, tensor_" << fNC << ", " << sY[1] << ");\n";
+               out << SP2 << SP << "else if (" << biasShapeType << " == 2)\n";  // case vector of rows
+               out << SP << prefix << "Fill(" << target << " + y_index, tensor_" << fNC << "[j], " << sY[1] << ");\n";
+               out << SP2 << SP << "else \n";  // scalar case
+               out << SP << prefix << "Fill(" << target << " + y_index, tensor_" << fNC << "[0], " << sY[1] << ");\n";
             } else {
                throw std::runtime_error("TMVA SOFIE Gemm Op - invalid shape for bias tensor " + ConvertDimShapeToString(fDimShapeC));
             }
diff --git a/tmva/sofie/inc/TMVA/ROperator_NonZero.hxx b/tmva/sofie/inc/TMVA/ROperator_NonZero.hxx
index 8587035f8d44b..0aebf5b14309b 100644
--- a/tmva/sofie/inc/TMVA/ROperator_NonZero.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_NonZero.hxx
@@ -19,6 +19,7 @@ private:
 
    std::string fNX;
    std::string fNY;
+   std::string fNonZeroParam; // name of the parameter used to store the number of non zero elements when output is not constant
    std::vector<Dim> fShapeX;
    std::vector<Dim> fShapeY;
 
@@ -93,7 +94,15 @@ public:
          fShapeY[0] = fShapeX.size();
 
          // identify as -1 since we will declare maximum as size of input
-         fShapeY[1] = Dim{std::string("v_NonZero_") + fNX, static_cast<size_t>(-1)};
+         // we will compute at run time the actual number of non zero and rearrange the output vector accordingly
+         fNonZeroParam = "v_NonZero_" + fNX;
+         fShapeY[1] = Dim{fNonZeroParam, static_cast<size_t>(-1)};
+
+         // declare the parameter for number of non zero elements, used when output is not constant
+         auto inputLength = ConvertDimShapeToLength(fShapeX);
+         std::string codeDecl = SP + "size_t " + fNonZeroParam + " = " + inputLength + ";\n";
+         codeDecl += SP + "fV_NonZero_" + fNX + " = " + fNonZeroParam + ";\n";
+         model.AddExtraCodeForDimShapes(codeDecl);
 
          model.AddIntermediateTensor(fNY, ETensorType::INT64, fShapeY);
          if (model.Verbose()) {
@@ -104,13 +113,12 @@ public:
 
    std::string GenerateSessionMembersCode(std::string /*opName*/) override {
       if (fIsOutputConstant) return "";
-      // define output value used as max non zero with max size = input shape * N
-      auto inputLength = ConvertDimShapeToLength(fShapeX);
       std::stringstream out;
-      out << SP << "size_t fV_NonZero_" << fNX << " = " << inputLength << ";\n";
+      out << SP << "size_t fV_NonZero_" << fNX << " = 0;\n";
       return out.str();
    }
 
+
    std::string Generate(std::string opName) override {
       if (fIsOutputConstant) {
          return "";
@@ -127,9 +135,9 @@ public:
          inputLength = ConvertShapeToLength(intShapeX);
 
       size_t dims = fShapeX.size();
-      out << "\n//------ NonZero\n";
+      out << "\n//------ NonZero  -> " << ConvertDimShapeToString(fShapeY) << "\n";
 
-      std::string vnonzero = "v_NonZero_" + fNX;
+      std::string vnonzero = fNonZeroParam;
 
       // loop on input indices
       out << SP << "size_t offset_" << opName << " = 0;\n";
diff --git a/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx b/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx
index 4168144f2e708..dee859978e76a 100644
--- a/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx
@@ -32,11 +32,12 @@ private:
 
    std::string fNData;        // input data tensor name
    std::string fNInput2;       // reshape or axes tensor name depending on operator
-   std::string fNOutput;               // output tensor name
-   std::vector<Dim> fShapeInput;     // input shape data
+   std::string fNOutput;            // output tensor name
+   std::vector<Dim> fShapeInput;    // input shape data
    std::vector<Dim> fShapeOutput;   // output shape data
-   std::vector<int64_t> fAttrAxes;         // axes attributes (provided for all version of Squeeze/Unsqueeze)
-   std::vector<int64_t> fShape;     // shape tensor values provided for Reshape
+   std::vector<Dim> fOutputShapeData; // in case output is a shape tensor we store here the shape value data (can be parametric)
+   std::vector<int64_t> fAttrAxes;  // axes attributes (provided for all version of Squeeze/Unsqueeze)
+   std::vector<int64_t> fShape;     // shape tensor values provided for Reshape (case of integer shapes)
 
 public:
 
@@ -74,34 +75,44 @@ public:
       fOutputTensorNames = { fNOutput };
    }
 
-   // output type is same as input
-   std::vector<ETensorType> TypeInference(std::vector<ETensorType> input) override {
-      auto ret = std::vector<ETensorType>(1, input[0]);
-      return ret;
-   }
-   std::vector<std::vector<size_t>> ShapeInference(std::vector<std::vector<size_t>> input) override {
-      return input;
-   }
 
    // output shape
-   std::vector<std::vector<Dim>> ShapeInference(const std::vector<std::vector<Dim>> & input)  {
-      std::vector<std::vector<Dim>> ret;
-      auto & input_shape = input[0];
+   std::vector<Dim> DoShapeInference(const std::vector<Dim> & input_shape, const std::vector<Dim> & target_shape)  {
       if (fOpMode == Reshape) {
          // correct the provided shape (here we have the value) for 0 or -1
-         std::vector<Dim> output_shape(fShape.size());
-         assert(!fShape.empty() && !fDynamicShape);
+         // the target_shape can be a scalar in case of not present shape input tensor
+         std::vector<Dim> output_shape = target_shape;
+         bool hasMinusOne = false;
+         bool hasZero = false;
          for (size_t i = 0; i < output_shape.size(); i++) {
-            if (fShape[i] > 0 || (fAllowZero && fShape[i] >= 0))
-               output_shape[i] = Dim{ static_cast<size_t>(fShape[i]) };
-            else if (!fAllowZero && fShape[i] == 0)
-               output_shape[i] = input_shape[i];
+            // zero in the given shape: kept as an explicit 0 if fAllowZero is set, otherwise replaced by the input dimension
+            if (!output_shape[i].isParam) {
+               if (output_shape[i].dim == 0) {
+                  hasZero = true;
+                  if (fAllowZero)
+                     output_shape[i] = Dim{0};
+                  else {
+                     if (i > 0 && output_shape.size() != input_shape.size())
+                        std::cout << "WARNING: TMVA Reshape Op : output shape has zero value at index " << i <<
+                                  " but input shape has a different rank than output shape" << std::endl;
+                     if (i >= input_shape.size())
+                        throw std::runtime_error("TMVA Reshape Op : output shape has zero value at index " + std::to_string(i) +
+                              " but input shape does not have corresponding index");
+                     output_shape[i] = input_shape[i]; // only in this branch: must not overwrite the explicit zero above
+                  }
+               } else if (output_shape[i].dim == static_cast<size_t>(-1)) {
+                  hasMinusOne = true;
+               }
+            }
+         }
+         if (fAllowZero && hasZero && hasMinusOne) { // ambiguous only when zeros are kept; otherwise they are already replaced
+            throw std::runtime_error("TMVA Reshape Op : zero value in shape is not allowed when there is also a -1 in shape");
          }
-         // now case of -1 in shape
+         // now case of -1 in shape - we can infer the value of -1 from all other values
          for (size_t i = 0; i < output_shape.size(); i++) {
-            if (fShape[i] == -1) {
+            if (output_shape[i] == static_cast<size_t>(-1) && !output_shape[i].isParam) {
                auto tmp = output_shape;
-               tmp.erase(tmp.begin() + i);
+               tmp.erase(tmp.begin() + i); // erase -1 value to compute the length of the other dimensions
                auto tmp_length = ConvertDimShapeToLength(tmp);
                auto input_length = ConvertDimShapeToLength(input_shape);
                if (fVerbose)
@@ -174,7 +185,7 @@ public:
             throw std::runtime_error("TMVA Reshape Op : Invalid  shapes : " + ConvertDimShapeToString(input_shape) +
                                      ConvertDimShapeToString(output_shape));
          }
-         ret.push_back(output_shape);
+         return output_shape;
 
       } else if (fOpMode == Flatten) {
          // flatten case
@@ -185,7 +196,7 @@ public:
          auto l1 = ConvertDimShapeToLength(s1);
          auto l2 = ConvertDimShapeToLength(s2);
          std::vector<Dim> newShape = {Dim{l1}, Dim{l2}};
-         ret.push_back(newShape);
+         return newShape;
       } else if (fOpMode == Squeeze) {
          // squeeze
          // assume no axis is provided - remove all axes with value equal to 1
@@ -200,10 +211,8 @@ public:
                }
             }
          } else {
-            std::cout << "getting shape for Squeeze...from attribute\n";
             auto axes = fAttrAxes;
             for (size_t i = 0; i < axes.size(); i++) {
-               std::cout << i << "  " << axes[i] << std::endl;
                if (axes[i] < 0)
                   axes[i] += input_shape.size();
                if (!(output_shape[axes[i]] == Dim{1}))
@@ -213,13 +222,10 @@ public:
             // for calling vector::erase we must sort axes in decreasing order to avoid
             std::sort(axes.begin(), axes.end(), std::greater<int>());
             for (auto & axis : axes) {
-               std::cout << "erase give axis " << axis << "  -> ";
-               for (auto & o : output_shape) std::cout << o << " , ";
-               std::cout << std::endl;
                output_shape.erase(output_shape.begin() + axis);
             }
          }
-         ret.push_back(output_shape);
+         return output_shape;
       }
       else if (fOpMode == Unsqueeze) {
          // unsqueeze
@@ -227,7 +233,7 @@ public:
          auto output_shape = input_shape;
          auto &axes = fAttrAxes;
          // output rank
-         int64_t r = input[0].size() + axes.size();
+         int64_t r = input_shape.size() + axes.size();
          for (auto &a : axes) {
             int64_t i = static_cast<int64_t>(a);
             if (i < -r || i > r - 1)
@@ -238,16 +244,18 @@ public:
                // negative axes
                output_shape.insert(output_shape.end() + i + 1, Dim{1});
          }
-         ret.push_back(output_shape);
+         return output_shape;
       }
-      return ret;
+      throw std::runtime_error("TMVA Reshape Op : Invalid ReshapeOpMode");
+      return {Dim{}};
    }
 
    void Initialize(RModel& model) override {
 
       fVerbose = model.Verbose();
       if (fVerbose)
-         std::cout << "initialize reshape op type " << fOpMode << " - " << fNInput2 << " " << fNData << std::endl;
+         std::cout << "initialize reshape op type " << fOpMode << " -  for input " << fNData
+                   << " to shape given by " << fNInput2 << std::endl;
 
       if (model.CheckIfTensorAlreadyExist(fNData) == false) {
           // input must be a graph input, or already initialized intermediate tensor
@@ -272,16 +280,22 @@ public:
                else
                   fAttrAxes = std::vector<int64_t>(values, values + n);
 
-               fShapeOutput = ShapeInference({fShapeInput})[0];
+               std::vector<Dim> targetShape(fShape.begin(),fShape.end());
+               fShapeOutput = DoShapeInference(fShapeInput, targetShape);
                // set flag to not write tensor in weight file. Its data will be hard-coded in way model is constructed
                model.SetNotWritableInitializedTensor(fNInput2);
             } else if (model.IsShapeTensor(fNInput2)) {
                auto shapeData = model.GetShapeTensorValues(fNInput2);
-               fShapeOutput = shapeData;
+               fShapeOutput = DoShapeInference(fShapeInput, shapeData);
+               if (model.Verbose())
+                  std::cout << "Reshape op - get output shape from shape tensor " << fNInput2 << " with value " << ConvertDimShapeToString(shapeData) << std::endl;
             } else {
                // we cannot get shape at initialization time but at run-time
                fDynamicShape = true;
                // size of shape output us given by size of shape input tensor
+               if (model.IsDynamicTensor(fNInput2)) {
+                  throw std::runtime_error("TMVA Reshape Op 2nd input Tensor " + fNInput2 + " cannot have dynamic shape");
+               }
                auto shapeInput2 = model.GetTensorShape(fNInput2);
                fShapeOutput.resize(shapeInput2[0]);
                for (size_t i = 0; i < fShapeOutput.size(); i++) {
@@ -293,10 +307,9 @@ public:
          }
       } else if (!fAttrAxes.empty()) {
          // case fNShape is empty and axes are provided as attributes (e.g. for Unsqueeze)
-         std::cout << "attribute axes exists\n";
-         fShapeOutput = ShapeInference({fShapeInput})[0];
+         fShapeOutput = DoShapeInference(fShapeInput, std::vector<Dim>{});
       } else if (fOpMode == Flatten || fOpMode == Squeeze) {
-         fShapeOutput = ShapeInference({fShapeInput})[0];
+         fShapeOutput = DoShapeInference(fShapeInput, std::vector<Dim>{});
       } else {
          throw std::runtime_error("TMVA Reshape Op : Invalid Input/Attribute data");
       }
@@ -313,14 +326,15 @@ public:
             ConvertValuesToString(ConvertShapeToLength(o_shape), inputData) << std::endl;
          }
       }
-      // for shape tensors we can have it if output shape is size==1 or a scalar
+      // for input shape tensors we can have it if output shape is size==1 or a scalar
       else if (model.IsShapeTensor(fNData) && fShapeOutput.size() <=1) {
-         fIsOutputConstant = true;
-         auto inputData = model.GetShapeTensorValues(fNData);
-         model.AddShapeTensor(fNOutput, inputData);
+         // not sure if we ever end-up here - maybe reshaping from scalar to vector or viceversa
+         fIsOutputParamShape = true;
+         fOutputShapeData = model.GetShapeTensorValues(fNData);
+         model.AddShapeTensor(fNOutput, fOutputShapeData);
          if (model.Verbose()) {
             std::cout << Name() << " : " << fNData << " " << ConvertDimShapeToString(fShapeInput) << " -->  " << fNOutput << " (shape) " << ConvertDimShapeToString(fShapeOutput)  << " : " <<
-            ConvertDimShapeToString(inputData) << std::endl;
+            ConvertDimShapeToString(fOutputShapeData) << std::endl;
          }
       }
       else {
@@ -332,7 +346,7 @@ public:
    }
 
    std::string Generate(std::string opName) override {
-      if (fIsOutputConstant) return "";  //no op for constant tensors
+
 
       std::stringstream out;
       std::string opType = "Reshape";
@@ -345,6 +359,17 @@ public:
 
       out << SP << "///--------" << opType << " operator " << opName << " --> " << ConvertDimShapeToString(fShapeOutput) << "\n";
 
+      if (fIsOutputConstant) return out.str();  //no op for constant tensors
+
+      if (fIsOutputParamShape) {
+         // output is a parametric shape tensor: emit only the assignments of its (possibly parametric) values
+         out << "//----------------output is a shape tensor----------\n";
+         for (size_t i = 0; i < fOutputShapeData.size(); i++) { // use the data size: fShapeOutput can be empty (scalar)
+            out << SP << "tensor_" << fNOutput << "[" << i << " ] = " << fOutputShapeData[i].GetVal() << ";\n";
+         }
+         return out.str();
+      }
+
       // in case of dynamic output shape we need to set the shape value from input shape tensor
       // and take case of the zero values
       if (fDynamicShape) {
@@ -363,7 +388,8 @@ public:
       if (lengthOut != lengthIn) {
          // check needs to be done at run-time
          out << SP << "if (" << lengthOut << "!=" << lengthIn << ")\n";
-         out << "throw std::runtime_error(\"TMVA SOFIE Reshape Op : output lengths is different than input one\");\n";
+         out << SP << SP << "throw std::runtime_error(\"TMVA SOFIE Reshape " << opName << " output length "
+             << lengthOut << " is different than input one " << lengthIn << "\");\n";
       }
 
 
diff --git a/tmva/sofie/inc/TMVA/ROperator_Slice.hxx b/tmva/sofie/inc/TMVA/ROperator_Slice.hxx
index d119fa3a29ea1..eb90c6ddb01d6 100644
--- a/tmva/sofie/inc/TMVA/ROperator_Slice.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_Slice.hxx
@@ -29,8 +29,10 @@ private:
    std::string fNData;        // input data tensor name
    std::string fNOutput;      // output data name
    std::vector<std::string> fNames;       // tensor names for meta(axis) information
-   std::vector<Dim> fShapeInput;     // input shape data
-   std::vector<Dim> fShapeOutput;   // output shape data
+   std::vector<Dim> fShapeInput;     // input shape
+   std::vector<Dim> fShapeOutput;   // output shape
+   std::vector<Dim> fOutputShapeData;   // output shape data in case output is a shape param tensor
+
    // saved Start/End.Steps are corrected from initial ONNX for negative/default values
    // and are available for each axis
    std::vector<Dim> fStart;         // starting values of slices for all axes
@@ -287,8 +289,11 @@ public:
          size_t outputSize = ConvertShapeToLength(ConvertShapeToInt(fShapeOutput));
          std::vector<int64_t> outputData(outputSize);
          std::vector<size_t> inputStride = UTILITY::ComputeStrideFromShape(ConvertShapeToInt(fShapeInput));
-         for (size_t ii = 0; ii< fStart.size(); ii++)
-            std::cout << fStart[ii] << "  " << fEnd[ii] << "  " << fSteps[ii] << std::endl;
+         if (model.Verbose()) {
+            std::cout << "Do slice for initialized input ..(start, end, step)\n";
+            for (size_t ii = 0; ii< fStart.size(); ii++)
+               std::cout << fStart [ii] << "  " << fEnd[ii] << "  " << fSteps[ii] << std::endl;
+         }
           // perform slice using a recursive function- need to use two lambda functions for this
          auto sliceRecursive = [&](size_t iaxis, size_t & outIdx, size_t & inOffset) {
             auto slice_impl = [&](size_t iax, size_t & outputIdx, size_t & inputOffset, auto & sliceRecImpl) {
@@ -300,14 +305,12 @@ public:
                   indices.push_back(i);
                if (iax == dim-1) { // last axis
                   for (size_t i = 0; i < indices.size(); i++) {
-                     std::cout << outputIdx << " , " << indices[i] << " " << inputOffset << " ; ";
                      outputData[outputIdx] = inputData[inputOffset + indices[i]];
                      outputIdx++;
                   }
                   return;
                } else {
                   for (size_t i = 0; i < indices.size(); i++) {
-                     std::cout << inputStride[iax] << " , " << indices[i] << " " << inputOffset << "  ";
                      size_t offset = inputOffset + inputStride[iax]*indices[i];
                      sliceRecImpl(iax+1, outputIdx, offset,sliceRecImpl);
                   }
@@ -325,6 +328,32 @@ public:
                      << ConvertValuesToString(outputData) << std::endl;
          }
       }
+      else if (model.IsShapeTensor(fNData) && !fStart[0].isParam && !fEnd[0].isParam && !fSteps[0].isParam && fSteps[0].dim == 1) {
+         // input is a shape tensor (rank=1, axis=0): with a unit step the Slice is a plain sub-range copy; other steps use the generic path
+         auto inputData = model.GetShapeTensorValues(fNData);
+         fOutputShapeData = std::vector<Dim>(inputData.begin() + fStart[0].dim, inputData.begin() + fEnd[0].dim);
+         // try to convert to integer values if possible
+         auto outputData = ConvertShapeToInt(fOutputShapeData);
+         fShapeOutput = { Dim{fOutputShapeData.size()}};
+         if (outputData.empty()) {
+            // is a param shape tensor
+            model.AddShapeTensor(fNOutput, fOutputShapeData);
+            fIsOutputParamShape = true;
+            if (model.Verbose()) {
+               std::cout << "Slice: output is a shape tensor -> " << fNOutput << "  " << ConvertDimShapeToString(fShapeOutput) << " with values "
+                        << ConvertDimShapeToString(fOutputShapeData) << " (shape)" << std::endl;
+            }
+         } else {
+            fIsOutputConstant = true;
+            std::vector<int64_t> data(outputData.size());
+            std::copy(outputData.begin(), outputData.end(), data.begin());
+            model.AddConstantTensor<int64_t>(fNOutput, {data.size()}, data.data());
+            if (model.Verbose()) {
+               std::cout << "Slice: output is a constant tensor -> " << fNOutput << "  " << ConvertDimShapeToString(fShapeOutput) << " with values "
+                        << ConvertDimShapeToString(fOutputShapeData) << " constant " << std::endl;
+            }
+         }
+      }
       else {
          // check if Slice is just an Identity operator in case start = 0, end = input_shape and step=1
          size_t ndim = fShapeInput.size();
@@ -339,7 +368,7 @@ public:
          }
 
          model.AddIntermediateTensor(fNOutput, model.GetTensorType(fNData), fShapeOutput);
-         if (fIdentitySlice)  model.AddAliasTensor(fNOutput, fNData);
+         //if (fIdentitySlice)  model.AddAliasTensor(fNOutput, fNData);
 
          if (model.Verbose()) {
             std::cout << "Slice " << fNData << "  " << ConvertDimShapeToString(fShapeInput)
@@ -362,12 +391,21 @@ public:
       out << "///------- Slice operator " << opName << "---> " << fNOutput << " "
           << ConvertDimShapeToString(fShapeOutput) << "\n" << std::endl;
       if (fIsOutputConstant) return out.str();  //no op for constant tensors
+      if (fIsOutputParamShape) {
+         out << "/// Slice output is a shape tensor with values : " << ConvertDimShapeToString(fOutputShapeData) << "\n";
+         // generate the code assigning the (possibly parametric) values to the output shape tensor
+         for (size_t i = 0; i < fOutputShapeData.size(); i++) {
+            out << SP << "tensor_" << fNOutput << "[" << i << "] = " << fOutputShapeData[i] << ";\n";
+         }
+         return out.str();
+      }
 
       size_t ndim = fShapeInput.size();
 
       if (fIdentitySlice) {
-         out << "/// Slice is just an identity (copy pointers) \n";
-         out << SP << "tensor_" << fNOutput << " = tensor_" << fNData << ";\n";
+         out << "/// Slice is just an identity (copy) \n";
+         //out << SP << "tensor_" << fNOutput << " = const_cast<" << ConvertTypeToString(fOutputType) << " *>(tensor_" << fNData << ");\n";
+         out << SP << "std::copy(tensor_" << fNData << ", tensor_" << fNData << " + " << ConvertDimShapeToLength(fShapeInput) << ", tensor_" << fNOutput << ");\n";
          return out.str();
       }
 
diff --git a/tmva/sofie/inc/TMVA/ROperator_Softmax.hxx b/tmva/sofie/inc/TMVA/ROperator_Softmax.hxx
index db79c2b6d0f7d..025d6d678088a 100644
--- a/tmva/sofie/inc/TMVA/ROperator_Softmax.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_Softmax.hxx
@@ -62,12 +62,14 @@ public:
       }
    }
 
-   std::string Generate(std::string OpName) override {
-      OpName = "op_" + OpName;
+   std::string Generate(std::string opName) override {
+      opName = "op_" + opName;
       if (fShape.empty()) {
          throw std::runtime_error("TMVA SOFIE Operator Softmax called to Generate without being initialized first");
       }
       std::stringstream out;
+       out << "///------- Softmax " << opName << " ---> "  // << fNY << " "
+           << ConvertDimShapeToString(fShape) << "\n" << std::endl;
       size_t size = fShape.size();
       auto length_str = ConvertDimShapeToLength(fShape);
       size_t axis = fAttrAxis < 0 ? size + fAttrAxis : fAttrAxis;
@@ -85,7 +87,7 @@ public:
             num_rows = "(" + length_str + ") / (" + axis_size + ")";
          }
 
-         out << "\n" << SP << "//------ SOFTMAX - " << size << "  " << length_str << "  " << axis << "\n";
+         out << SP << "//-----  softmax axis is last one - " << axis << "\n";
          out << SP << "for (int i = 0; i < " << num_rows << "; ++i) {\n";
          out << SP << SP << "size_t offset = i * " << axis_size << ";\n";
          out << SP << SP << fType << " const * x_ptr = &tensor_" << fNX << "[offset];\n";
@@ -111,6 +113,7 @@ public:
          out << SP << "}\n";
 
       } else {
+         // generic case for any axis
          auto stride = UTILITY::ComputeStrideFromShape(fShape);
          size_t k = 0;
          std::vector<std::string> l(size);
@@ -118,7 +121,7 @@ public:
             if (i != axis) {
                for (size_t j = 0; j < k; j++) out << SP;
                l[i] = std::string("i") + std::to_string(i);
-               out << "for (int " << l[i] << " = 0; " << l[i] << " < " << fShape[i] << "; " << l[i] << "++) {\n";
+               out << SP << "for (int " << l[i] << " = 0; " << l[i] << " < " << fShape[i] << "; " << l[i] << "++) {\n";
                k++;
             }
          }
@@ -167,7 +170,8 @@ public:
          out << "for (int i = 0; i < " << fShape[axis] << "; i++) {\n";
          for (size_t j = 0; j < size; j++) out << SP;
          out << "size_t id = index + i";
-         if (stride[axis].GetVal() != "1") out << "*(" << stride[axis] << ");\n";
+         if (stride[axis].GetVal() != "1") out << "*(" << stride[axis] << ")";
+         out << ";\n";
          for (size_t j = 0; j < size; j++) out << SP;
          out << "tensor_" << fNY << "[id] /= sum;\n";
          if (fLogSoftmax) {
diff --git a/tmva/sofie/inc/TMVA/ROperator_Where.hxx b/tmva/sofie/inc/TMVA/ROperator_Where.hxx
index 4c42ad6d655d9..fd498074df513 100644
--- a/tmva/sofie/inc/TMVA/ROperator_Where.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_Where.hxx
@@ -7,32 +7,36 @@
 
 #include <sstream>
 
-namespace TMVA {
-namespace Experimental {
-namespace SOFIE {
+namespace TMVA{
+namespace Experimental{
+namespace SOFIE{
 
-template <typename T>
-class ROperator_Where final : public ROperator {
+
+
+template<typename T>
+class ROperator_Where final : public ROperator{
 private:
 
    bool fIsInputBoolTensor = false;
 
-   // Tensor names: C = condition, X = true branch, Y = false branch, Z = output
-   std::string fNC;            // condition (bool)
-   std::string fNX;            // true-branch values
-   std::string fNY;            // false-branch values
-   std::string fNZ;            // output
-   std::string fNBroadcastedC;
+
+   std::string fNX;
+   std::string fNY;
+   std::string fNC;
    std::string fNBroadcastedX;
    std::string fNBroadcastedY;
+   std::string fNBroadcastedC;
+   std::string fNZ;
 
-   // Static shapes (used when all inputs are non-dynamic)
-   std::vector<size_t> fShapeC;
+
+
+   // static shapes (used when tensors are not dynamic)
    std::vector<size_t> fShapeX;
    std::vector<size_t> fShapeY;
+   std::vector<size_t> fShapeC;
    std::vector<size_t> fShapeZ;
 
-   // Dynamic shapes (Dim-aware, used when any input is dynamic)
+   // Dynamic generic shapes
    std::vector<Dim> fDimShapeC;
    std::vector<Dim> fDimShapeX;
    std::vector<Dim> fDimShapeY;
@@ -46,47 +50,37 @@ private:
    int fBroadcastFlag = 0;
 
 public:
-   ROperator_Where() {}
-   ROperator_Where(const std::string &nameC,
-                   const std::string &nameX,
-                   const std::string &nameY,
-                   const std::string &nameZ)
-      : fNC(UTILITY::Clean_name(nameC)),
-        fNX(UTILITY::Clean_name(nameX)),
-        fNY(UTILITY::Clean_name(nameY)),
-        fNZ(UTILITY::Clean_name(nameZ))
-   {
-      fInputTensorNames  = { fNC, fNX, fNY };
-      fOutputTensorNames = { fNZ };
-   }
+   ROperator_Where(){}
+   ROperator_Where(const std::string & nameC, const std::string & nameX, const std::string & nameY, const std::string & nameZ):
+      fNX(UTILITY::Clean_name(nameX)), fNY(UTILITY::Clean_name(nameY)), fNC(UTILITY::Clean_name(nameC)), fNZ(UTILITY::Clean_name(nameZ)){
+         fInputTensorNames = { fNX, fNY, fNC };
+         fOutputTensorNames = { fNZ };
+      }
 
    // type of output given input
-   std::vector<ETensorType> TypeInference(std::vector<ETensorType> input) override
-   {
-      // output type follows X (and Y), not C (which is bool)
-      return { input[1] };
+   std::vector<ETensorType> TypeInference(std::vector<ETensorType> input) override {
+      return input;
    }
 
    // shape of output tensors given input tensors
-   std::vector<std::vector<size_t>> ShapeInference(std::vector<std::vector<size_t>> input) override
-   {
-      // conservative: assume same shape (broadcasting resolved in Initialize)
-      return { input[1] };
+   std::vector<std::vector<size_t>> ShapeInference(std::vector<std::vector<size_t>> input) override {
+      // assume now inputs have same shape (no broadcasting)
+      auto ret = std::vector<std::vector<size_t>>(1, input[0]); // return vector size 1 with first input
+      return ret;
    }
 
-   void Initialize(RModel &model) override
-   {
-      // ---------------------------------------------------------------- //
-      //  Check all inputs exist
-      // ---------------------------------------------------------------- //
-      if (!model.CheckIfTensorAlreadyExist(fNC))
-         throw std::runtime_error(std::string("TMVA SOFIE Where Op: condition tensor ") + fNC + " not found in model");
-      if (!model.CheckIfTensorAlreadyExist(fNX))
-         throw std::runtime_error(std::string("TMVA SOFIE Where Op: X tensor ") + fNX + " not found in model");
-      if (!model.CheckIfTensorAlreadyExist(fNY))
-         throw std::runtime_error(std::string("TMVA SOFIE Where Op: Y tensor ") + fNY + " not found in model");
-
-      // condition tensor is bool (uint8) - mark if it is a live input tensor
+   void Initialize(RModel& model) override {
+      // input must be a graph input, or already initialized intermediate tensor
+      if (!model.CheckIfTensorAlreadyExist(fNX)){
+         throw std::runtime_error(std::string("TMVA SOFIE Where Op Input Tensor ") + fNX + " is not found in model");
+      }
+      if (!model.CheckIfTensorAlreadyExist(fNY)) {
+         throw std::runtime_error(std::string("TMVA SOFIE Where Op Input Tensor ") + fNY + " is not found in model");
+      }
+      if (!model.CheckIfTensorAlreadyExist(fNC)) {
+         throw std::runtime_error(std::string("TMVA SOFIE Where Op Input Tensor ") + fNC + " is not found in model");
+      }
+      // check if fNC input tensor is boolean
       if (model.IsReadyInputTensor(fNC))
          fIsInputBoolTensor = true;
 
@@ -117,13 +111,14 @@ public:
          fDimShapeY = ConvertShapeToDim(fShapeY);
       }
 
+
       if (model.Verbose()) {
          if (dynamicInputs & 1)
             std::cout << "Where : condition " << fNC << " is dynamic " << ConvertDimShapeToString(fDimShapeC) << "\n";
          if (dynamicInputs & 2)
-            std::cout << "Where : X " << fNX << " is dynamic " << ConvertDimShapeToString(fDimShapeX) << "\n";
+            std::cout << "Where : X " << fNX << " is dynamic " << ConvertDimShapeToString(fDimShapeX) << "\n";
          if (dynamicInputs & 4)
-            std::cout << "Where : Y " << fNY << " is dynamic " << ConvertDimShapeToString(fDimShapeY) << "\n";
+            std::cout << "Where : Y " << fNY << " is dynamic " << ConvertDimShapeToString(fDimShapeY) << "\n";
       }
 
       // ---------------------------------------------------------------- //
@@ -131,79 +126,184 @@ public:
       // ---------------------------------------------------------------- //
       if (dynamicInputs == 0) {
 
-         // Multidirectional broadcast over all three tensors
-         auto retXY = UTILITY::MultidirectionalBroadcastShape(fShapeX, fShapeY);
-         fBroadcastFlag = retXY.first;
-         fShapeZ = retXY.second;
-         // also factor in C
-         auto retCZ = UTILITY::MultidirectionalBroadcastShape(fShapeC, fShapeZ);
-         fBroadcastFlag |= retCZ.first;
-         fShapeZ = retCZ.second;
-
-         bool allConstant = model.IsInitializedTensor(fNC) &&
-                            model.IsInitializedTensor(fNX) &&
-                            model.IsInitializedTensor(fNY);
-
-         if (allConstant) {
-            // ----------------------------------------------------------
-            //  Constant folding: evaluate Where at model initialisation
-            // ----------------------------------------------------------
-            auto broadcastIfNeeded = [&](const std::string &name,
-                                         const std::vector<size_t> &shape,
-                                         std::string &bcName,
-                                         const std::string &prefix) {
-               if (shape != fShapeZ) {
-                  bcName = prefix + name + "to" + fNZ;
-                  auto data = model.GetInitializedTensorData(name);
-                  std::shared_ptr<void> bcData(
-                     UTILITY::UnidirectionalBroadcast(static_cast<T *>(data.get()), shape, fShapeZ),
+         bool broadcast = !UTILITY::AreSameShape(fShapeX, fShapeY) || !UTILITY::AreSameShape(fShapeX, fShapeC);
+         if (broadcast) {
+            // find shape to broadcast between X,Y,C looking for max length
+            size_t lengthX = ConvertShapeToLength(fShapeX);
+            size_t lengthY = ConvertShapeToLength(fShapeY);
+            size_t lengthC = ConvertShapeToLength(fShapeC);
+            bool broadcastX = false, broadcastY = false, broadcastC = false;
+            if (lengthX >= lengthY && lengthX >= lengthC) {
+               fShapeZ = fShapeX;
+               // broadcast Y and C if different than X
+               broadcastY = (lengthY != lengthX);
+               broadcastC = (lengthC != lengthX);
+            } else if (lengthY >= lengthX && lengthY >= lengthC) {
+               fShapeZ = fShapeY;
+               // broadcast X and C if different than Y
+               broadcastX = (lengthX != lengthY);
+               broadcastC = (lengthC != lengthY);
+            } else if (lengthC >= lengthX && lengthC >= lengthY) {
+               fShapeZ = fShapeC;
+               // broadcast X and Y if different than C
+               broadcastX = (lengthX != lengthC);
+               broadcastY = (lengthY != lengthC);
+            }
+
+            // Broadcast X to Z
+            if (broadcastX) {
+               fNBroadcastedX = "BC_" + fNX + "_to_" + fNZ;
+               if (model.IsInitializedTensor(fNX)) {
+                  auto data = model.GetInitializedTensorData(fNX);
+                  std::shared_ptr<void> broadcastedData(
+                     UTILITY::UnidirectionalBroadcast(static_cast<T *>(data.get()), fShapeX, fShapeZ),
                      std::default_delete<T[]>());
-                  model.AddConstantTensor(bcName, model.GetTensorType(name), fShapeZ, bcData);
+                  // add the broadcasted X as a new constant tensor and update the stored shape of X
+                  model.AddConstantTensor(fNBroadcastedX, model.GetTensorType(fNX), fShapeZ, broadcastedData);
+                  fShapeX = fShapeZ;
+               } else {
+                  // I need to prepend to shape of X the extra dimensions added for broadcasting to Z
+                  if (fShapeX.size() < fShapeZ.size()) {
+                     size_t nPrepend = fShapeZ.size() - fShapeX.size();
+                     fShapeX.insert(fShapeX.begin(), nPrepend, 1);
+                  }
                }
-            };
-
-            broadcastIfNeeded(fNX, fShapeX, fNBroadcastedX, "BC_");
-            broadcastIfNeeded(fNY, fShapeY, fNBroadcastedY, "BC_");
-            broadcastIfNeeded(fNC, fShapeC, fNBroadcastedC, "BC_");
-
-            const std::string &nameC = fNBroadcastedC.empty() ? fNC : fNBroadcastedC;
-            const std::string &nameX = fNBroadcastedX.empty() ? fNX : fNBroadcastedX;
-            const std::string &nameY = fNBroadcastedY.empty() ? fNY : fNBroadcastedY;
+            }
+            // Broadcast Y to Z
+            if (broadcastY) {
+               fNBroadcastedY = "BC_" + fNY + "_to_" + fNZ;
+               if (model.IsInitializedTensor(fNY)) {
+                  auto data = model.GetInitializedTensorData(fNY);
+                  std::shared_ptr<void> broadcastedData(
+                     UTILITY::UnidirectionalBroadcast(static_cast<T *>(data.get()), fShapeY, fShapeZ),
+                     std::default_delete<T[]>());
+                  // do not update tensor Y but add a broadcasted one (since Y can be input to some other operators)
+                  model.AddConstantTensor(fNBroadcastedY, model.GetTensorType(fNY), fShapeZ, broadcastedData);
+                  fShapeY = fShapeZ;
+               } else {
+                  // I need to prepend to shape of Y the extra dimensions added for broadcasting to Z
+                  if (fShapeY.size() < fShapeZ.size()) {
+                     size_t nPrepend = fShapeZ.size() - fShapeY.size();
+                     fShapeY.insert(fShapeY.begin(), nPrepend, 1);
+                  }
 
+               }
+            }
+            // Broadcast C to Z
+            if (broadcastC) {
+               fNBroadcastedC = "BC_" + fNC + "_to_" + fNZ;
+               if (model.IsInitializedTensor(fNC)) {
+                  auto data = model.GetInitializedTensorData(fNC);
+                  std::shared_ptr<void> broadcastedData(
+                     UTILITY::UnidirectionalBroadcast(static_cast<T *>(data.get()), fShapeC, fShapeZ),
+                     std::default_delete<T[]>());
+                  // do not update tensor C but add broadcasted one (since it can be input to some other operators)
+                  model.AddConstantTensor(fNBroadcastedC, model.GetTensorType(fNC), fShapeZ, broadcastedData);
+                  fShapeC = fShapeZ;
+               } else {
+                  // I need to prepend to shape of C the extra dimensions added for broadcasting to Z
+                  if (fShapeC.size() < fShapeZ.size()) {
+                     size_t nPrepend = fShapeZ.size() - fShapeC.size();
+                     fShapeC.insert(fShapeC.begin(), nPrepend, 1);
+                  }
+               }
+            }
+         } else {
+            fShapeZ = fShapeX;
+         }
+         // check case of constant  output (if all inputs are defined)
+         if (model.IsInitializedTensor(fNC)) {
+            std::string nameC = fNBroadcastedC.empty() ? fNC : fNBroadcastedC;
             auto dataC = static_cast<bool *>(model.GetInitializedTensorData(nameC).get());
-            auto dataX = static_cast<T *>   (model.GetInitializedTensorData(nameX).get());
-            auto dataY = static_cast<T *>   (model.GetInitializedTensorData(nameY).get());
-
-            size_t len = ConvertShapeToLength(fShapeZ);
-            std::vector<T> dataZ(len);
-            for (size_t i = 0; i < len; ++i)
-               dataZ[i] = dataC[i] ? dataX[i] : dataY[i];
-
-            model.AddConstantTensor<T>(fNZ, fShapeZ, dataZ.data());
             model.SetNotWritableInitializedTensor(nameC);
-            model.SetNotWritableInitializedTensor(nameX);
-            model.SetNotWritableInitializedTensor(nameY);
+            T *dataX = nullptr;
+            T *dataY = nullptr;
+            std::vector<Dim> shapeDataX;
+            std::vector<Dim> shapeDataY;
+            if (model.IsInitializedTensor(fNX)) {
+               std::string nameX = fNBroadcastedX.empty() ? fNX : fNBroadcastedX;
+               dataX = static_cast<T *>(model.GetInitializedTensorData(nameX).get());
+               // flag tensors to not be written in a file
+               model.SetNotWritableInitializedTensor(nameX);
+            } else if (model.IsShapeTensor(fNX)) {
+               shapeDataX = model.GetShapeTensorValues(fNX);
+            }
+            if (model.IsInitializedTensor(fNY)) {
+               std::string nameY = fNBroadcastedY.empty() ? fNY : fNBroadcastedY;
+               dataY = static_cast<T *>(model.GetInitializedTensorData(nameY).get());
+               model.SetNotWritableInitializedTensor(nameY);
+            } else if (model.IsShapeTensor(fNY)) {
+               shapeDataY = model.GetShapeTensorValues(fNY);
+            }
+            std::vector<T> dataZ;        // used in case output is constant tensor
+            std::vector<Dim> shapeDataZ; // used in case output is a shape tensor (can be also constant if all
+                                         // dimensions are not parametric)
+            // if fNC (condition) is initialized we know the output is a shape or a constant tensor,
+            // so we can compute it at initialization and add it as a constant tensor to the model
+            // (and not add the operator output as intermediate tensor to the model)
+            bool isOutputConstantTensor = true;
+            if (dataX && dataY) {
+               dataZ.resize(ConvertShapeToLength(fShapeZ));
+               for (size_t i = 0; i < dataZ.size(); i++)
+                  dataZ[i] = (dataC[i]) ? dataX[i] : dataY[i];
+               if (model.Verbose())
+                  std::cout << "data A and B : dataZ constant: " << ConvertValuesToString(dataZ) << std::endl;
+            } else if (dataX && shapeDataY.size() > 0) {
+               shapeDataZ.resize(ConvertShapeToLength(fShapeZ));
+               for (size_t i = 0; i < shapeDataZ.size(); i++) {
+                  shapeDataZ[i] = (dataC[i]) ? Dim{size_t(dataX[i])} : shapeDataY[i];
+                  isOutputConstantTensor &= !shapeDataZ[i].isParam;
+               }
+               if (model.Verbose())
+                  std::cout << "data A but shapeB " << ConvertDimShapeToString(shapeDataY) << "  "
+                         << isOutputConstantTensor << std::endl;
+            } else if (dataY && shapeDataX.size() > 0) {
+               shapeDataZ.resize(ConvertShapeToLength(fShapeZ));
+               for (size_t i = 0; i < shapeDataZ.size(); i++) {
+                  shapeDataZ[i] = (dataC[i]) ? shapeDataX[i] : Dim{size_t(dataY[i])}; // X is the shape tensor (shapeDataY is empty here)
+                  isOutputConstantTensor &= !shapeDataZ[i].isParam;
+               }
+               if (model.Verbose())
+                  std::cout << "data B but shapeA " << ConvertDimShapeToString(shapeDataX) << "  "
+                         << isOutputConstantTensor << std::endl;
+            } else if (shapeDataY.size() > 0 && shapeDataX.size() > 0) {
+               shapeDataZ.resize(ConvertShapeToLength(fShapeZ));
+               for (size_t i = 0; i < shapeDataZ.size(); i++) {
+                  shapeDataZ[i] = (dataC[i]) ? shapeDataX[i] : shapeDataY[i];
+                  isOutputConstantTensor &= !shapeDataZ[i].isParam;
+               }
+               if (model.Verbose())
+                  std::cout << " shapeA and B " << ConvertDimShapeToString(shapeDataX) << " shapeB "
+                         << ConvertDimShapeToString(shapeDataY) << "  " << isOutputConstantTensor << std::endl;
+            }
             fIsOutputConstant = true;
-            fOutputTensorNames.pop_back();
-
-            if (model.Verbose())
-               std::cout << "Where --> " << fNZ << " " << ConvertShapeToString(fShapeZ)
-                         << " : " << ConvertValuesToString(dataZ) << " (constant)\n";
-         } else {
-            // ----------------------------------------------------------
-            //  Non-constant static tensors - we don't need to broadcast tensors
-            // ----------------------------------------------------------
+            // add as constant or shape tensor depending on the case
+            if (dataZ.size() > 0)
+               model.AddConstantTensor<T>(fNZ, fShapeZ, dataZ.data());
+            else if (shapeDataZ.size() > 0)
+               model.AddShapeTensor(fNZ, shapeDataZ, fShapeZ.size() == 0);
+            else {
+               fIsOutputConstant = false;
+            }
+            if (fIsOutputConstant && model.Verbose())
+               std::cout << "Where op ---> " << fNZ << "  " << ConvertShapeToString(fShapeZ) << " : "
+                         << ((dataZ.size() > 0) ? ConvertValuesToString(dataZ) : ConvertDimShapeToString(shapeDataZ))
+                         << ((dataZ.size() > 0) ? " (constant)" : " (shape)") << std::endl;
+
+            // output is a constant tensor
+            if (fIsOutputConstant)
+               fOutputTensorNames.pop_back();
+         }
+         if (!fIsOutputConstant) {
 
             fDimShapeZ = ConvertShapeToDim(fShapeZ);
             model.AddIntermediateTensor(fNZ, model.GetTensorType(fNX), fShapeZ);
-
             if (model.Verbose())
-               std::cout << "Where : C=" << fNC << " " << ConvertShapeToString(fShapeC)
-                         << "  X=" << fNX << " " << ConvertShapeToString(fShapeX)
-                         << "  Y=" << fNY << " " << ConvertShapeToString(fShapeY)
-                         << " --> Z=" << fNZ << " " << ConvertShapeToString(fShapeZ) << "\n";
+               std::cout << "Where : condition : " << fNC << "  " << ConvertShapeToString(fShapeC) << " X "
+                         << fNX << "  " << ConvertShapeToString(fShapeX) << " Y " << fNY << "  "
+                         << ConvertShapeToString(fShapeY) << " ---> " << fNZ << "  " << ConvertShapeToString(fShapeZ)
+                         << std::endl;
          }
-
       } else {
          // ---------------------------------------------------------------- //
          //  Dynamic path: at least one input has a parametric shape
@@ -227,7 +327,7 @@ public:
             for (size_t i = 0; i < fDimShapeZ.size(); i++) {
                auto &s = fDimShapeZ[i];
                if (s.isParam && s.param.find("std::max") != std::string::npos) {
-                  // prefer X dim over Y dim
+                  // prefer X dim over Y dim
                   if (i < fDimShapeX.size() && IsInputDimParam(fDimShapeX[i].param)) {
                      s = (fDimShapeX[i].dim != 1) ? fDimShapeX[i] : fDimShapeY[i];
                   } else if (i < fDimShapeY.size() && IsInputDimParam(fDimShapeY[i].param)) {
@@ -236,35 +336,42 @@ public:
                }
             }
          }
+         // I need to prepend to shape of X,Y,C the extra dimensions added for broadcasting to Z
+         if (fDimShapeX.size() < fDimShapeZ.size()) {
+            size_t nPrepend = fDimShapeZ.size() - fDimShapeX.size();
+            fDimShapeX.insert(fDimShapeX.begin(), nPrepend, Dim{1});
+         }
+         if (fDimShapeY.size() < fDimShapeZ.size()) {
+            size_t nPrepend = fDimShapeZ.size() - fDimShapeY.size();
+            fDimShapeY.insert(fDimShapeY.begin(), nPrepend, Dim{1});
+         }
+         if (fDimShapeC.size() < fDimShapeZ.size()) {
+            size_t nPrepend = fDimShapeZ.size() - fDimShapeC.size();
+            fDimShapeC.insert(fDimShapeC.begin(), nPrepend, Dim{1});
+         }
 
          model.AddIntermediateTensor(fNZ, model.GetTensorType(fNX), fDimShapeZ);
 
          if (model.Verbose())
             std::cout << "Where (dynamic) : C=" << ConvertDimShapeToString(fDimShapeC)
-                      << "  X=" << ConvertDimShapeToString(fDimShapeX)
-                      << "  Y=" << ConvertDimShapeToString(fDimShapeY)
-                      << " --> Z=" << ConvertDimShapeToString(fDimShapeZ) << "\n";
+                      << "  A=" << ConvertDimShapeToString(fDimShapeX)
+                      << "  B=" << ConvertDimShapeToString(fDimShapeY)
+                      << " --> Y=" << ConvertDimShapeToString(fDimShapeZ) << "\n";
       }
    }
 
-   std::string GenerateInitCode() override
-   {
+   std::string GenerateInitCode() override {
       std::stringstream out;
       return out.str();
    }
 
-   std::string Generate(std::string opName) override
-   {
-      if (fIsOutputConstant) return "";
+   std::string Generate(std::string opName) override {
 
       opName = "op_" + opName;
-
-      if (fDimShapeZ.empty()) {
-         throw std::runtime_error("TMVA SOFIE Where Op called to Generate without being initialized first");
-      }
-
       std::stringstream out;
       out << SP << "\n//------ WHERE " << opName << " --> " << ConvertDimShapeToString(fDimShapeZ) << "\n";
+      if (fIsOutputConstant) return out.str();
+
 
       // ---------------------------------------------------------------- //
       //  Runtime broadcast validation (dynamic shapes, flag bit 4)
@@ -281,14 +388,14 @@ public:
                out << SP << SP << "if (" << fDimShapeX[i] << " != 1 && "
                    << fDimShapeX[i] << " != " << fDimShapeZ[i] << ")\n";
                out << SP << SP << SP
-                   << "throw std::runtime_error(\"SOFIE Where: cannot broadcast X dim " << i << " in " << opName << "\");\n";
+                   << "throw std::runtime_error(\"SOFIE Where: cannot broadcast A dim " << i << " in " << opName << "\");\n";
             }
             // validate Y vs Z
             if (i < fDimShapeY.size() && fDimShapeY[i].isParam) {
                out << SP << SP << "if (" << fDimShapeY[i] << " != 1 && "
                    << fDimShapeY[i] << " != " << fDimShapeZ[i] << ")\n";
                out << SP << SP << SP
-                   << "throw std::runtime_error(\"SOFIE Where: cannot broadcast Y dim " << i << " in " << opName << "\");\n";
+                   << "throw std::runtime_error(\"SOFIE Where: cannot broadcast B dim " << i << " in " << opName << "\");\n";
             }
             // validate C vs Z
             if (i < fDimShapeC.size() && fDimShapeC[i].isParam) {
@@ -300,10 +407,8 @@ public:
          }
          out << SP << "}\n";
       }
-
+      // now implement Where using the strides and looping over the different dimensions
       // ---------------------------------------------------------------- //
-      //  Runtime for non-constant, non-initialised tensors
-      //
       //  Generate loop(s) with per-dimension stride-based index arithmetic
       // ---------------------------------------------------------------- //
       auto stridesX = UTILITY::ComputeStrideFromShape(fDimShapeX);
@@ -336,9 +441,10 @@ public:
       std::string idxY = buildIdxExpr(fDimShapeY, stridesY, fDimShapeZ.size());
       std::string idxC = buildIdxExpr(fDimShapeC, stridesC, fDimShapeZ.size());
 
-      // Emit nested loops over output shape
+       // Emit nested loops over output shape
       int nloop = 0;
       std::string idxZ;
+      // case Z is a scalar (all dimensions are 1) or Z has no dimension
       if (fDimShapeZ.empty() ||
           std::all_of(fDimShapeZ.begin(), fDimShapeZ.end(),
                       [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) {
@@ -375,10 +481,13 @@ public:
 
       return out.str();
    }
+
+
 };
 
-} // namespace SOFIE
-} // namespace Experimental
-} // namespace TMVA
+}//SOFIE
+}//Experimental
+}//TMVA
+
 
-#endif // TMVA_SOFIE_ROperator_Where
+#endif //TMVA_SOFIE_ROperator_Where
diff --git a/tmva/sofie/inc/TMVA/SOFIE_common.hxx b/tmva/sofie/inc/TMVA/SOFIE_common.hxx
index 9f35cca5f7db3..e8fbc6ee82720 100644
--- a/tmva/sofie/inc/TMVA/SOFIE_common.hxx
+++ b/tmva/sofie/inc/TMVA/SOFIE_common.hxx
@@ -215,9 +215,49 @@ std::string ConvertDimShapeToLength(const std::vector<Dim> & shape);
 template<class T>
 std::string ConvertValToString(T value) {
    std::stringstream ret;
-   if (std::is_floating_point_v<T>)
-      ret << std::setprecision(std::numeric_limits<T>::max_digits10);
-   ret << value;
+   ret << std::to_string(value);
+   return ret.str();
+}
+// float specialization
+template<>
+inline std::string ConvertValToString<float>(float value) {
+   std::stringstream ret;
+   // special case for infinity and Nan
+   if (std::isinf(value))
+         ret << (value > 0 ? "std::numeric_limits<float>::infinity()" :
+                                  "-std::numeric_limits<float>::infinity()");
+   else if (std::isnan(value))
+         ret << "std::numeric_limits<float>::quiet_NaN()";
+   else {
+      ret << std::setprecision(std::numeric_limits<float>::max_digits10);
+      ret << value;
+   }
+   return ret.str();
+}
+// double specialization
+template<>
+inline std::string ConvertValToString<double>(double value) {
+   std::stringstream ret;
+   // special case for infinity and Nan
+   if (std::isinf(value))
+         ret << (value > 0 ? "std::numeric_limits<double>::infinity()" :
+                                  "-std::numeric_limits<double>::infinity()");
+   else if (std::isnan(value))
+         ret << "std::numeric_limits<double>::quiet_NaN()";
+   else {
+      ret << std::setprecision(std::numeric_limits<double>::max_digits10);
+      ret << value;
+   }
+   return ret.str();
+}
+// int64_t specialization for INT64_MIN
+template<>
+inline std::string ConvertValToString<int64_t>(int64_t value) {
+   std::stringstream ret;
+   if (value == INT64_MIN)
+      ret << "INT64_MIN";
+   else
+      ret << std::to_string(value);
    return ret.str();
 }
 
@@ -228,12 +268,7 @@ std::string ConvertValuesToString(size_t n, const T * data, size_t maxprint = -1
    std::stringstream ret;
    ret << "{ ";
    for (size_t i = 0; i < std::min(n,maxprint); i++) {
-      if (std::is_floating_point_v<T>)
-         ret << std::setprecision(std::numeric_limits<T>::max_digits10) << data[i];
-      else
-         // cast in case of boolean (int8)
-         ret << data[i];
-
+      ret << ConvertValToString(data[i]);
       if (i < n-1) ret << ", ";
       if (i < n-1 && i == maxprint-1) ret << "..... ";
    }
@@ -779,7 +814,8 @@ inline void Fill(float *output, float value, int size)
    std::fill(output, output + size, value);
 }
 
-inline void Copy(float *output, float const *input, int size)
+template <class T>
+inline void Copy(T *output, T const *input, int size)
 {
    std::copy(input, input + size, output);
 }
diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx
index 5c30a42619e55..a6e2d7432a678 100644
--- a/tmva/sofie/src/RModel.cxx
+++ b/tmva/sofie/src/RModel.cxx
@@ -188,30 +188,34 @@ void RModel::AddInputTensorName(std::string input_name) {
     fInputTensorNames.emplace_back(UTILITY::Clean_name(input_name));
 }
 
-void RModel::AddOperator(std::unique_ptr<ROperator> op, int order_execution) {
-    AddBlasRoutines(op->GetBlasRoutines());
-    auto libs = op->GetStdLibs();
-    auto op_input_tensors = op->GetOpInputTensors();
-    for (auto& stdlib : libs) {
-        AddNeededStdLib(stdlib);
-    }
-    if (order_execution >= 0) {
-        fOperators.insert(fOperators.begin() + order_execution, std::move(op));
-    } else {
-        fOperators.push_back(std::move(op));
-        order_execution = fOperators.size()-1;
-    }
-
-    // storing the last usage of tensors which are input to the operator
-    // (excluding tensors which are inputs to the model or the initialized (weights) tensors)
-    // We call this function during parsing so we don't have yet initialized the operators
-   for(size_t index = 0; index<op_input_tensors.size() &&
-            fInitializedTensors.find(UTILITY::Clean_name(std::string(op_input_tensors[index]))) == fInitializedTensors.end() &&
-            std::find(fInputTensorNames.begin(), fInputTensorNames.end(),
-                      UTILITY::Clean_name(std::string(op_input_tensors[index]))) == fInputTensorNames.end();
-            ++index)
-   {
-      fIntermediateTensorFrequencyLookup[op_input_tensors[index]] = order_execution;
+void RModel::AddOperator(std::unique_ptr<ROperator> op, int order_execution)
+{
+   AddBlasRoutines(op->GetBlasRoutines());
+   auto libs = op->GetStdLibs();
+   auto op_input_tensors = op->GetOpInputTensors();
+   for (auto &stdlib : libs) {
+      AddNeededStdLib(stdlib);
+   }
+   if (order_execution >= 0) {
+      fOperators.insert(fOperators.begin() + order_execution, std::move(op));
+   } else {
+      fOperators.push_back(std::move(op));
+      order_execution = fOperators.size() - 1;
+   }
+
+   // storing the last usage of tensors which are input to the operator
+   // (excluding tensors which are inputs to the model or the initialized (weights) tensors)
+   // We call this function during parsing so we don't have yet initialized the operators
+   for (size_t index = 0; index < op_input_tensors.size(); index++) {
+      if (!IsInitializedTensor(UTILITY::Clean_name(std::string(op_input_tensors[index]))) &&
+          std::find(fInputTensorNames.begin(), fInputTensorNames.end(),
+                    UTILITY::Clean_name(std::string(op_input_tensors[index]))) == fInputTensorNames.end()) {
+
+         fIntermediateTensorFrequencyLookup[op_input_tensors[index]] = order_execution;
+         if (Verbose())
+            std::cout << "adding order execution for " << op_input_tensors[index] << " order " << order_execution
+                      << std::endl;
+      }
    }
 }
 
@@ -763,6 +767,7 @@ std::string GenerateConstantTensorCode(const std::pair<std::string, InitializedT
 
    // and check if all values are the same
    bool sameData = false;
+
    // for non stack allocation check if data are the same
    if (!allocateOnStack && length > 1) {
       size_t idx = 1;
@@ -797,6 +802,19 @@ void RModel::GenerateInitializedTensorInfo()
       size_t length = ConvertShapeToLength(i.second.shape());
       if (!fUseWeightFile || i.second.IsConstantTensor() || !i.second.IsWeightTensor() || i.second.type() != ETensorType::FLOAT ) {
          if (i.second.type() == ETensorType::FLOAT) {
+            // check if NaN or Inf values are present in the tensor data
+            bool hasInfOrNaN = false;
+            const float *data = i.second.data<float>();
+            for (size_t idx = 0; idx < length; idx++) {
+               if (std::is_floating_point<float>::value) {
+                  if (std::isinf(data[idx]) || std::isnan(data[idx])) {
+                     hasInfOrNaN = true;
+                     break;
+                  }
+               }
+            }
+            if (hasInfOrNaN)
+               AddNeededStdLib("limits");
             fGC += GenerateConstantTensorCode<float>(i);
             fConstantTensorSize += length * sizeof(float);
          } else if (i.second.type() == ETensorType::INT64) {
@@ -1158,6 +1176,7 @@ void RModel::GenerateOutput()
          // Use the session member (fXxx) when any dim is a runtime-computed identifier
          // (e.g. NonZero count). For expression-type dims derived from input shapes
          // (e.g. "((W+-3)/2+1)"), use the expression directly.
+         // Input shape parameters do not need the session member: they are passed as arguments to the infer function and are not runtime-computed values.
          bool hasRuntimeParam = false;
          for (auto const &dim : GetDynamicTensorShape(name)) {
             if (dim.isParam && IsIdentifier(dim.param) && !IsInputTensorShapeParam(dim.param))
@@ -1304,9 +1323,10 @@ void RModel::GenerateSessionCode()
 
    // storing the parameters for future checking to avoid mismatches
    if (!fDimShapeNames.empty()) {
-      fGC += "\n\n";
-      std::sort(fDimShapeNames.begin(), fDimShapeNames.end());
-      for (const auto &p : fDimShapeNames) {
+      fGC += "\n//   dynamic shape parameters\n";
+      auto dimShapeNames = fDimShapeNames;
+      std::sort(dimShapeNames.begin(), dimShapeNames.end());
+      for (const auto &p : dimShapeNames) {
          fGC += "size_t " + memberNameForDimShape(p) + ";\n";
       }
    }
@@ -1344,8 +1364,7 @@ void RModel::GenerateSessionCode()
       // add initialization of shape parameters
       // assume all parameters are of type size_t
       if (!fDimShapeNames.empty()) {
-         // sort first the shape parameters in alphabetical order to avoid a random order
-         std::sort(fDimShapeNames.begin(), fDimShapeNames.end() );
+         // keep the same parameter order as in the infer function, not the alphabetical one
          for (auto &p : fDimShapeNames) {
             fGC += ",\n";
             fGC += "        size_t " + p + " = " + fShapeParams[p];
@@ -1361,6 +1380,8 @@ void RModel::GenerateSessionCode()
             fGC += "   " + memberNameForDimShape(p) + " = " + p + ";\n";
          }
       }
+      // add some extra code needed for initialization of dynamic parameters
+      fGC += fExtraCodeForDimShapes;
 
       if (fUseWeightFile) {
          fGC += "\n//--- reading weights from file\n";
@@ -1759,6 +1780,42 @@ void RModel::GenerateRequiredInputTensorInfo()
 
    fGC +=
       "\nconstexpr bool hasDynamicInputTensors{" + std::string{hasDynamicInputTensors ? "true" : "false"} + "};\n\n";
+
+   fGC += "\n// Output tensor dimensions\n";
+   bool hasDynamicOutputTensors = false;
+   for (std::size_t iOutput = 0; iOutput < fOutputTensorNames.size(); ++iOutput) {
+      auto const &name = fOutputTensorNames[iOutput];
+      if (IsDynamicTensor(name)) {
+         hasDynamicOutputTensors = true;
+      }
+      std::vector<Dim> shape = GetDimTensorShape(name);
+      fGC += "constexpr std::array<SingleDim, " + std::to_string(shape.size()) + "> dim_" + name + "{";
+      for (std::size_t iDim = 0; iDim < shape.size(); ++iDim) {
+         auto const &dim = shape[iDim];
+         if (dim.isParam) {
+            fGC += "SingleDim{\"" + dim.GetVal() + "\"}";
+         } else {
+            fGC += "SingleDim{" + dim.GetVal() + "}";
+         }
+         if (iDim != shape.size() - 1) {
+            fGC += ", ";
+         }
+      }
+      fGC += "};\n";
+   }
+   fGC += "\nconstexpr std::array<TensorDims, " + std::to_string(fOutputTensorNames.size()) + "> outputTensorDims{\n";
+   for (std::size_t iOutput = 0; iOutput < fOutputTensorNames.size(); ++iOutput) {
+      auto const &name = fOutputTensorNames[iOutput];
+      fGC += SP + "makeDims(dim_" + name + ")";
+      if (iOutput == fOutputTensorNames.size() - 1) {
+         fGC += "\n";
+      } else {
+         fGC += ",\n";
+      }
+   }
+   fGC += "};\n";
+   fGC +=
+      "\nconstexpr bool hasDynamicOutputTensors{" + std::string{hasDynamicOutputTensors ? "true" : "false"} + "};\n\n";
 }
 
 void RModel::PrintRequiredInputTensors() const {
diff --git a/tmva/sofie_parsers/src/ParseWhere.cxx b/tmva/sofie_parsers/src/ParseWhere.cxx
index 6ebcf161e5012..dc4b436282cab 100644
--- a/tmva/sofie_parsers/src/ParseWhere.cxx
+++ b/tmva/sofie_parsers/src/ParseWhere.cxx
@@ -12,6 +12,10 @@ ParserFuncSignature ParseWhere = [](RModelParser_ONNX &parser, const onnx::NodeP
       throw std::runtime_error("TMVA::SOFIE ONNX Parser Where op has invalid input size");
    }
    // condition boolean vector is input 0
+   if (!parser.IsRegisteredTensorType(nodeproto.input(0))){
+      throw std::runtime_error("TMVA::SOFIE ONNX Parser Where op has input tensor " +  nodeproto.input(0)
+                                + " but its type is not yet registered");
+   }
    if (!parser.IsRegisteredTensorType(nodeproto.input(1))){
       throw std::runtime_error("TMVA::SOFIE ONNX Parser Where op has input tensor " +  nodeproto.input(1)
                                 + " but its type is not yet registered");
@@ -31,6 +35,7 @@ ParserFuncSignature ParseWhere = [](RModelParser_ONNX &parser, const onnx::NodeP
    std::string output_name = nodeproto.output(0);
 
    switch (input_type) {
+   // note: the ROperator_Where constructor takes the condition tensor as its first argument
    case ETensorType::FLOAT:
       op.reset(new ROperator_Where<float>(nodeproto.input(0), nodeproto.input(1), nodeproto.input(2), output_name));
       break;