|
| 1 | +#pragma once |
| 2 | +#include "common.h" |
| 3 | +#include "kmc/state.h" |
| 4 | +#include "analysis/utils.h" |
| 5 | + |
| 6 | +namespace analysis |
| 7 | +{ |
| 8 | + void analyzeChainLengthDist(Eigen::MatrixXd &sequenceStatsMatrix, const std::vector<double> &monomerFWs, AnalysisState &state) |
| 9 | + { |
| 10 | + if (sequenceStatsMatrix.rows() == 0 || sequenceStatsMatrix.cols() == 0) |
| 11 | + return; |
| 12 | + |
| 13 | + // SequenceStatsMatrix: (numPolymers x (A Count, B Count, ..., A SeqCount, B SeqCount, ..., A SeqLen2, B SeqLen2, ...)) |
| 14 | + // Extract monomer count distribution (shape: numPolymers x numMonomers) |
| 15 | + Eigen::MatrixXd monomerCountDist = sequenceStatsMatrix.leftCols(registry::NUM_MONOMERS); |
| 16 | + |
| 17 | + // Chain length calculations (Get chain lengths by summing monomer counts) |
| 18 | + Eigen::VectorXd chainLengths = monomerCountDist.rowwise().sum(); |
| 19 | + state.nAvgCL = chainLengths.mean(); |
| 20 | + if (state.nAvgCL != 0.0) |
| 21 | + { |
| 22 | + state.wAvgCL = chainLengths.array().square().mean() / state.nAvgCL; |
| 23 | + state.dispCL = state.wAvgCL / state.nAvgCL; |
| 24 | + } |
| 25 | + |
| 26 | + // If any monomer has FW of 0, skip molecular weight calculations |
| 27 | + // and set molecular weight averages to chain length averages |
| 28 | + Eigen::VectorXd FWs = Eigen::Map<const Eigen::VectorXd>(monomerFWs.data(), monomerFWs.size()); |
| 29 | + if ((FWs.array() == 0.0).any()) |
| 30 | + { |
| 31 | + state.nAvgMW = state.nAvgCL; |
| 32 | + state.wAvgMW = state.wAvgCL; |
| 33 | + state.dispMW = state.dispCL; |
| 34 | + return; |
| 35 | + } |
| 36 | + |
| 37 | + // Molecular weight calculations |
| 38 | + Eigen::MatrixXd weightedMonomerCountDist = monomerCountDist.array().rowwise() * FWs.transpose().array(); |
| 39 | + Eigen::VectorXd molecularWeights = weightedMonomerCountDist.rowwise().sum(); |
| 40 | + state.nAvgMW = molecularWeights.mean(); |
| 41 | + if (state.nAvgMW != 0.0) |
| 42 | + { |
| 43 | + state.wAvgMW = molecularWeights.array().square().mean() / state.nAvgMW; |
| 44 | + state.dispMW = state.wAvgMW / state.nAvgMW; |
| 45 | + } |
| 46 | + } |
| 47 | + |
| 48 | + // SequenceStatsMatrix: (numPolymers x (A Count, B Count, ..., A SeqCount, B SeqCount, ..., A SeqLen2, B SeqLen2, ...)) |
| 49 | + void analyzeSequenceLengthDist(Eigen::MatrixXd &sequenceStatsMatrix, AnalysisState &state) |
| 50 | + { |
| 51 | + |
| 52 | + if (sequenceStatsMatrix.rows() == 0 || sequenceStatsMatrix.cols() < SequenceStats::SIZE()) |
| 53 | + return; |
| 54 | + |
| 55 | + // Sum stats over all polymers -> (1 x (Total A Count, B Count, ..., A SeqCount, B SeqCount, ..., A SeqLen2, B SeqLen2, ...)) |
| 56 | + auto totalStats = sequenceStatsMatrix.colwise().sum(); |
| 57 | + auto totalMonomerCounts = totalStats.leftCols(registry::NUM_MONOMERS).sum(); // Total chain length (i.e. total monomer count across all types) |
| 58 | + |
| 59 | + for (size_t i = 0; i < registry::NUM_MONOMERS; ++i) |
| 60 | + { |
| 61 | + auto monomerCounts = totalStats(0 * registry::NUM_MONOMERS + i); // Total count of monomer type i across all polymers |
| 62 | + auto sequenceCounts = totalStats(1 * registry::NUM_MONOMERS + i); // Total number of sequences of monomer type i across all polymers |
| 63 | + auto sequenceLengths2 = totalStats(2 * registry::NUM_MONOMERS + i); // Sum of squared sequence lengths of monomer type i across all polymers |
| 64 | + |
| 65 | + if (sequenceCounts > 0 && monomerCounts > 0) |
| 66 | + { |
| 67 | + state.nAvgComp[i] = monomerCounts / totalMonomerCounts; // Total count of monomer i / total count of all monomers |
| 68 | + state.nAvgSL[i] = monomerCounts / sequenceCounts; // Total count of monomer i / total number of sequences of monomer i |
| 69 | + state.wAvgSL[i] = sequenceLengths2 / monomerCounts; // Sum of squared sequence lengths of monomer i / total count of monomer i |
| 70 | + state.dispSL[i] = state.wAvgSL[i] / state.nAvgSL[i]; // Sequence length dispersity of monomer i |
| 71 | + } |
| 72 | + } |
| 73 | + } |
| 74 | + |
| 75 | + void analyze(const SpeciesSet &speciesSet, SystemState &systemState) |
| 76 | + { |
| 77 | + auto sequenceData = speciesSet.getRawSequenceData(); |
| 78 | + auto summary = analysis::calculateSequenceSummary(sequenceData); |
| 79 | + |
| 80 | + SequenceState sequenceState = SequenceState{systemState.kmc, summary.positionalStats}; |
| 81 | + |
| 82 | + AnalysisState analysisState; |
| 83 | + analysis::analyzeChainLengthDist(summary.sequenceStatsMatrix, speciesSet.getMonomerFWs(), analysisState); |
| 84 | + analysis::analyzeSequenceLengthDist(summary.sequenceStatsMatrix, analysisState); |
| 85 | + |
| 86 | + systemState.sequence = sequenceState; |
| 87 | + systemState.analysis = analysisState; |
| 88 | + } |
| 89 | +} |
0 commit comments