Skip to content

Commit 84fc5ce

Browse files
committed
Changing compatibility from data with a session id to data without one (using student id and problem name instead)
1 parent fc9e71f commit 84fc5ce

1 file changed

Lines changed: 102 additions & 51 deletions

File tree

src/components/GraphvizProcessing.ts

Lines changed: 102 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
import Papa from 'papaparse';
22
import {SequenceCount} from "@/Context";
3+
34
// import {autoType} from "d3";
45

56
interface CSVRow {
6-
'Session Id': string;
7+
'Session Id'?: string;
78
'Time': string;
89
'Step Name': string;
910
'Outcome': string;
@@ -27,18 +28,21 @@ export const loadAndSortData = (csvData: string): CSVRow[] => {
2728
const transformedData = parsedData.map(row => ({
2829
'Session Id': row['Session Id'],
2930
'Time': row['Time'],
30-
'Step Name': row['Step Name'],// || 'DoneButton',
31+
'Step Name': row['Step Name'] || 'DoneButton',
3132
'Outcome': row['Outcome'],
3233
'CF (Workspace Progress Status)': row['CF (Workspace Progress Status)'],
3334
'Problem Name': row['Problem Name'],
3435
'Anon Student Id': row['Anon Student Id']
3536
}));
3637
console.log(transformedData)
3738
return transformedData.sort((a, b) => {
38-
if (a['Session Id'] === b['Session Id']) {
39+
if (a['Anon Student Id'] === b['Anon Student Id']) {
40+
if (a['Problem Name'] === b['Problem Name']) {
3941
return new Date(a['Time']).getTime() - new Date(b['Time']).getTime();
4042
}
41-
return a['Session Id'].localeCompare(b['Session Id']);
43+
return a['Problem Name'].localeCompare(b['Problem Name']);
44+
}
45+
return a['Anon Student Id'].localeCompare(b['Anon Student Id']);
4246
});
4347
};
4448

@@ -49,32 +53,62 @@ export const loadAndSortData = (csvData: string): CSVRow[] => {
4953
* @param selfLoops - A boolean to include self-loops.
5054
* @returns A dictionary mapping student IDs to a dictionary of problem names to sequences of step names.
5155
*/
52-
export const createStepSequences = (sortedData: CSVRow[], selfLoops: boolean): { [key: string]: string[] } => {
56+
// export const createStepSequences = (sortedData: CSVRow[], selfLoops: boolean): { [key: string]: string[] } => {
57+
// return sortedData.reduce((acc, row) => {
58+
// const sessionId = row['Session Id'];
59+
// if (!acc[sessionId]) acc[sessionId] = [];
60+
// // console.log(acc['stepName'], sessionId)
61+
// const stepName = row['Step Name'];
62+
// if (selfLoops || acc[sessionId].length === 0 || acc[sessionId][acc[sessionId].length - 1] !== stepName) {
63+
// acc[sessionId].push(stepName);
64+
// }
65+
//
66+
// return acc;
67+
// }, {} as { [key: string]: string[] });
68+
// };
69+
export const createStepSequences = (sortedData: CSVRow[], selfLoops: boolean): { [key: string]: { [key: string]: string[] }} => {
5370
return sortedData.reduce((acc, row) => {
54-
const sessionId = row['Session Id'];
55-
if (!acc[sessionId]) acc[sessionId] = [];
56-
// console.log(acc['stepName'], sessionId)
71+
const studentId:string = row['Anon Student Id'];
72+
const problemName:string = row['Problem Name'];
73+
74+
if (!acc[studentId]) acc[studentId] = {}; // Initialize student entry if not present
75+
if (!acc[studentId][problemName]) acc[studentId][problemName] = []; // Initialize problem entry if not present
76+
77+
// console.log(acc['stepName'], sessionId)
5778
const stepName = row['Step Name'];
58-
if (selfLoops || acc[sessionId].length === 0 || acc[sessionId][acc[sessionId].length - 1] !== stepName) {
59-
acc[sessionId].push(stepName);
79+
if (selfLoops || acc[studentId][problemName].length === 0 || acc[studentId][problemName][acc[studentId][problemName].length - 1] !== stepName) {
80+
acc[studentId][problemName].push(stepName);
6081
}
6182

6283
return acc;
63-
}, {} as { [key: string]: string[] });
84+
}, {} as { [key: string]: { [key: string]: string[] } });
6485
};
65-
6686
/**
6787
* Creates outcome sequences from sorted data.
6888
* @param sortedData - The sorted CSV rows.
6989
* @returns A dictionary mapping student IDs to a dictionary of problem names to sequences of outcomes.
7090
*/
71-
export const createOutcomeSequences = (sortedData: CSVRow[]): { [key: string]: string[] } => {
91+
// export const createOutcomeSequences = (sortedData: CSVRow[]): { [key: string]: string[] } => {
92+
// return sortedData.reduce((acc, row) => {
93+
// const sessionId = row['Session Id'];
94+
//
95+
// if (!acc[sessionId]) acc[sessionId] = [];
96+
// acc[sessionId].push(row['Outcome']);
97+
// return acc;
98+
// }, {} as { [key: string]: string[] });
99+
// };
100+
export const createOutcomeSequences = (sortedData: CSVRow[]): { [key: string]: { [key: string]: string[] } } => {
72101
return sortedData.reduce((acc, row) => {
73-
const sessionId = row['Session Id'];
74-
if (!acc[sessionId]) acc[sessionId] = [];
75-
acc[sessionId].push(row['Outcome']);
102+
const studentId = row['Anon Student Id'];
103+
const problemName = row['Problem Name'];
104+
105+
if (!acc[studentId]) acc[studentId] = {}; // Initialize student entry if not present
106+
if (!acc[studentId][problemName]) acc[studentId][problemName] = []; // Initialize problem entry if not present
107+
108+
acc[studentId][problemName].push(row['Outcome']); // Store outcome sequence under student & problem
109+
76110
return acc;
77-
}, {} as { [key: string]: string[] });
111+
}, {} as { [key: string]: { [key: string]: string[] } });
78112
};
79113

80114
/**
@@ -83,38 +117,37 @@ export const createOutcomeSequences = (sortedData: CSVRow[]): { [key: string]: s
83117
* @param topN - The number of top sequences to return (default is 5).
84118
* @returns An array of the top sequences and their counts.
85119
*/
86-
export function getTopSequences(stepSequences: { [key: string]: string[] }, topN: number = 5) {
87-
// Create a frequency map to count how many times each unique sequence (list) occurs
120+
export function getTopSequences(stepSequences: { [key: string]: { [key: string]: string[] } }, topN: number = 5) {
88121
const sequenceCounts: { [sequence: string]: number } = {};
89122

90-
// Iterate over the values (which are lists) of the stepSequences dictionary
91-
Object.values(stepSequences).forEach((sequence) => {
92-
const sequenceKey = JSON.stringify(sequence); // Convert the list to a string key
123+
// Iterate through the outer object
124+
Object.values(stepSequences).forEach((nestedObj) => {
125+
// Iterate through the inner object to access each sequence (which is an array)
126+
Object.values(nestedObj).forEach((sequence) => {
127+
const sequenceKey = JSON.stringify(sequence); // Convert sequence array to a string key
93128

94-
// Count occurrences of each unique sequence
95-
if (sequenceCounts[sequenceKey]) {
96-
sequenceCounts[sequenceKey]++;
97-
} else {
98-
sequenceCounts[sequenceKey] = 1;
99-
}
129+
// Count occurrences of each unique sequence
130+
sequenceCounts[sequenceKey] = (sequenceCounts[sequenceKey] || 0) + 1;
131+
});
100132
});
101133

102-
// Filter sequences of length 5 or greater then sort the sequences based on their counts in descending order and take the top N
134+
// Filter sequences of length 5 or greater, then sort and take top N
103135
const sortedSequences = Object.entries(sequenceCounts)
104136
.filter(([sequence]) => JSON.parse(sequence).length >= 5)
105137
.sort(([, countA], [, countB]) => countB - countA)
106138
.slice(0, topN);
107139

108-
// Convert to the desired format: { sequence: [step1, step2, step3], count }
140+
// Convert back to array format
109141
const topSequences = sortedSequences.map(([sequenceKey, count]) => ({
110142
sequence: JSON.parse(sequenceKey), // Convert the string back to an array
111143
count,
112144
}));
113145

114-
console.log("Processing topSequences: ", topSequences); // Log the top sequences for debugging
115-
return topSequences; // Return the array of top sequences
146+
console.log("Processing topSequences: ", topSequences);
147+
return topSequences;
116148
}
117149

150+
118151
interface EdgeCounts {
119152
edgeCounts: { [key: string]: number };
120153
totalNodeEdges: { [key: string]: number };
@@ -130,8 +163,8 @@ interface EdgeCounts {
130163
* @returns Various edge-related counts and statistics.
131164
*/
132165
export const countEdges = (
133-
stepSequences: { [key: string]: string[] },
134-
outcomeSequences: { [key: string]: string[] }
166+
stepSequences: { [key: string]: { [key: string]: string[] } },
167+
outcomeSequences: { [key: string]: { [key: string]: string[] } }
135168
): {
136169
totalNodeEdges: { [p: string]: number };
137170
edgeOutcomeCounts: { [p: string]: { [p: string]: number } };
@@ -147,35 +180,53 @@ export const countEdges = (
147180
const edgeCounts: { [key: string]: number } = {};
148181
const top5Sequences = getTopSequences(stepSequences, 5);
149182

150-
Object.keys(stepSequences).forEach((sessionId) => {
151-
const steps = stepSequences[sessionId];
152-
const outcomes = outcomeSequences[sessionId];
183+
// Iterate over first-level keys (e.g., student ID, problem, etc.)
184+
Object.keys(stepSequences).forEach((studentId) => {
185+
const innerStepSequences = stepSequences[studentId]; // { [key: string]: string[] }
186+
const innerOutcomeSequences = outcomeSequences[studentId] || {}; // Handle missing outcome sequences
153187

154-
if (steps.length < 2) return;
188+
// Iterate over second-level keys (actual step sequences)
189+
Object.keys(innerStepSequences).forEach((problemName) => {
190+
const steps = innerStepSequences[problemName]; // string[]
191+
const outcomes = innerOutcomeSequences[problemName] || []; // string[] (fallback to empty array)
155192

156-
for (let i = 0; i < steps.length - 1; i++) {
157-
const currentStep = steps[i];
158-
const nextStep = steps[i + 1];
159-
const outcome = outcomes[i + 1];
193+
if (steps.length < 2) return; // Ignore sequences with < 2 steps
160194

161-
const edgeKey = `${currentStep}->${nextStep}`;
162-
edgeCounts[edgeKey] = (edgeCounts[edgeKey] || 0) + 1;
163-
edgeOutcomeCounts[edgeKey] = edgeOutcomeCounts[edgeKey] || {};
164-
edgeOutcomeCounts[edgeKey][outcome] = (edgeOutcomeCounts[edgeKey][outcome] || 0) + 1;
165-
totalNodeEdges[currentStep] = (totalNodeEdges[currentStep] || 0) + 1;
195+
for (let i = 0; i < steps.length - 1; i++) {
196+
const currentStep = steps[i];
197+
const nextStep = steps[i + 1];
198+
const outcome = outcomes[i + 1];
166199

167-
if (edgeCounts[edgeKey] > maxEdgeCount) maxEdgeCount = edgeCounts[edgeKey];
168-
}
200+
const edgeKey = `${currentStep}->${nextStep}`;
201+
edgeCounts[edgeKey] = (edgeCounts[edgeKey] || 0) + 1;
202+
edgeOutcomeCounts[edgeKey] = edgeOutcomeCounts[edgeKey] || {};
203+
edgeOutcomeCounts[edgeKey][outcome] = (edgeOutcomeCounts[edgeKey][outcome] || 0) + 1;
204+
totalNodeEdges[currentStep] = (totalNodeEdges[currentStep] || 0) + 1;
205+
206+
if (edgeCounts[edgeKey] > maxEdgeCount) {
207+
maxEdgeCount = edgeCounts[edgeKey];
208+
}
209+
}
210+
});
169211
});
170212

213+
// Compute ratioEdges based on totalNodeEdges
171214
Object.keys(edgeCounts).forEach((edge) => {
172215
const [start] = edge.split('->');
173-
ratioEdges[edge] = edgeCounts[edge] / (totalNodeEdges[start] || 0);
216+
ratioEdges[edge] = edgeCounts[edge] / (totalNodeEdges[start] || 1); // Avoid division by zero
174217
});
175218

176-
return {edgeCounts, totalNodeEdges, ratioEdges, edgeOutcomeCounts, maxEdgeCount, topSequences: top5Sequences};
219+
return {
220+
edgeCounts,
221+
totalNodeEdges,
222+
ratioEdges,
223+
edgeOutcomeCounts,
224+
maxEdgeCount,
225+
topSequences: top5Sequences
226+
};
177227
};
178228

229+
179230
/**
180231
* Normalizes edge thicknesses based on their ratios for better visual representation.
181232
* @param ratioEdges - The edge ratios to normalize.

0 commit comments

Comments
 (0)