Skip to content

Commit a92d289

Browse files
committed
dfeed.web.web.view.moderation: Add page visit tracking to user journey timeline
- Add parseWebLog() function to parse Web.log for page visits matching the poster's IP and User-Agent
- Track external referrers (links from other sites) as separate events
- Add "page_visit" and "referrer" event types to JourneyEvent struct
- Add url field to JourneyEvent for linking to visited pages
- Extract userIP and userAgent from PostProcess logs for correlation
- Interleave web events chronologically between PostProcess log sections
- Add CSS styling for page_visit (blue) and referrer (purple) events
- Render URLs as clickable links in the timeline details
1 parent a67f395 commit a92d289

1 file changed

Lines changed: 222 additions & 5 deletions

File tree

src/dfeed/web/web/view/moderation.d

Lines changed: 222 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2020, 2025 Vladimir Panteleev <vladimir@thecybershadow.net>
1+
/* Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2020, 2025, 2026 Vladimir Panteleev <vladimir@thecybershadow.net>
22
*
33
* This program is free software: you can redistribute it and/or modify
44
* it under the terms of the GNU Affero General Public License as
@@ -57,12 +57,13 @@ import dfeed.web.web.user : user, userSettings;
5757
// One entry of a post's "user journey" timeline.
//
// NOTE: instances are built positionally elsewhere in this file, e.g.
// JourneyEvent(timestamp, type, message, success, details, sourceFile, lineNumber),
// so the field order below must not change and new fields go at the end.
struct JourneyEvent
{
	// Timestamp text as it appears in the originating log line.
	string timestamp;

	// Event category; one of: "captcha", "spam_check", "moderation", "posted",
	// "info", "log_file", "approval", "page_visit", "referrer".
	string type;

	// Short human-readable headline for the event.
	string message;

	// true for success, false for failure.
	bool success;

	// Additional details, such as a spamicity value or the visited path.
	string details;

	// Name of the log file the event was read from.
	string sourceFile;

	// Line number within sourceFile (1-based).
	int lineNumber;

	// For page_visit events, the URL to link to; for referrer events, the
	// referring URL. Empty for every other event type.
	string url;
}
6768

6869
JourneyEvent[] parsePostingJourney(string messageID)
@@ -208,6 +209,126 @@ JourneyEvent[] parsePostingJourney(string messageID)
208209
}
209210
}
210211

212+
// Parse Web.log for page visits matching IP and User-Agent.
// Returns the web events separately (not added to main events array).
//
// postProcessLogFile - path of a "<prefix> - PostProcess-<id>.log" file. The
//                      "Web.log" sharing that prefix (presumably the date) is
//                      the file that gets scanned.
// ip                 - client IP to match exactly; an empty value yields no events.
// userAgent          - client User-Agent. It is only enforced when both this
//                      value and the log line's User-Agent are non-empty.
//
// Every matching HTML page request (or redirected POST) produces a
// "page_visit" event. The first occurrence of each distinct external Referer
// additionally produces a "referrer" event, emitted just before the visit that
// carried it. Events are returned in log-file order.
JourneyEvent[] parseWebLog(string postProcessLogFile, string ip, string userAgent)
{
	// Splits an absolute URL into its authority ("host[:port]") and the
	// remainder starting at the first '/' after it. Both outputs stay empty
	// when the input has no "://"; path stays empty when nothing follows
	// the authority.
	static void splitUrl(string u, out string host, out string path)
	{
		auto schemeEnd = u.indexOf("://");
		if (schemeEnd < 0)
			return;
		auto hostStart = schemeEnd + 3;
		auto hostEnd = u.indexOf("/", hostStart);
		if (hostEnd < 0)
			host = u[hostStart .. $];
		else
		{
			host = u[hostStart .. hostEnd];
			path = u[hostEnd .. $];
		}
	}

	JourneyEvent[] webEvents;

	// Without an IP there is nothing to correlate against.
	if (ip.length == 0)
		return webEvents;

	// Replace "PostProcess-xxx.log" with "Web.log" to get the sibling Web log
	auto webLogFile = postProcessLogFile.matchFirst(`^(.* - )PostProcess-[a-z]+\.log$`);
	if (!webLogFile)
		return webEvents;

	auto logPath = webLogFile[1] ~ "Web.log";
	if (!exists(logPath))
		return webEvents;

	auto content = cast(string)read(logPath);
	auto logFileName = baseName(logPath);
	int lineNum = 0;

	// Track referrers we've already added to avoid duplicates
	bool[string] seenReferrers;

	foreach (line; content.split("\n"))
	{
		lineNum++;

		// Tolerate CRLF line endings; otherwise the '\r' would stick to the
		// last field (the User-Agent) and defeat the comparison below.
		if (line.length > 0 && line[$ - 1] == '\r')
			line = line[0 .. $ - 1];

		if (line.length < 30 || line[0] != '[')
			continue;

		// Parse log line: [timestamp] \tIP\tSTATUS\tTIME\tMETHOD\tURL\tCONTENT-TYPE[\tREFERER\tUSER-AGENT]
		auto closeBracket = line.indexOf("]");
		// The second test rejects a ']' sitting at the very end of the line,
		// where skipping "] " would slice past the end of the string.
		if (closeBracket < 0 || cast(size_t) closeBracket + 2 > line.length)
			continue;
		auto timestamp = line[1 .. closeBracket];
		auto rest = line[closeBracket + 2 .. $];

		auto fields = rest.split("\t");
		if (fields.length < 7)
			continue;

		// Field indices (first field is empty for alignment):
		// [0]=empty, [1]=IP, [2]=STATUS, [3]=TIME, [4]=METHOD, [5]=URL, [6]=CONTENT-TYPE, [7]=REFERER, [8]=USER-AGENT
		auto logIP = fields[1];
		auto status = fields[2];
		auto method = fields[4];
		auto url = fields[5];
		auto contentType = fields[6];
		string referer = fields.length > 7 ? fields[7] : "-";
		string logUserAgent = fields.length > 8 ? fields[8] : "";

		// Check if this matches our user's IP
		if (logIP != ip)
			continue;

		// If we have a User-Agent to match, check it (but don't require it)
		if (userAgent.length > 0 && logUserAgent.length > 0 && logUserAgent != userAgent)
			continue;

		// Only interested in text/html pages (GET and POST requests)
		// Also include POST redirects (3xx status with no content type)
		if (method != "GET" && method != "POST")
			continue;
		bool isRedirect = status.length >= 1 && status[0] == '3';
		if (!contentType.startsWith("text/html") && !(method == "POST" && isRedirect))
			continue;

		// Skip static resources
		if (url.canFind("/static/"))
			continue;

		// Split the request URL once; the host drives the referrer check and
		// the path is what gets displayed.
		string ourHost, urlPath;
		splitUrl(url, ourHost, urlPath);
		string displayPath = urlPath.length > 0 ? urlPath : url;

		// Check for external referrer (not from same site)
		if (referer != "-" && referer.length > 0 && ourHost.length > 0)
		{
			string refHost, refPath;
			splitUrl(referer, refHost, refPath);

			bool sameSite;
			if (refHost.length == 0)
			{
				// Referrer is not an absolute URL: fall back to the lenient
				// substring test.
				sameSite = referer.canFind(ourHost);
			}
			else
			{
				// Compare hosts rather than searching the whole referrer, so a
				// foreign URL that merely mentions our host in its path or
				// query is still reported. Subdomains of our host count as
				// the same site.
				sameSite = refHost == ourHost
					|| (refHost.length > ourHost.length
						&& refHost[$ - ourHost.length - 1] == '.'
						&& refHost[$ - ourHost.length .. $] == ourHost);
			}

			if (!sameSite && referer !in seenReferrers)
			{
				seenReferrers[referer] = true;
				auto refEvent = JourneyEvent(timestamp, "referrer", "External referrer", true, "", logFileName, lineNum);
				refEvent.url = referer;
				webEvents ~= refEvent;
			}
		}

		// Add page visit event
		auto eventMessage = method == "POST" ? "Form submission" : "Page visit";
		auto visitEvent = JourneyEvent(timestamp, "page_visit", eventMessage, true, displayPath, logFileName, lineNum);
		visitEvent.url = url;
		webEvents ~= visitEvent;
	}

	return webEvents;
}
327+
328+
// Track IP and User-Agent for web log correlation
329+
string userIP;
330+
string userAgent;
331+
211332
// Parse each log file
212333
foreach (ref related; relatedLogs)
213334
{
@@ -244,8 +365,13 @@ JourneyEvent[] parsePostingJourney(string messageID)
244365
// Parse different event types
245366
if (message.startsWith("IP: "))
246367
{
368+
userIP = message[4..$];
247369
events ~= JourneyEvent(timestamp, "info", "IP Address", true, message[4..$], logFileName, lineNum);
248370
}
371+
else if (message.startsWith("[Header] User-Agent: "))
372+
{
373+
userAgent = message[21..$];
374+
}
249375
else if (message.startsWith("CAPTCHA OK"))
250376
{
251377
events ~= JourneyEvent(timestamp, "captcha", "CAPTCHA solved successfully", true, "", logFileName, lineNum);
@@ -321,10 +447,79 @@ JourneyEvent[] parsePostingJourney(string messageID)
321447
}
322448
}
323449

324-
// Search for approval event (added last so it appears in chronological order)
450+
// Search for approval event
325451
if (primaryLog !is null)
326452
searchBannedLog(postID, primaryLog);
327453

454+
// Parse Web.log for page visits (returns separate array, doesn't modify events)
455+
JourneyEvent[] webEvents;
456+
if (primaryLog !is null)
457+
webEvents = parseWebLog(primaryLog, userIP, userAgent);
458+
459+
// Interleave web events between PostProcess log sections based on timestamps
460+
if (webEvents.length > 0)
461+
{
462+
// Sort web events by timestamp
463+
webEvents.sort!((a, b) => a.timestamp < b.timestamp);
464+
465+
// Split events into sections (each section starts with a log_file header)
466+
struct LogSection
467+
{
468+
size_t startIdx; // Index of log_file header in events array
469+
size_t endIdx; // Index after last event in this section
470+
string firstTimestamp; // First non-header event timestamp
471+
}
472+
LogSection[] sections;
473+
474+
for (size_t i = 0; i < events.length; i++)
475+
{
476+
if (events[i].type == "log_file")
477+
{
478+
LogSection section;
479+
section.startIdx = i;
480+
// Find the end of this section (next log_file header or end of array)
481+
size_t j = i + 1;
482+
while (j < events.length && events[j].type != "log_file")
483+
{
484+
if (section.firstTimestamp.length == 0 && events[j].timestamp.length > 0)
485+
section.firstTimestamp = events[j].timestamp;
486+
j++;
487+
}
488+
section.endIdx = j;
489+
sections ~= section;
490+
i = j - 1; // Continue from end of section
491+
}
492+
}
493+
494+
// Rebuild events with web events interleaved
495+
JourneyEvent[] newEvents;
496+
size_t webIdx = 0;
497+
498+
foreach (sectionIdx, ref section; sections)
499+
{
500+
// Insert web events that occurred before this section's first event
501+
while (webIdx < webEvents.length &&
502+
(section.firstTimestamp.length == 0 || webEvents[webIdx].timestamp < section.firstTimestamp))
503+
{
504+
newEvents ~= webEvents[webIdx];
505+
webIdx++;
506+
}
507+
508+
// Add this section's events
509+
for (size_t i = section.startIdx; i < section.endIdx; i++)
510+
newEvents ~= events[i];
511+
}
512+
513+
// Add any remaining web events after all sections
514+
while (webIdx < webEvents.length)
515+
{
516+
newEvents ~= webEvents[webIdx];
517+
webIdx++;
518+
}
519+
520+
events = newEvents;
521+
}
522+
328523
return events;
329524
}
330525

@@ -348,10 +543,13 @@ void renderJourneyTimeline(JourneyEvent[] events)
348543
`.journey-event.spam_detail { border-left-color: #A85; background: #FFFAF5; padding: 0.33em 0.75em; }` ~
349544
`.journey-event.log_file { border-left-color: #85A; background: #F5F5F5; }` ~
350545
`.journey-event.approval { border-left-color: #5A5; background: #F0FFF0; }` ~
546+
`.journey-event.page_visit { border-left-color: #59A; background: #F5FAFF; }` ~
547+
`.journey-event.referrer { border-left-color: #A59; background: #FAF5FF; }` ~
351548
`.journey-event.log_file:not(:first-child) { margin-top: 1em; border-top: 2px solid #E6E6E6; }` ~
352549
`.journey-timestamp { color: #666; font-size: 0.95em; }` ~
353550
`.journey-message { font-weight: bold; }` ~
354551
`.journey-details { color: #666; font-size: 0.95em; margin-top: 0.25em; }` ~
552+
`.journey-details a { color: #369; }` ~
355553
`.journey-source { color: #999; font-size: 0.9em; float: right; }` ~
356554
`</style>` ~
357555
`<div class="journey-timeline">` ~
@@ -368,6 +566,10 @@ void renderJourneyTimeline(JourneyEvent[] events)
368566
cssClass = "spam_detail";
369567
else if (event.type == "approval")
370568
cssClass = "approval";
569+
else if (event.type == "page_visit")
570+
cssClass = "page_visit";
571+
else if (event.type == "referrer")
572+
cssClass = "referrer";
371573
else if (event.success)
372574
cssClass = "success";
373575
else if (event.type == "info")
@@ -396,10 +598,25 @@ void renderJourneyTimeline(JourneyEvent[] events)
396598
html.putEncodedEntities(event.message);
397599
html.put(`</span>`);
398600

399-
if (event.details.length > 0)
601+
if (event.details.length > 0 || event.url.length > 0)
400602
{
401603
html.put(`<div class="journey-details">`);
402-
html.putEncodedEntities(event.details);
604+
if (event.url.length > 0)
605+
{
606+
html.put(`<a href="`);
607+
html.putEncodedEntities(event.url);
608+
html.put(`" target="_blank" rel="noopener">`);
609+
// For page visits, show the path; for referrers, show the full URL
610+
if (event.details.length > 0)
611+
html.putEncodedEntities(event.details);
612+
else
613+
html.putEncodedEntities(event.url);
614+
html.put(`</a>`);
615+
}
616+
else
617+
{
618+
html.putEncodedEntities(event.details);
619+
}
403620
html.put(`</div>`);
404621
}
405622
html.put(`</div>`);

0 commit comments

Comments
 (0)