@@ -353,6 +353,228 @@ describe('myformat parser', () => {
3533534 . Run tests: ` bun test `
3543545 . Push and open a PR
355355
356+ ## Step-by-Step Example: Parsing Unstructured Logs
357+
358+ Let's walk through creating a parser for a common unstructured log format:
359+
360+ ```
361+ 2025-01-15 08:00:35 ERROR [payment] Failed to process payment for order #12345
362+ 2025-01-15 08:00:40 DEBUG [scheduler] Running job: cleanup_sessions
363+ 2025-01-15 08:00:45 WARN [memory] Heap usage high: 1.5GB / 2GB (75%)
364+ ```
365+
366+ ### Step 1: Analyze the Format
367+
368+ Break down the log structure:
369+ - ` 2025-01-15 08:00:35 ` - timestamp (YYYY-MM-DD HH:MM:SS)
370+ - ` ERROR ` - log level (DEBUG, INFO, WARN, ERROR, FATAL)
371+ - ` [payment] ` - component name in brackets
372+ - ` Failed to process... ` - the message
373+
374+ ### Step 2: Write the Detection Pattern
375+
376+ The detection pattern should be specific enough to match only this format, so that lines from other formats are not misdetected (false positives):
377+
378+ ``` typescript
379+ // Matches: date + time + level keyword
380+ detect : / ^ \d {4} -\d {2} -\d {2} \d {2} :\d {2} :\d {2} (DEBUG| INFO| WARN| ERROR| FATAL)/
381+ ```
382+
383+ ### Step 3: Write the Parse Pattern
384+
385+ Use named capture groups for clarity:
386+
387+ ``` typescript
388+ parse : {
389+ pattern : / ^ (?<timestamp >\d {4} -\d {2} -\d {2} \d {2} :\d {2} :\d {2} ) (?<level >\w + )\s + \[ (?<component >[^ \] ] + )\] (?<message >. + )$ / ,
390+ fields : {
391+ timestamp : ' timestamp' ,
392+ level : ' level' ,
393+ message : ' message' ,
394+ },
395+ }
396+ ```
397+
398+ ### Step 4: Complete Config
399+
400+ ``` typescript
401+ // loq.config.ts
402+ export default {
403+ formats: [
404+ {
405+ name: ' myapp' ,
406+ detect: / ^ \d {4} -\d {2} -\d {2} \d {2} :\d {2} :\d {2} (DEBUG| INFO| WARN| ERROR| FATAL)/ ,
407+ parse: {
408+ pattern: / ^ (?<timestamp >\d {4} -\d {2} -\d {2} \d {2} :\d {2} :\d {2} ) (?<level >\w + )\s + \[ (?<component >[^ \] ] + )\] (?<message >. + )$ / ,
409+ fields: {
410+ timestamp: ' timestamp' ,
411+ level: ' level' ,
412+ message: ' message' ,
413+ },
414+ },
415+ },
416+ ],
417+ };
418+ ```
419+
420+ ### Step 5: Test It
421+
422+ ``` bash
423+ # Now these queries work!
424+ loq plain.log where level=error
425+ loq plain.log where component=payment
426+ loq plain.log where level=error and component=database
427+ loq plain.log count by level
428+ loq plain.log count by component
429+ ```
430+
431+ ### Step 6: Add the Component Field
432+
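433+ The `component` queries in Step 5 only work once the captured `component` group is mapped to a field. To query by component, add it to the fields: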
434+
435+ ``` typescript
436+ parse : {
437+ pattern : / ^ (?<timestamp >\d {4} -\d {2} -\d {2} \d {2} :\d {2} :\d {2} ) (?<level >\w + )\s + \[ (?<component >[^ \] ] + )\] (?<message >. + )$ / ,
438+ fields : {
439+ timestamp : ' timestamp' ,
440+ level : ' level' ,
441+ message : ' message' ,
442+ component : ' component' , // Add this!
443+ },
444+ }
445+ ```
446+
447+ ---
448+
449+ ## More Real-World Examples
450+
451+ ### Kubernetes Pod Logs
452+
453+ ``` typescript
454+ {
455+ name : ' k8s' ,
456+ detect : / ^ \d {4} -\d {2} -\d {2} T\d {2} :\d {2} :\d {2} \. \d + Z\s / ,
457+ parse : {
458+ pattern : / ^ (?<timestamp >\d {4} -\d {2} -\d {2} T\d {2} :\d {2} :\d {2} \. \d + Z)\s + (?<level >\w + )\s + (?<message >. + )$ / ,
459+ fields : {
460+ timestamp : ' timestamp' ,
461+ level : ' level' ,
462+ message : ' message' ,
463+ },
464+ },
465+ }
466+ ```
467+
468+ ### Python Logging
469+
470+ ``` typescript
471+ {
472+ name : ' python' ,
473+ detect : / ^ \d {4} -\d {2} -\d {2} \d {2} :\d {2} :\d {2} ,\d {3} - \w + -/ ,
474+ parse : {
475+ pattern : / ^ (?<timestamp >\d {4} -\d {2} -\d {2} \d {2} :\d {2} :\d {2} ,\d {3} ) - (?<logger >\w + ) - (?<level >\w + ) - (?<message >. + )$ / ,
476+ fields : {
477+ timestamp : ' timestamp' ,
478+ level : ' level' ,
479+ message : ' message' ,
480+ logger : ' logger' ,
481+ },
482+ },
483+ }
484+ ```
485+
486+ ### Java/Log4j
487+
488+ ``` typescript
489+ {
490+ name : ' log4j' ,
491+ detect : / ^ \d {4} -\d {2} -\d {2} \d {2} :\d {2} :\d {2} \. \d {3} \[ / ,
492+ parse : {
493+ pattern : / ^ (?<timestamp >\d {4} -\d {2} -\d {2} \d {2} :\d {2} :\d {2} \. \d {3} ) \[ (?<thread >[^ \] ] + )\] (?<level >\w + )\s + (?<logger >\S + ) - (?<message >. + )$ / ,
494+ fields : {
495+ timestamp : ' timestamp' ,
496+ level : ' level' ,
497+ message : ' message' ,
498+ thread : ' thread' ,
499+ logger : ' logger' ,
500+ },
501+ },
502+ }
503+ ```
504+
505+ ### Go/Zap Logger (Non-JSON)
506+
507+ ``` typescript
508+ {
509+ name : ' zap' ,
510+ detect : / ^ \d {4} -\d {2} -\d {2} T\d {2} :\d {2} :\d {2} \. \d {3} [+-] \d {4} \s + (DEBUG| INFO| WARN| ERROR)/ ,
511+ parse : {
512+ pattern : / ^ (?<timestamp >\d {4} -\d {2} -\d {2} T\d {2} :\d {2} :\d {2} \. \d {3} [+-] \d {4} )\s + (?<level >\w + )\s + (?<logger >\S + )\s + (?<message >[^ \t ] + )(?:\t (?<fields >. + ))? $ / ,
513+ fields : {
514+ timestamp : ' timestamp' ,
515+ level : ' level' ,
516+ message : ' message' ,
517+ logger : ' logger' ,
518+ },
519+ },
520+ }
521+ ```
522+
523+ ### Heroku Router Logs
524+
525+ ``` typescript
526+ {
527+ name : ' heroku' ,
528+ detect : / ^ . * heroku\[ router\] :/ ,
529+ parse : (line ) => {
530+ const match = line .match (/ ^ (?<timestamp >\S + ) . * heroku\[ router\] : at=(?<status >\w + ) method=(?<method >\w + ) path="(?<path >[^ "] + )". * status=(?<code >\d + ). * connect=(?<connect >\d + )ms service=(?<service >\d + )ms/ );
531+ if (! match ) return null ;
532+ const { timestamp, status, method, path, code, connect, service } = match .groups ! ;
533+ return {
534+ timestamp ,
535+ level: status === ' error' ? ' error' : (parseInt (code ) >= 400 ? ' warn' : ' info' ),
536+ message: ` ${method } ${path } ${code } ` ,
537+ fields: {
538+ method ,
539+ path ,
540+ status: parseInt (code ),
541+ connect_ms: parseInt (connect ),
542+ service_ms: parseInt (service ),
543+ },
544+ };
545+ },
546+ }
547+ ```
548+
549+ ### Multiline Stack Traces
550+
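551+ For logs with stack traces, you may need special handling. The example below uses a function parser that returns `null` for continuation lines (stack frames) and parses only the line that starts each entry: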
552+
553+ ``` typescript
554+ {
555+ name : ' java-multiline' ,
556+ detect : / ^ \d {4} -\d {2} -\d {2} \d {2} :\d {2} :\d {2} \. \d {3} / ,
557+ parse : (line ) => {
558+ // Skip continuation lines (stack traces)
559+ if (line .startsWith (' \t ' ) || line .startsWith (' at ' )) {
560+ return null ; // These get attached to previous entry
561+ }
562+
563+ const match = line .match (/ ^ (?<timestamp >\d {4} -\d {2} -\d {2} \d {2} :\d {2} :\d {2} \. \d {3} ) (?<level >\w + ) (?<message >. + )$ / );
564+ if (! match ) return null ;
565+
566+ return {
567+ timestamp: match .groups ! .timestamp ,
568+ level: match .groups ! .level .toLowerCase (),
569+ message: match .groups ! .message ,
570+ fields: {},
571+ };
572+ },
573+ }
574+ ```
575+
576+ ---
577+
356578## Debugging Custom Formats
357579
358580Test your format detection:
0 commit comments