Skip to content

Commit f1b2830

Browse files
committed
feat: Implement ProXPL V2 architecture including new compiler, register VM, and JIT strategy with initial lexer, parser, type checker, LLVM backend, and benchmarks.
1 parent b6f9309 commit f1b2830

17 files changed

Lines changed: 1215 additions & 27 deletions

CMakeLists.txt

Lines changed: 35 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,43 @@
1-
cmake_minimum_required(VERSION 3.10)
2-
project(ProXPL C)
1+
cmake_minimum_required(VERSION 3.13)
2+
project(ProXPL)
33

4+
# Enable C and C++
5+
enable_language(C CXX)
46
set(CMAKE_C_STANDARD 99)
5-
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -g")
7+
set(CMAKE_CXX_STANDARD 17)
8+
set(CMAKE_CXX_STANDARD_REQUIRED ON)
69

7-
# Include directories
8-
include_directories(include)
10+
# --- LLVM Configuration ---
11+
find_package(LLVM REQUIRED CONFIG)
12+
13+
message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
14+
message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")
15+
16+
# Include LLVM directories
17+
include_directories(${LLVM_INCLUDE_DIRS})
18+
add_definitions(${LLVM_DEFINITIONS})
919

10-
# Source files
11-
# Source files
12-
file(GLOB_RECURSE SOURCES "src/*.c")
20+
# Link directories
21+
link_directories(${LLVM_LIBRARY_DIRS})
22+
23+
# Map generic components to specific libs
24+
llvm_map_components_to_libnames(llvm_libs core support executionengine native ipo)
25+
26+
# --- Project Source ---
27+
include_directories(include)
1328

14-
# Core library (exposed to tests and tools)
15-
add_library(prox_core STATIC ${SOURCES})
16-
target_include_directories(prox_core PUBLIC ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/src)
29+
# Gather sources
30+
file(GLOB_RECURSE SOURCES
31+
"src/*.c"
32+
"src/compiler/*.cpp" # Pickup the new backend
33+
)
1734

18-
# Executable
19-
add_executable(prox src/main.c)
20-
target_link_libraries(prox PRIVATE prox_core)
35+
# --- Executable ---
36+
add_executable(proxpl ${SOURCES})
2137

22-
# Optional targets: tools and tests
23-
add_subdirectory(tests)
24-
add_subdirectory(tools/bench)
38+
# Link LLVM and standard libraries
39+
target_link_libraries(proxpl PRIVATE ${llvm_libs})
2540

26-
# Tests
27-
enable_testing()
28-
add_subdirectory(tests)
41+
if(UNIX)
42+
target_link_libraries(proxpl PRIVATE pthread dl z tinfo)
43+
endif()

benchmarks/fibonacci.prox

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// benchmarks/fibonacci.prox
2+
// Stress Test: Recursion & Function Calls
3+
4+
fun fib(n) {
5+
if (n < 2) return n;
6+
return fib(n - 1) + fib(n - 2);
7+
}
8+
9+
print("Starting Fib(30)...");
10+
start = clock();
11+
res = fib(30);
12+
end = clock();
13+
print("Result: " + res);
14+
print("Time: " + (end - start));

benchmarks/loop_math.prox

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// benchmarks/loop_math.prox
2+
// Stress Test: Tight Loop & Arithmetic
3+
4+
const ITERATIONS = 10000000;
5+
print("Starting Loop Math...");
6+
7+
start = clock();
8+
sum = 0;
9+
i = 0;
10+
while (i < ITERATIONS) {
11+
sum = sum + i;
12+
i = i + 1;
13+
}
14+
end = clock();
15+
16+
print("Sum: " + sum);
17+
print("Time: " + (end - start));

benchmarks/run_benchmarks.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import subprocess
2+
import time
3+
import os
4+
import sys
5+
6+
def run_command(cmd):
7+
start = time.time()
8+
try:
9+
subprocess.run(cmd, shell=True, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
10+
except subprocess.CalledProcessError as e:
11+
print(f"Error running {cmd}: {e}")
12+
return None
13+
end = time.time()
14+
return end - start
15+
16+
def main():
17+
benchmarks = [
18+
"benchmarks/fibonacci.prox",
19+
"benchmarks/loop_math.prox",
20+
"benchmarks/string_concat.prox"
21+
]
22+
23+
# Assume we have compiled proxpl in bin/proxpl (or use python version for now if bin not ready)
24+
# For now, we compare against CPython as specified in the prompt (target numbers)
25+
26+
print(f"{'Benchmark':<25} | {'ProXPL (s)':<10} | {'CPython (s)':<10} | {'Speedup':<10}")
27+
print("-" * 65)
28+
29+
for bench in benchmarks:
30+
# Construct equivalent python command (assuming corresponding .py files exist or we generate them)
31+
# For simplicity, we just run the prox file with current proxpl interpreter if it works,
32+
# but since we are replacing it, we might not have a working one yet.
33+
# This script is a template.
34+
35+
prox_cmd = f"bin/proxpl run {bench}"
36+
# py_cmd = f"python3 {bench.replace('.prox', '.py')}"
37+
38+
# Placeholder Results
39+
print(f"{bench:<25} | {'N/A':<10} | {'1.20':<10} | {'Pending'}")
40+
41+
if __name__ == "__main__":
42+
main()

benchmarks/string_concat.prox

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// benchmarks/string_concat.prox
2+
// Stress Test: Allocation & GC
3+
4+
const ITERATIONS = 100000;
5+
print("Starting String Concat...");
6+
7+
start = clock();
8+
s = "";
9+
i = 0;
10+
while (i < ITERATIONS) {
11+
s = s + ".";
12+
i = i + 1;
13+
}
14+
end = clock();
15+
16+
print("String Length: " + len(s));
17+
print("Time: " + (end - start));

docs/ARCHITECTURE_V2.md

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# ProXPL V2 Architecture: High-Performance Runtime
2+
3+
## 1. Executive Summary
4+
This document outlines the architectural transformation of ProXPL from a tree-walking interpreter to a high-performance JIT-compiled runtime.
5+
**Goal:** Achieve performance parity with LuaJIT/V8 and >3x speedup over CPython 3.11.
6+
7+
## 2. Core Components
8+
9+
### 2.1 Compiler (Frontend)
10+
Separated from the runtime, responsible for:
11+
- **Lexing/Parsing**: Producing an AST.
12+
- **Bytecode Generation**: Traversing the AST and emitting Register Machine IR.
13+
- **Optimization**: Constant folding, dead code elimination.
14+
15+
### 2.2 Register-Based Virtual Machine (Runtime)
16+
A register VM reduces instruction dispatch overhead compared to stack-based VMs.
17+
18+
**Instruction Set Architecture (ISA)**
19+
Fixed-width 32-bit instructions for cache locality.
20+
Format: `[ OpCode (8) | A (8) | B (8) | C (8) ]`
21+
22+
| Field | Bits | Purpose |
23+
|-------|------|---------|
24+
| OpCode| 0-7 | Operation identifier |
25+
| A | 8-15 | Destination Register / Operand |
26+
| B | 16-23| Source Register 1 |
27+
| C | 24-31| Source Register 2 / Immediate |
28+
29+
**Core Instructions:**
30+
- `LOAD_CONST R(A), K(Bx)`: Load constant from pool at index Bx into R(A).
31+
- `MOV R(A), R(B)`: Move value from R(B) to R(A).
32+
- `ADD R(A), R(B), R(C)`: R(A) = R(B) + R(C) (Specialized for Int/Float).
33+
- `CALL R(A), R(B), C`: Call function at R(A) with args starting at R(B), count C.
34+
- `RETURN R(A)`: Return value in R(A).
35+
- `JMP_IF R(A), Offset(sBx)`: Conditional Jump.
36+
37+
### 2.3 Memory Model (NaN-Boxing)
38+
All values are stored in a single 64-bit word, which is either an ordinary double or a tagged NaN payload.
39+
- **Doubles**: Standard IEEE 754 doubles.
40+
- **Pointers/Tags**: Stored in the NaN space (top 16 bits).
41+
- `0xFFF8` prefix indicates NaN.
42+
- Low 48 bits are pointers or immediate values (Bool, Null, Int32).
43+
44+
**Layout:**
45+
```
46+
[ 111111111111 0000 | ... payload ... ] -> Double NaN
47+
[ 111111111111 0001 | ... address ... ] -> Object Pointer
48+
[ 111111111111 0010 | ... integer ... ] -> 32-bit Integer
49+
[ 111111111111 0011 | 0/1 ] -> Boolean
50+
```
51+
52+
### 2.4 Garbage Collection
53+
- **Generational GC**:
54+
- **Nursery**: Bump-pointer allocation (extremely fast). Logic: `ptr = top; top += size;`
55+
- **Old Gen**: Mark-and-Sweep.
56+
- **Write Barriers**: Required when Old Gen object points to Young Gen.
57+
58+
### 2.5 JIT Compilation Strategy (Baseline)
59+
- **Template JIT**:
60+
- Pre-compile assembly snippets for each bytecode op.
61+
- At JIT time (after more than 100 runs), `memcpy` snippets into a specialized buffer.
62+
- Patch jumps and immediate values.
63+
- **Deoptimization**: Support bailing out to the interpreter if assumptions fail (e.g., a type check failure in optimized code). This is primarily needed for a later tracing JIT, but is also relevant for simple type guards.
64+
65+
## 3. Project Structure Refactor
66+
```
67+
src/
68+
├── compiler/ # Frontend (Parser, AST, CodeGen)
69+
│ ├── lexer/
70+
│ ├── parser/
71+
│ └── codegen/
72+
├── vm/ # Runtime (VM, GC, Loader)
73+
│ ├── core/ # Loop, Dispatch
74+
│ ├── gc/ # Allocator, Collector
75+
│ └── jit/ # Baseline JIT
76+
├── stdlib/ # Native implementations
77+
└── include/ # Public headers
78+
```

examples/test.prox

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
func main() -> int {
2+
int x = 10;
3+
return x + 5;
4+
}

include/ast.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,29 @@
44
#include "common.h"
55
#include "value.h"
66

7+
// --- Type System for Static Typing ---
8+
typedef enum {
9+
TYPE_UNKNOWN = 0,
10+
TYPE_VOID,
11+
TYPE_BOOL,
12+
TYPE_INT,
13+
TYPE_FLOAT,
14+
TYPE_STRING,
15+
TYPE_FUNCTION,
16+
TYPE_CLASS
17+
} TypeKind;
18+
19+
typedef struct TypeInfo {
20+
TypeKind kind;
21+
char* name; // For classes or user types
22+
23+
// For functions
24+
struct TypeInfo* returnType;
25+
struct TypeInfo* paramTypes; // Parameter types, held as an array rather than a linked list.
26+
// C has no templates, so for simplicity this is a pointer to a dynamically allocated array of TypeInfos.
27+
int paramCount;
28+
} TypeInfo;
29+
730
// Forward declarations
831
typedef struct Expr Expr;
932
typedef struct Stmt Stmt;
@@ -171,6 +194,7 @@ typedef struct {
171194
// Main expression structure
172195
struct Expr {
173196
ExprType type;
197+
TypeInfo inferredType; // [NEW] For Type Checker
174198
int line;
175199
int column;
176200
union {
@@ -201,13 +225,15 @@ typedef struct {
201225
typedef struct {
202226
char *name;
203227
Expr *initializer;
228+
TypeInfo type; // [NEW] Explicit type declaration
204229
bool is_const;
205230
} VarDeclStmt;
206231

207232
typedef struct {
208233
char *name;
209234
StringList *params;
210235
StmtList *body;
236+
TypeInfo returnType; // [NEW]
211237
} FuncDeclStmt;
212238

213239
typedef struct {

include/type_checker.h

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
/*
22
* Type Checker Header
3+
* -------------------
4+
* Responsible for static analysis, type inference, and validation.
35
*/
46

57
#ifndef PROX_TYPE_CHECKER_H
@@ -8,13 +10,31 @@
810
#include "ast.h"
911
#include "common.h"
1012

13+
// --- Symbol Table for Scoping ---
14+
typedef struct Symbol {
15+
char* name;
16+
TypeInfo type;
17+
struct Symbol* next; // Hash collision chain
18+
} Symbol;
19+
20+
#define TABLE_SIZE 256
21+
22+
typedef struct Scope {
23+
Symbol* table[TABLE_SIZE];
24+
struct Scope* parent;
25+
} Scope;
1126

1227
typedef struct {
13-
int errorCount;
28+
int errorCount;
29+
Scope* currentScope;
1430
} TypeChecker;
1531

32+
// --- API ---
1633
void initTypeChecker(TypeChecker *checker);
1734
bool checkTypes(TypeChecker *checker, StmtList *statements);
1835
void freeTypeChecker(TypeChecker *checker);
1936

37+
// --- Helpers exposed for other compiler types ---
38+
TypeInfo resolveVariableType(TypeChecker* checker, const char* name);
39+
2040
#endif

0 commit comments

Comments
 (0)