Branchless-Programming-Example/program.py at main · maxron84/Branchless-Programming-Example · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
"""
Branchless Programming Examples in Python

"Branchless" means we avoid if/else and other conditional jumps.
Instead, we use arithmetic and bit tricks so the CPU executes
the SAME instructions regardless of the input values.

Why? Modern CPUs "guess" which branch an if-statement will take
(branch prediction). When they guess wrong, they waste 10-20 cycles
flushing the pipeline. Branchless code has no guesses to get wrong.

PYTHON-SPECIFIC WARNING:
    Python integers have ARBITRARY PRECISION — they grow as large as needed.
    This means `>> 31` does NOT reliably extract a sign bit like in C/JS!

    Example:  (-7) >> 31  = -1   ✓  (works, fits in 32 bits)
    Example:  (2**40) >> 31 = 512  ✗  (NOT 0 or -1 — number is wider than 32 bits)

    To safely extract a sign bit in Python, we use `& 0xFFFFFFFF` to truncate
    to 32 bits first. The examples below demonstrate this approach.

    In real Python code, you'd normally just write `if x < 0:` — these
    branchless techniques are mainly useful in C, C++, and assembly.
    We show them here purely for educational purposes.
"""


def to_signed32(n):
    """
    Convert a Python integer to a 32-bit signed integer.

    Python integers have unlimited size, so we need this helper to simulate
    the 32-bit behavior that C and JavaScript get for free.

    How it works:
      1. `n & 0xFFFFFFFF` masks to the lowest 32 bits (result: 0 to 4294967295)
      2. If bit 31 is set (value >= 2^31), the original number was negative
         in 32-bit representation, so we subtract 2^32 to get the negative value.

    Examples:
      to_signed32(42)  →  42       (fits fine, no change)
      to_signed32(-7)  → -7        (-7 & 0xFFFFFFFF = 4294967289, then - 2^32 = -7)
      to_signed32(0)   →  0
    """
    n = n & 0xFFFFFFFF  # Keep only the lower 32 bits
    if n >= 0x80000000:  # If bit 31 (sign bit) is set...
        n -= 0x100000000  # ...interpret as negative
    return n


def get_sign(num):
    """
    Returns +1 for positive, 0 for zero, -1 for negative.

    TRICK: We extract two facts using arithmetic right shift on 32-bit values:
      1. Is the number negative?  →  num >> 31 gives 0 or -1
      2. Is the number positive?  →  (-num) >> 31 gives 0 or -1
    Then: isPositive - isNegative

    WORKED EXAMPLE (num = -7):

      Step 1: is_negative
        to_signed32(-7) = -7
        -7 >> 31 = -1          (arithmetic shift fills with 1s)
        is_negative = -(-1) = 1

      Step 2: is_positive
        to_signed32(7) = 7
        7 >> 31 = 0
        is_positive = -(0) = 0

      Result: 0 - 1 = -1  ✓

    WORKED EXAMPLE (num = 42):
        is_negative = -(to_signed32(42) >> 31) = -(0) = 0
        is_positive = -(to_signed32(-42) >> 31) = -(-1) = 1
        Result: 1 - 0 = 1  ✓

    WORKED EXAMPLE (num = 0):
        is_negative = -(0 >> 31) = 0
        is_positive = -(0 >> 31) = 0
        Result: 0 - 0 = 0  ✓
    """
    num32 = to_signed32(num)
    neg32 = to_signed32(-num)

    is_negative = -(num32 >> 31)   # 1 if num < 0, else 0
    is_positive = -(neg32 >> 31)   # 1 if num > 0, else 0

    return is_positive - is_negative


def min_branchless(a, b):
    """
    Returns the smaller of two integers using bit masking.

    TRICK: Compute (a - b), then use its sign bit as a mask.
      - If a < b → diff is negative → mask = -1 (all 1-bits)
      - If a >= b → diff is non-negative → mask = 0

    Formula:  min = b + (diff & mask)
      - When a < b:  mask = -1, so (diff & mask) = diff → b + diff = b + (a-b) = a  ✓
      - When a >= b: mask = 0,  so (diff & mask) = 0   → b + 0 = b                  ✓

    WORKED EXAMPLE (a=15, b=9):
        diff = 15 - 9 = 6
        mask = to_signed32(6) >> 31 = 0
        result = 9 + (6 & 0) = 9 + 0 = 9  ✓

    WORKED EXAMPLE (a=3, b=7):
        diff = 3 - 7 = -4
        mask = to_signed32(-4) >> 31 = -1
        result = 7 + (-4 & -1) = 7 + (-4) = 3  ✓
    """
    diff = a - b
    mask = to_signed32(diff) >> 31  # 0 if a >= b, -1 if a < b
    return b + (diff & mask)


def is_smaller(a, b):
    """
    Returns 1 if a < b, 0 otherwise.
    In Python, int(True) = 1, int(False) = 0.
    """
    return int((a - b) < 0)


def min_with_helper(a, b):
    """
    Alternative branchless min using multiplication instead of bit masking.

    Formula: result = a + (b - a) * is_smaller(b, a)
      - When b < a (i.e., a is larger): a + (b - a) * 1 = a + b - a = b  ✓
      - When b >= a (i.e., a is smaller or equal): a + (b - a) * 0 = a   ✓

    WORKED EXAMPLE (a=15, b=9):
        is_smaller(9, 15) = 1  (because 9 < 15)
        result = 15 + (9 - 15) * 1 = 15 + (-6) = 9  ✓

    WORKED EXAMPLE (a=3, b=7):
        is_smaller(7, 3) = 0  (because 7 >= 3)
        result = 3 + (7 - 3) * 0 = 3 + 0 = 3  ✓
    """
    return a + (b - a) * is_smaller(b, a)


# === Demo ===
if __name__ == "__main__":
    num1, num2, num3 = 42, -7, 0

    print("=== Branchless Sign Detection ===")
    print(f"sign({num1})  = {get_sign(num1):+d}")
    print(f"sign({num2}) = {get_sign(num2):+d}")
    print(f"sign({num3})  = {get_sign(num3):+d}")

    a, b = 15, 9

    print("\n=== Branchless Minimum ===")
    print(f"min({a}, {b}) via bitmask:        {min_branchless(a, b)}")
    print(f"min({a}, {b}) via multiplication: {min_with_helper(a, b)}")