Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/parser/Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -821,6 +821,9 @@ namespace OpenLogReplicator {
transaction->commitSequence = redoLogRecord1->sequence;
transaction->commitScn = redoLogRecord1->scn;
transaction->commitTimestamp = redoLogRecord1->timestamp;
if ((redoLogRecord1->flg & OpCode::FLG_ROLLBACK_OP0504) != 0)
transaction->rollback = true;

if ((transaction->commitScn > metadata->firstDataScn && !transaction->system) ||
(transaction->commitScn > metadata->firstSchemaScn && transaction->system)) {
if (transaction->begin) {
Expand Down
24 changes: 20 additions & 4 deletions src/parser/Transaction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,11 +140,13 @@ namespace OpenLogReplicator {
return;
}

// In RAC online mode, Oracle generates phantom undo records for INSERTs on LOB tables.
// These appear as INSERT rollbacks with no preceding LOB index records on the stack.
// Legitimate rollbacks (out-of-line LOBs) always have LOB records stripped first.
// In RAC online mode, Oracle generates phantom undo records for DML on LOB tables.
// These appear as rollbacks with no preceding LOB index records on the stack.
// Legitimate LOB rollbacks always strip LOB index records (0x0A02 etc.) first.
// Covers INSERT->DELETE (0x0B02->0x0B03) and UPDATE->UPDATE (0x0B05->0x0B05).
if (!lobStripped && transactionBuffer->deferCommittedTransactions &&
lastRedoLogRecord2->opCode == 0x0B02 && redoLogRecord1->opCode == 0x0B03) {
((lastRedoLogRecord2->opCode == 0x0B02 && redoLogRecord1->opCode == 0x0B03) ||
(lastRedoLogRecord2->opCode == 0x0B05 && redoLogRecord1->opCode == 0x0B05))) {
const DbTable* table = metadata->schema->checkTableDict(redoLogRecord1->obj);
if (table != nullptr && !table->lobs.empty()) {
return;
Expand Down Expand Up @@ -493,6 +495,20 @@ namespace OpenLogReplicator {
} else {
metadata->ctx->warning(60017, "minimal supplemental log missing or redo log inconsistency for transaction " +
xid.toString() + ", offset: " + redoLogRecord1->fileOffset.toString());
// Pending redo1/redo2 contains an incomplete multi-piece DML from a different row/table.
// Discard the orphaned fragment and start fresh with the current record.
redo1.clear();
redo2.clear();

if (op == 0x05010B02)
transactionType = Format::TRANSACTION_TYPE::INSERT;
else if (op == 0x05010B03)
transactionType = Format::TRANSACTION_TYPE::DELETE;
else
transactionType = Format::TRANSACTION_TYPE::UPDATE;

redo1.push_back(redoLogRecord1);
redo2.push_back(redoLogRecord2);
}
}

Expand Down
31 changes: 31 additions & 0 deletions tests/KNOWN-LIMITATIONS.md
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,36 @@ applies at DB creation, not pre-built).

---

## L13. LogMiner LOB After-Image Unavailable for Unchanged Columns

Oracle LogMiner only includes LOB (CLOB/BLOB) column values in change events
when the column was **explicitly set or changed** by the SQL statement.
Unchanged LOB columns are not included in the redo data and appear as
`__debezium_unavailable_value` in Debezium output.

This is documented behavior, not a bug. OLR reads LOB data directly from redo
log records and can deliver actual LOB content even when LogMiner cannot.

**Evidence — Debezium documentation:**

> Oracle only supplies column values for CLOB, NCLOB, and BLOB data types if
> they're explicitly set or changed in a SQL statement. As a result, change
> events never contain the value of an unchanged CLOB, NCLOB, or BLOB column.
> Instead, they contain placeholders as defined by the connector property,
> `unavailable.value.placeholder`.

Source: [Debezium Connector for Oracle — LOB support](https://debezium.io/documentation/reference/stable/connectors/oracle.html),
[DBZ-4276](https://github.com/debezium/debezium/pull/2929)

**Evidence — fuzz test (2026-03-29):** 776 value diffs on FUZZ_LOB table where
LogMiner has `__debezium_unavailable_value` and OLR has actual BLOB/CLOB data.
Zero reverse cases (OLR unavailable, LogMiner has value).

**Test handling:** `validator.py` skips comparison when either side has
`__debezium_unavailable_value` (both before and after images).

---

## Summary

### External Limitations (Oracle / Debezium — cannot be fixed in OLR)
Expand All @@ -297,6 +327,7 @@ applies at DB creation, not pre-built).
| L5 | NCHAR uses UNISTR() encoding | Oracle LogMiner | Decode in logminer2json.py |
| L6 | LOB disabled by default | Debezium config | Set `lob.enabled=true` |
| L7 | No mid-stream DDL | Debezium OLR adapter | Skip DDL in twin-test ([#13](https://github.com/rophy/olr/issues/13)) |
| L13 | LOB after-image unavailable for unchanged cols | Oracle LogMiner / [Debezium docs](https://debezium.io/documentation/reference/stable/connectors/oracle.html) | Skip unavailable in validator |

### OLR Bugs (should be fixed)

Expand Down
11 changes: 8 additions & 3 deletions tests/dbz-twin/rac/fuzz-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,13 @@ action_up() {
action_run() {
local duration_min="${1:-30}"
local duration_sec=$(( duration_min * 60 ))
local skip_lob="${SKIP_LOB:-0}"

echo "=== Running fuzz workload for ${duration_min} minutes ==="
if [[ "$skip_lob" == "1" ]]; then
echo "=== Running fuzz workload for ${duration_min} minutes (LOB skipped) ==="
else
echo "=== Running fuzz workload for ${duration_min} minutes ==="
fi

local work_dir
work_dir=$(mktemp -d /tmp/fuzz_rac_XXXXXX)
Expand All @@ -202,12 +207,12 @@ SQL
# Create runner scripts
cat > "$work_dir/fuzz_node1.sql" <<SQL
SET SERVEROUTPUT ON SIZE UNLIMITED
EXEC FUZZ_WKL.run(p_duration_secs => ${duration_sec}, p_seed => 42, p_node_id => 1);
EXEC FUZZ_WKL.run(p_duration_secs => ${duration_sec}, p_seed => 42, p_node_id => 1, p_skip_lob => ${skip_lob});
EXIT;
SQL
cat > "$work_dir/fuzz_node2.sql" <<SQL
SET SERVEROUTPUT ON SIZE UNLIMITED
EXEC FUZZ_WKL.run(p_duration_secs => ${duration_sec}, p_seed => 137, p_node_id => 2);
EXEC FUZZ_WKL.run(p_duration_secs => ${duration_sec}, p_seed => 137, p_node_id => 2, p_skip_lob => ${skip_lob});
EXIT;
SQL

Expand Down
31 changes: 21 additions & 10 deletions tests/dbz-twin/rac/perf/fuzz-workload.sql
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,8 @@ CREATE OR REPLACE PACKAGE olr_test.FUZZ_WKL AS
PROCEDURE run(
p_duration_secs IN NUMBER DEFAULT 1800,
p_seed IN NUMBER DEFAULT 1,
p_node_id IN NUMBER DEFAULT 1
p_node_id IN NUMBER DEFAULT 1,
p_skip_lob IN NUMBER DEFAULT 0 -- 1 = skip LOB table operations
);
END FUZZ_WKL;
/
Expand All @@ -170,6 +171,7 @@ CREATE OR REPLACE PACKAGE BODY olr_test.FUZZ_WKL AS
g_rollback_cnt PLS_INTEGER := 0;
g_lob_cnt PLS_INTEGER := 0;
g_total_ops PLS_INTEGER := 0;
g_skip_lob PLS_INTEGER := 0; -- 1 = skip LOB table operations

-- Per-table ID tracking for UPDATE/DELETE targeting.
-- Stores the last inserted ID for each table so UPDATE/DELETE can
Expand Down Expand Up @@ -637,8 +639,13 @@ CREATE OR REPLACE PACKAGE BODY olr_test.FUZZ_WKL AS
v_count PLS_INTEGER;
BEGIN
-- Pick table (weighted)
-- 30% scalar, 10% wide, 15% lob, 10% partitioned, 10% nopk,
-- 10% maxstr, 5% interval, 10% null-heavy
-- With LOB: 30% scalar, 10% wide, 15% lob, 10% part, 10% nopk, 10% maxstr, 5% interval, 10% null
-- Without LOB: 35% scalar, 12% wide, 0% lob, 12% part, 12% nopk, 12% maxstr, 7% interval, 10% null
-- When g_skip_lob=1, remap the 15% LOB range to other tables
IF g_skip_lob = 1 AND v_table_dice > 40 AND v_table_dice <= 55 THEN
-- Remap LOB range (41-55) to scalar
v_table_dice := rand_int(1, 30);
END IF;
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
IF v_table_dice <= 30 THEN
-- FUZZ_SCALAR
v_count := rand_int(1, 20);
Expand Down Expand Up @@ -707,7 +714,8 @@ CREATE OR REPLACE PACKAGE BODY olr_test.FUZZ_WKL AS
PROCEDURE run(
p_duration_secs IN NUMBER DEFAULT 1800,
p_seed IN NUMBER DEFAULT 1,
p_node_id IN NUMBER DEFAULT 1
p_node_id IN NUMBER DEFAULT 1,
p_skip_lob IN NUMBER DEFAULT 0
) IS
v_start TIMESTAMP := SYSTIMESTAMP;
v_deadline TIMESTAMP := SYSTIMESTAMP + NUMTODSINTERVAL(p_duration_secs, 'SECOND');
Expand All @@ -722,6 +730,7 @@ CREATE OR REPLACE PACKAGE BODY olr_test.FUZZ_WKL AS
g_event_seq := 0;
g_insert_cnt := 0; g_update_cnt := 0; g_delete_cnt := 0;
g_rollback_cnt := 0; g_lob_cnt := 0; g_total_ops := 0;
g_skip_lob := p_skip_lob;

DBMS_RANDOM.SEED(p_seed);

Expand All @@ -737,12 +746,14 @@ CREATE OR REPLACE PACKAGE BODY olr_test.FUZZ_WKL AS
VALUES (v_seed_id, 'SEED', DBMS_RANDOM.STRING('x', 20), 0);
track_id(g_scalar_ids, g_scalar_id_cnt, v_seed_id);
END LOOP;
FOR i IN 1..5 LOOP
v_seed_id := next_id;
INSERT INTO olr_test.FUZZ_LOB (id, event_id, label, content)
VALUES (v_seed_id, 'SEED', 'seed', 'seed');
track_id(g_lob_ids, g_lob_id_cnt, v_seed_id);
END LOOP;
IF g_skip_lob = 0 THEN
FOR i IN 1..5 LOOP
v_seed_id := next_id;
INSERT INTO olr_test.FUZZ_LOB (id, event_id, label, content)
VALUES (v_seed_id, 'SEED', 'seed', 'seed');
track_id(g_lob_ids, g_lob_id_cnt, v_seed_id);
END LOOP;
END IF;
FOR i IN 1..20 LOOP
v_seed_id := next_id;
v_seed_region := REGIONS(rand_int(1, 5));
Expand Down
47 changes: 38 additions & 9 deletions tests/dbz-twin/rac/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,11 @@ def merge_lob_records(records):
def compare_values(lm_cols, olr_cols, table, section='after'):
"""Compare two normalized column dicts. Returns list of diff strings.

section: 'before' or 'after'. LOB unavailable markers are only skipped
in 'before' images — Oracle doesn't provide old LOB values in redo.
In 'after' images, unavailable markers indicate a real problem.
LOB unavailable markers are skipped in both before and after images.
Oracle LogMiner only includes LOB column values when they are explicitly
changed by the SQL statement — unchanged LOB columns are emitted as
__debezium_unavailable_value. This is documented Debezium behavior, not
a bug. See: KNOWN-LIMITATIONS.md L13.
"""
diffs = []
all_keys = set(lm_cols.keys()) | set(olr_cols.keys())
Expand All @@ -97,8 +99,8 @@ def compare_values(lm_cols, olr_cols, table, section='after'):
vb = olr_cols.get(key)
if key not in lm_cols or key not in olr_cols:
continue # Supplemental logging differences
if section == 'before' and (is_unavailable(va) or is_unavailable(vb)):
continue # LOB before-image unavailable (Oracle limitation)
if is_unavailable(va) or is_unavailable(vb):
continue # LOB unavailable Oracle/Debezium limitation (L1, L13)
if va != vb:
diffs.append(f" {key}: LM={va!r} OLR={vb!r}")
return diffs
Expand All @@ -119,12 +121,15 @@ def main():
conn.execute("PRAGMA journal_mode=WAL")

cursor_by_node = {'N1': '', 'N2': ''} # Per-node watermark
safe_frontier = {} # Last frontier before idle-timeout widening
total_validated = 0
total_matched = 0
total_mismatches = 0
total_lob_known = 0 # Known LOB issues (expected)
total_missing_lm = 0
total_missing_olr = 0
total_tail_olr = 0 # OLR ahead of LM at drain time (not a bug)
total_tail_lm = 0 # LM ahead of OLR at drain time (not a bug)
last_new_events = time.time()
prev_lm_count = 0
prev_olr_count = 0
Expand Down Expand Up @@ -170,6 +175,9 @@ def main():
if time.time() - last_new_events > IDLE_TIMEOUT:
print(f"[validator] Idle timeout ({IDLE_TIMEOUT}s). "
f"Final validation pass...", flush=True)
# Save safe frontier before widening — events beyond this
# are tail lag (OLR or LM ahead), not real mismatches.
safe_frontier = dict(node_frontiers)
# Widen frontier to max of both sides per node to catch
# truly missing events (one side never delivered them).
for node_prefix in ('N1', 'N2'):
Expand Down Expand Up @@ -214,6 +222,12 @@ def main():
in_lm = eid in lm_ids
in_olr = eid in olr_ids

# Check if this event is beyond the safe frontier (tail lag)
node_prefix = eid[:2]
is_tail = (safe_frontier
and node_prefix in safe_frontier
and eid > safe_frontier[node_prefix])

# Determine table from whichever side has the event
if in_lm:
tbl_row = conn.execute(
Expand All @@ -228,7 +242,9 @@ def main():

if in_lm and not in_olr:
total_missing_olr += 1
if is_lob:
if is_tail:
total_tail_lm += 1
elif is_lob:
total_lob_known += 1
else:
total_mismatches += 1
Expand All @@ -238,7 +254,9 @@ def main():

if in_olr and not in_lm:
total_missing_lm += 1
if is_lob:
if is_tail:
total_tail_olr += 1
elif is_lob:
total_lob_known += 1
else:
total_mismatches += 1
Expand Down Expand Up @@ -338,9 +356,12 @@ def main():

# Progress report
frontier_str = ','.join(f'{k}={v}' for k, v in sorted(cursor_by_node.items()))
tail_str = (f" tail_olr={total_tail_olr} tail_lm={total_tail_lm}"
if total_tail_olr or total_tail_lm else "")
print(f"[validator] validated={total_validated} matched={total_matched} "
f"mismatches={total_mismatches} lob_known={total_lob_known} "
f"missing_olr={total_missing_olr} extra_olr={total_missing_lm} "
f"missing_olr={total_missing_olr} extra_olr={total_missing_lm}"
f"{tail_str} "
f"lm_total={lm_count} olr_total={olr_count} "
f"frontier={frontier_str}", flush=True)

Expand All @@ -359,15 +380,23 @@ def main():
print(f" LOB known issues: {total_lob_known}", flush=True)
print(f" Missing from OLR: {total_missing_olr}", flush=True)
print(f" Extra in OLR: {total_missing_lm}", flush=True)
if total_tail_olr or total_tail_lm:
print(f" Tail (OLR ahead): {total_tail_olr}", flush=True)
print(f" Tail (LM ahead): {total_tail_lm}", flush=True)

if total_mismatches > 0:
print(f"\n RESULT: FAIL ({total_mismatches} unexpected mismatches)",
flush=True)
sys.exit(1)
else:
print(f"\n RESULT: PASS", flush=True)
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
qualifiers = []
if total_lob_known > 0:
print(f" (with {total_lob_known} known LOB issues)", flush=True)
qualifiers.append(f"{total_lob_known} known LOB issues")
if total_tail_olr + total_tail_lm > 0:
qualifiers.append(f"{total_tail_olr + total_tail_lm} tail events")
if qualifiers:
print(f" ({', '.join(qualifiers)})", flush=True)
sys.exit(0)


Expand Down
Loading