Skip to content

Commit 9ba7225

Browse files
committed
修復 decoder 失真率過高問題
1 parent 92c9e0a commit 9ba7225

1 file changed

Lines changed: 123 additions & 52 deletions

File tree

src/cpp/decoder.cpp

Lines changed: 123 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <cstring>
66
#include <cmath>
77
#include <algorithm>
8+
#include <iostream> // 用於 debug 輸出
89

910
namespace jpeg {
1011

@@ -18,7 +19,6 @@ JPEGDecoder::~JPEGDecoder() {
1819
}
1920

2021
bool JPEGDecoder::decodeFile(const std::string& filename) {
21-
2222
std::ifstream file(filename, std::ios::binary | std::ios::ate);
2323
if (!file.is_open()) {
2424
return false;
@@ -57,6 +57,8 @@ uint16_t JPEGDecoder::readWord() {
5757
uint8_t JPEGDecoder::readMarker() {
5858
uint8_t byte = readByte();
5959
if (byte != 0xFF) {
60+
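// A non-0xFF byte at this point may just be stray padding, and a lenient decoder could skip it.
61+
// No such tolerance is implemented here: the 0xFF marker prefix is checked strictly and any other byte is reported as an error.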
6062
throw std::runtime_error("Expected marker");
6163
}
6264

@@ -71,6 +73,7 @@ uint8_t JPEGDecoder::readMarker() {
7173

7274
void JPEGDecoder::skipSegment() {
7375
uint16_t length = readWord();
76+
if (length < 2) throw std::runtime_error("Invalid segment length");
7477
data_pos_ += length - 2;
7578
}
7679

@@ -83,7 +86,7 @@ bool JPEGDecoder::parse() {
8386
}
8487

8588
// 解析各個 segments
86-
while (true) {
89+
while (data_pos_ < jpeg_data_.size()) {
8790
marker = readMarker();
8891

8992
switch (marker) {
@@ -118,6 +121,8 @@ bool JPEGDecoder::parse() {
118121
}
119122
}
120123
} catch (const std::exception& e) {
124+
// 在實際應用中,可以 print e.what() 幫助除錯
125+
// std::cerr << "JPEG Error: " << e.what() << std::endl;
121126
return false;
122127
}
123128

@@ -246,9 +251,10 @@ bool JPEGDecoder::processSOS() {
246251
size_t scan_data_end = data_pos_;
247252
while (scan_data_end < jpeg_data_.size()) {
248253
if (jpeg_data_[scan_data_end] == 0xFF) {
254+
if (scan_data_end + 1 >= jpeg_data_.size()) break;
249255
uint8_t next = jpeg_data_[scan_data_end + 1];
250256
if (next != 0x00 && !(next >= 0xD0 && next <= 0xD7)) {
251-
// 找到下一個 marker
257+
// 找到下一個 marker (非 stuffing 0x00 且非 RST)
252258
break;
253259
}
254260
}
@@ -261,7 +267,7 @@ bool JPEGDecoder::processSOS() {
261267
BitStream bs(jpeg_data_.data() + scan_data_start, scan_data_size);
262268

263269
// 計算 MCU 的數量
264-
int max_h_sample = 1, max_v_sample = 1;
270+
int max_h_sample = 0, max_v_sample = 0;
265271
for (int i = 0; i < num_components_; ++i) {
266272
max_h_sample = std::max(max_h_sample, components_[i].h_sample);
267273
max_v_sample = std::max(max_v_sample, components_[i].v_sample);
@@ -276,15 +282,34 @@ bool JPEGDecoder::processSOS() {
276282
image_data_.resize(width_ * height_ * 3);
277283

278284
// 儲存 Y, Cb, Cr 分量
285+
// 注意:這裡假設記憶體足夠。對於大圖,這種做法可能會佔用較多記憶體。
279286
std::vector<std::vector<uint8_t>> y_data(mcu_rows * mcu_cols);
280287
std::vector<std::vector<uint8_t>> cb_data(mcu_rows * mcu_cols);
281288
std::vector<std::vector<uint8_t>> cr_data(mcu_rows * mcu_cols);
282289

283290
// 解碼所有 MCU
284291
int16_t prev_dc[3] = {0, 0, 0};
292+
int mcus_processed = 0;
285293

286294
for (int mcu_row = 0; mcu_row < mcu_rows; ++mcu_row) {
287295
for (int mcu_col = 0; mcu_col < mcu_cols; ++mcu_col) {
296+
297+
// 處理 Restart Interval
298+
if (restart_interval_ > 0 && mcus_processed > 0 && mcus_processed % restart_interval_ == 0) {
299+
// 根據 JPEG 標準,遇到 RST 時需要重置 DC 預測值
300+
prev_dc[0] = 0;
301+
prev_dc[1] = 0;
302+
prev_dc[2] = 0;
303+
304+
// BitStream::fillBuffer 裡面的邏輯目前會跳過 RST marker (FF Dx),
305+
// 但為了嚴謹,這裡 BitStream 應該要有一個 "reset/align" 的動作來丟棄
306+
// buffer 裡剩餘的 fractional bits。
307+
// 由於目前 BitStream 介面沒有提供 align 功能,
308+
// 我們依賴 BitStream 在 fillBuffer 時自動處理 marker。
309+
// *注意*:若圖片有 RST,這部分是 BitStream 類別潛在的改進點。
310+
bs.reset(bs.getBitPosition() / 8 + (bs.getBitPosition() % 8 ? 1 : 0)); // 簡易模擬 byte alignment
311+
}
312+
288313
int mcu_index = mcu_row * mcu_cols + mcu_col;
289314

290315
// 為每個分量解碼區塊
@@ -302,6 +327,7 @@ bool JPEGDecoder::processSOS() {
302327
IDCT::transform8x8(block, pixels);
303328

304329
// 儲存到對應的分量
330+
// 注意:對於 4:2:0,Y 分量會有 4 個 block,這裡依序存入
305331
if (comp == 0) { // Y
306332
y_data[mcu_index].insert(y_data[mcu_index].end(), pixels, pixels + 64);
307333
} else if (comp == 1) { // Cb
@@ -312,6 +338,7 @@ bool JPEGDecoder::processSOS() {
312338
}
313339
}
314340
}
341+
mcus_processed++;
315342
}
316343
}
317344

@@ -346,10 +373,12 @@ void JPEGDecoder::decodeBlock(BitStream& bs, int component_id, int16_t* prev_dc,
346373
}
347374

348375
void JPEGDecoder::ycbcrToRgb(int y, int cb, int cr, uint8_t& r, uint8_t& g, uint8_t& b) {
376+
// 轉換公式 (標準 JPEG)
349377
int r_val = y + 1.402 * (cr - 128);
350378
int g_val = y - 0.344136 * (cb - 128) - 0.714136 * (cr - 128);
351379
int b_val = y + 1.772 * (cb - 128);
352380

381+
// Clamp to 0-255
353382
r = (r_val < 0) ? 0 : (r_val > 255) ? 255 : r_val;
354383
g = (g_val < 0) ? 0 : (g_val > 255) ? 255 : g_val;
355384
b = (b_val < 0) ? 0 : (b_val > 255) ? 255 : b_val;
@@ -358,60 +387,102 @@ void JPEGDecoder::ycbcrToRgb(int y, int cb, int cr, uint8_t& r, uint8_t& g, uint
358387
void JPEGDecoder::upsample(const std::vector<std::vector<uint8_t>>& y_blocks,
359388
const std::vector<std::vector<uint8_t>>& cb_blocks,
360389
const std::vector<std::vector<uint8_t>>& cr_blocks) {
361-
// 簡化版:假設 4:4:4 或 4:2:0
362-
// 這裡實作簡單的 nearest neighbor upsampling
363-
364-
if (num_components_ == 1) {
365-
// Grayscale
366-
for (int row = 0; row < height_; ++row) {
367-
for (int col = 0; col < width_; ++col) {
368-
int mcu_col = col / 8;
369-
int mcu_row = row / 8;
370-
int block_x = col % 8;
371-
int block_y = row % 8;
372-
int mcu_index = mcu_row * ((width_ + 7) / 8) + mcu_col;
390+
391+
// 1. 計算 MCU 的尺寸
392+
int max_h = 0, max_v = 0;
393+
for(int i=0; i<num_components_; ++i) {
394+
max_h = std::max(max_h, components_[i].h_sample);
395+
max_v = std::max(max_v, components_[i].v_sample);
396+
}
397+
int mcu_width = max_h * 8;
398+
int mcu_height = max_v * 8;
399+
400+
// MCU 在水平方向上的個數
401+
int mcu_stride = (width_ + mcu_width - 1) / mcu_width;
402+
403+
// 2. 逐像素處理 (Nearest Neighbor Upsampling)
404+
for (int row = 0; row < height_; ++row) {
405+
for (int col = 0; col < width_; ++col) {
406+
407+
// 找出目前像素屬於哪一個 MCU
408+
int mcu_col = col / mcu_width;
409+
int mcu_row = row / mcu_height;
410+
int mcu_idx = mcu_row * mcu_stride + mcu_col;
411+
412+
// 確保沒有越界
413+
if (mcu_idx >= static_cast<int>(y_blocks.size())) break;
414+
415+
// 計算像素在該 MCU 內部的相對座標
416+
int x_rel = col % mcu_width;
417+
int y_rel = row % mcu_height;
418+
419+
uint8_t r = 0, g = 0, b = 0;
420+
int y_val = 0, cb_val = 128, cr_val = 128;
421+
422+
// --- 讀取 Y 分量 ---
423+
{
424+
const auto& comp = components_[0]; // 假設第一個是 Y
425+
// 將 MCU 相對座標映射到 Component 的採樣座標
426+
// Y 分量通常是全解析度,所以 mapping 1:1 (如果 h_sample == max_h)
427+
int comp_x = (x_rel * comp.h_sample * 8) / mcu_width;
428+
int comp_y = (y_rel * comp.v_sample * 8) / mcu_height;
373429

374-
if (mcu_index < static_cast<int>(y_blocks.size()) &&
375-
!y_blocks[mcu_index].empty()) {
376-
uint8_t y = y_blocks[mcu_index][block_y * 8 + block_x];
377-
int pixel_index = (row * width_ + col) * 3;
378-
image_data_[pixel_index] = y;
379-
image_data_[pixel_index + 1] = y;
380-
image_data_[pixel_index + 2] = y;
430+
// 找出是在 Component 的哪一個 8x8 Block 以及 Block 內的哪個 Pixel
431+
int blk_x = comp_x / 8;
432+
int blk_y = comp_y / 8;
433+
int pixel_x = comp_x % 8;
434+
int pixel_y = comp_y % 8;
435+
436+
// 計算在 flat vector 中的 index
437+
// Block 排列順序:先水平,後垂直 (Raster scan inside MCU)
438+
int block_index = blk_y * comp.h_sample + blk_x;
439+
int pixel_idx = block_index * 64 + pixel_y * 8 + pixel_x;
440+
441+
if (pixel_idx < static_cast<int>(y_blocks[mcu_idx].size())) {
442+
y_val = y_blocks[mcu_idx][pixel_idx];
381443
}
382444
}
383-
}
384-
} else {
385-
// YCbCr 轉 RGB
386-
for (int row = 0; row < height_; ++row) {
387-
for (int col = 0; col < width_; ++col) {
388-
int mcu_col = col / 8;
389-
int mcu_row = row / 8;
390-
int block_x = col % 8;
391-
int block_y = row % 8;
392-
int mcu_index = mcu_row * ((width_ + 7) / 8) + mcu_col;
445+
446+
// --- 讀取 Cb 分量 ---
447+
if (num_components_ > 1 && !cb_blocks[mcu_idx].empty()) {
448+
const auto& comp = components_[1];
449+
int comp_x = (x_rel * comp.h_sample * 8) / mcu_width;
450+
int comp_y = (y_rel * comp.v_sample * 8) / mcu_height;
393451

394-
if (mcu_index < static_cast<int>(y_blocks.size()) &&
395-
!y_blocks[mcu_index].empty()) {
396-
uint8_t y = y_blocks[mcu_index][block_y * 8 + block_x];
397-
uint8_t cb = 128, cr = 128;
398-
399-
if (!cb_blocks[mcu_index].empty()) {
400-
cb = cb_blocks[mcu_index][block_y * 8 + block_x];
401-
}
402-
if (!cr_blocks[mcu_index].empty()) {
403-
cr = cr_blocks[mcu_index][block_y * 8 + block_x];
404-
}
405-
406-
uint8_t r, g, b;
407-
ycbcrToRgb(y, cb, cr, r, g, b);
408-
409-
int pixel_index = (row * width_ + col) * 3;
410-
image_data_[pixel_index] = r;
411-
image_data_[pixel_index + 1] = g;
412-
image_data_[pixel_index + 2] = b;
452+
int pixel_idx = (comp_y / 8 * comp.h_sample + comp_x / 8) * 64 + (comp_y % 8) * 8 + (comp_x % 8);
453+
454+
if (pixel_idx < static_cast<int>(cb_blocks[mcu_idx].size())) {
455+
cb_val = cb_blocks[mcu_idx][pixel_idx];
456+
}
457+
}
458+
459+
// --- 讀取 Cr 分量 ---
460+
if (num_components_ > 2 && !cr_blocks[mcu_idx].empty()) {
461+
const auto& comp = components_[2];
462+
int comp_x = (x_rel * comp.h_sample * 8) / mcu_width;
463+
int comp_y = (y_rel * comp.v_sample * 8) / mcu_height;
464+
465+
int pixel_idx = (comp_y / 8 * comp.h_sample + comp_x / 8) * 64 + (comp_y % 8) * 8 + (comp_x % 8);
466+
467+
if (pixel_idx < static_cast<int>(cr_blocks[mcu_idx].size())) {
468+
cr_val = cr_blocks[mcu_idx][pixel_idx];
413469
}
414470
}
471+
472+
// 轉換為 RGB
473+
if (num_components_ == 1) {
474+
// Grayscale
475+
int pos = (row * width_ + col) * 3;
476+
image_data_[pos] = y_val;
477+
image_data_[pos + 1] = y_val;
478+
image_data_[pos + 2] = y_val;
479+
} else {
480+
ycbcrToRgb(y_val, cb_val, cr_val, r, g, b);
481+
int pos = (row * width_ + col) * 3;
482+
image_data_[pos] = r;
483+
image_data_[pos + 1] = g;
484+
image_data_[pos + 2] = b;
485+
}
415486
}
416487
}
417488
}

0 commit comments

Comments
 (0)