修復 decoder 失真率過高問題

5000user5000 · 5000user5000 · commit 9ba72250a4de · 2025-12-01T23:28:09.000+08:00
diff --git a/src/cpp/decoder.cpp b/src/cpp/decoder.cpp
@@ -5,6 +5,7 @@
 #include <cstring>
 #include <cmath>
 #include <algorithm>
+#include <iostream> // 用於 debug 輸出
 
 namespace jpeg {
 
@@ -18,7 +19,6 @@ JPEGDecoder::~JPEGDecoder() {
 }
 
 bool JPEGDecoder::decodeFile(const std::string& filename) {
-
     std::ifstream file(filename, std::ios::binary | std::ios::ate);
     if (!file.is_open()) {
         return false;
@@ -57,6 +57,8 @@ uint16_t JPEGDecoder::readWord() {
 uint8_t JPEGDecoder::readMarker() {
     uint8_t byte = readByte();
     if (byte != 0xFF) {
+        // Stray bytes (e.g. extra padding) may appear before a marker in some files, but no
+        // leniency is applied here: a marker must start with 0xFF, so anything else is an error.
         throw std::runtime_error("Expected marker");
     }
     
@@ -71,6 +73,7 @@ uint8_t JPEGDecoder::readMarker() {
 
 void JPEGDecoder::skipSegment() {
     uint16_t length = readWord();
+    if (length < 2) throw std::runtime_error("Invalid segment length");
     data_pos_ += length - 2;
 }
 
@@ -83,7 +86,7 @@ bool JPEGDecoder::parse() {
         }
         
         // 解析各個 segments
-        while (true) {
+        while (data_pos_ < jpeg_data_.size()) {
             marker = readMarker();
             
             switch (marker) {
@@ -118,6 +121,8 @@ bool JPEGDecoder::parse() {
             }
         }
     } catch (const std::exception& e) {
+        // 在實際應用中，可以 print e.what() 幫助除錯
+        // std::cerr << "JPEG Error: " << e.what() << std::endl;
         return false;
     }
     
@@ -246,9 +251,10 @@ bool JPEGDecoder::processSOS() {
     size_t scan_data_end = data_pos_;
     while (scan_data_end < jpeg_data_.size()) {
         if (jpeg_data_[scan_data_end] == 0xFF) {
+            if (scan_data_end + 1 >= jpeg_data_.size()) break;
             uint8_t next = jpeg_data_[scan_data_end + 1];
             if (next != 0x00 && !(next >= 0xD0 && next <= 0xD7)) {
-                // 找到下一個 marker
+                // 找到下一個 marker (非 stuffing 0x00 且非 RST)
                 break;
             }
         }
@@ -261,7 +267,7 @@ bool JPEGDecoder::processSOS() {
     BitStream bs(jpeg_data_.data() + scan_data_start, scan_data_size);
     
     // 計算 MCU 的數量
-    int max_h_sample = 1, max_v_sample = 1;
+    int max_h_sample = 0, max_v_sample = 0;
     for (int i = 0; i < num_components_; ++i) {
         max_h_sample = std::max(max_h_sample, components_[i].h_sample);
         max_v_sample = std::max(max_v_sample, components_[i].v_sample);
@@ -276,15 +282,34 @@ bool JPEGDecoder::processSOS() {
     image_data_.resize(width_ * height_ * 3);
     
     // 儲存 Y, Cb, Cr 分量
+    // 注意：這裡假設記憶體足夠。對於大圖，這種做法可能會佔用較多記憶體。
     std::vector<std::vector<uint8_t>> y_data(mcu_rows * mcu_cols);
     std::vector<std::vector<uint8_t>> cb_data(mcu_rows * mcu_cols);
     std::vector<std::vector<uint8_t>> cr_data(mcu_rows * mcu_cols);
     
     // 解碼所有 MCU
     int16_t prev_dc[3] = {0, 0, 0};
+    int mcus_processed = 0;
     
     for (int mcu_row = 0; mcu_row < mcu_rows; ++mcu_row) {
         for (int mcu_col = 0; mcu_col < mcu_cols; ++mcu_col) {
+            
+            // 處理 Restart Interval
+            if (restart_interval_ > 0 && mcus_processed > 0 && mcus_processed % restart_interval_ == 0) {
+                // 根據 JPEG 標準，遇到 RST 時需要重置 DC 預測值
+                prev_dc[0] = 0;
+                prev_dc[1] = 0;
+                prev_dc[2] = 0;
+                
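+                // Per the existing design, BitStream::fillBuffer skips RST markers (FF Dx) itself,
+                // but at a restart boundary the fractional bits still left in the buffer
+                // must also be discarded. BitStream offers no dedicated align operation,
+                // so the reset() call below rounds the current bit position up to the next
+                // whole byte as a simple stand-in for byte alignment.
+                // NOTE(review): confirm reset() takes a byte offset; RST handling in BitStream is a potential improvement point.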
+                bs.reset(bs.getBitPosition() / 8 + (bs.getBitPosition() % 8 ? 1 : 0)); // 簡易模擬 byte alignment
+            }
+
             int mcu_index = mcu_row * mcu_cols + mcu_col;
             
             // 為每個分量解碼區塊
@@ -302,6 +327,7 @@ bool JPEGDecoder::processSOS() {
                         IDCT::transform8x8(block, pixels);
                         
                         // 儲存到對應的分量
+                        // 注意：對於 4:2:0，Y 分量會有 4 個 block，這裡依序存入
                         if (comp == 0) {  // Y
                             y_data[mcu_index].insert(y_data[mcu_index].end(), pixels, pixels + 64);
                         } else if (comp == 1) {  // Cb
@@ -312,6 +338,7 @@ bool JPEGDecoder::processSOS() {
                     }
                 }
             }
+            mcus_processed++;
         }
     }
     
@@ -346,10 +373,12 @@ void JPEGDecoder::decodeBlock(BitStream& bs, int component_id, int16_t* prev_dc,
 }
 
 void JPEGDecoder::ycbcrToRgb(int y, int cb, int cr, uint8_t& r, uint8_t& g, uint8_t& b) {
+    // 轉換公式 (標準 JPEG)
     int r_val = y + 1.402 * (cr - 128);
     int g_val = y - 0.344136 * (cb - 128) - 0.714136 * (cr - 128);
     int b_val = y + 1.772 * (cb - 128);
     
+    // Clamp to 0-255
     r = (r_val < 0) ? 0 : (r_val > 255) ? 255 : r_val;
     g = (g_val < 0) ? 0 : (g_val > 255) ? 255 : g_val;
     b = (b_val < 0) ? 0 : (b_val > 255) ? 255 : b_val;
@@ -358,60 +387,102 @@ void JPEGDecoder::ycbcrToRgb(int y, int cb, int cr, uint8_t& r, uint8_t& g, uint
 void JPEGDecoder::upsample(const std::vector<std::vector<uint8_t>>& y_blocks,
                            const std::vector<std::vector<uint8_t>>& cb_blocks,
                            const std::vector<std::vector<uint8_t>>& cr_blocks) {
-    // 簡化版：假設 4:4:4 或 4:2:0
-    // 這裡實作簡單的 nearest neighbor upsampling
-    
-    if (num_components_ == 1) {
-        // Grayscale
-        for (int row = 0; row < height_; ++row) {
-            for (int col = 0; col < width_; ++col) {
-                int mcu_col = col / 8;
-                int mcu_row = row / 8;
-                int block_x = col % 8;
-                int block_y = row % 8;
-                int mcu_index = mcu_row * ((width_ + 7) / 8) + mcu_col;
+    
+    // 1. 計算 MCU 的尺寸
+    int max_h = 0, max_v = 0;
+    for(int i=0; i<num_components_; ++i) {
+        max_h = std::max(max_h, components_[i].h_sample);
+        max_v = std::max(max_v, components_[i].v_sample);
+    }
+    int mcu_width = max_h * 8;
+    int mcu_height = max_v * 8;
+    
+    // MCU 在水平方向上的個數
+    int mcu_stride = (width_ + mcu_width - 1) / mcu_width;
+
+    // 2. 逐像素處理 (Nearest Neighbor Upsampling)
+    for (int row = 0; row < height_; ++row) {
+        for (int col = 0; col < width_; ++col) {
+            
+            // 找出目前像素屬於哪一個 MCU
+            int mcu_col = col / mcu_width;
+            int mcu_row = row / mcu_height;
+            int mcu_idx = mcu_row * mcu_stride + mcu_col;
+
+            // 確保沒有越界
+            if (mcu_idx >= static_cast<int>(y_blocks.size())) break;
+
+            // 計算像素在該 MCU 內部的相對座標
+            int x_rel = col % mcu_width;
+            int y_rel = row % mcu_height;
+
+            uint8_t r = 0, g = 0, b = 0;
+            int y_val = 0, cb_val = 128, cr_val = 128;
+
+            // --- 讀取 Y 分量 ---
+            {
+                const auto& comp = components_[0]; // 假設第一個是 Y
+                // 將 MCU 相對座標映射到 Component 的採樣座標
+                // Y 分量通常是全解析度，所以 mapping 1:1 (如果 h_sample == max_h)
+                int comp_x = (x_rel * comp.h_sample * 8) / mcu_width;
+                int comp_y = (y_rel * comp.v_sample * 8) / mcu_height;
                 
-                if (mcu_index < static_cast<int>(y_blocks.size()) && 
-                    !y_blocks[mcu_index].empty()) {
-                    uint8_t y = y_blocks[mcu_index][block_y * 8 + block_x];
-                    int pixel_index = (row * width_ + col) * 3;
-                    image_data_[pixel_index] = y;
-                    image_data_[pixel_index + 1] = y;
-                    image_data_[pixel_index + 2] = y;
+                // 找出是在 Component 的哪一個 8x8 Block 以及 Block 內的哪個 Pixel
+                int blk_x = comp_x / 8;
+                int blk_y = comp_y / 8;
+                int pixel_x = comp_x % 8;
+                int pixel_y = comp_y % 8;
+
+                // 計算在 flat vector 中的 index
+                // Block 排列順序：先水平，後垂直 (Raster scan inside MCU)
+                int block_index = blk_y * comp.h_sample + blk_x;
+                int pixel_idx = block_index * 64 + pixel_y * 8 + pixel_x;
+                
+                if (pixel_idx < static_cast<int>(y_blocks[mcu_idx].size())) {
+                    y_val = y_blocks[mcu_idx][pixel_idx];
                 }
             }
-        }
-    } else {
-        // YCbCr 轉 RGB
-        for (int row = 0; row < height_; ++row) {
-            for (int col = 0; col < width_; ++col) {
-                int mcu_col = col / 8;
-                int mcu_row = row / 8;
-                int block_x = col % 8;
-                int block_y = row % 8;
-                int mcu_index = mcu_row * ((width_ + 7) / 8) + mcu_col;
+
+            // --- 讀取 Cb 分量 ---
+            if (num_components_ > 1 && !cb_blocks[mcu_idx].empty()) {
+                const auto& comp = components_[1];
+                int comp_x = (x_rel * comp.h_sample * 8) / mcu_width;
+                int comp_y = (y_rel * comp.v_sample * 8) / mcu_height;
                 
-                if (mcu_index < static_cast<int>(y_blocks.size()) &&
-                    !y_blocks[mcu_index].empty()) {
-                    uint8_t y = y_blocks[mcu_index][block_y * 8 + block_x];
-                    uint8_t cb = 128, cr = 128;
-                    
-                    if (!cb_blocks[mcu_index].empty()) {
-                        cb = cb_blocks[mcu_index][block_y * 8 + block_x];
-                    }
-                    if (!cr_blocks[mcu_index].empty()) {
-                        cr = cr_blocks[mcu_index][block_y * 8 + block_x];
-                    }
-                    
-                    uint8_t r, g, b;
-                    ycbcrToRgb(y, cb, cr, r, g, b);
-                    
-                    int pixel_index = (row * width_ + col) * 3;
-                    image_data_[pixel_index] = r;
-                    image_data_[pixel_index + 1] = g;
-                    image_data_[pixel_index + 2] = b;
+                int pixel_idx = (comp_y / 8 * comp.h_sample + comp_x / 8) * 64 + (comp_y % 8) * 8 + (comp_x % 8);
+                
+                if (pixel_idx < static_cast<int>(cb_blocks[mcu_idx].size())) {
+                    cb_val = cb_blocks[mcu_idx][pixel_idx];
+                }
+            }
+
+            // --- 讀取 Cr 分量 ---
+            if (num_components_ > 2 && !cr_blocks[mcu_idx].empty()) {
+                const auto& comp = components_[2];
+                int comp_x = (x_rel * comp.h_sample * 8) / mcu_width;
+                int comp_y = (y_rel * comp.v_sample * 8) / mcu_height;
+                
+                int pixel_idx = (comp_y / 8 * comp.h_sample + comp_x / 8) * 64 + (comp_y % 8) * 8 + (comp_x % 8);
+                
+                if (pixel_idx < static_cast<int>(cr_blocks[mcu_idx].size())) {
+                    cr_val = cr_blocks[mcu_idx][pixel_idx];
                 }
             }
+            
+            // 轉換為 RGB
+            if (num_components_ == 1) {
+                // Grayscale
+                int pos = (row * width_ + col) * 3;
+                image_data_[pos] = y_val;
+                image_data_[pos + 1] = y_val;
+                image_data_[pos + 2] = y_val;
+            } else {
+                ycbcrToRgb(y_val, cb_val, cr_val, r, g, b);
+                int pos = (row * width_ + col) * 3;
+                image_data_[pos] = r;
+                image_data_[pos + 1] = g;
+                image_data_[pos + 2] = b;
+            }
         }
     }
 }