Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions 0295.Find-Median-from-Data-Stream/memo.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# 295. Find Median from Data Stream

## step1

二分探索が真っ先に思いつき、とりあえず書いてみると通った。
計算量を見積もると、挿入位置の探索は O(log n) だが、リストへの挿入で要素をずらす必要があるため addNum は毎回 O(n)。findMedian は O(1)。

## step2

solutionをチラ見してheapを使うことを見て書いた。heapを使うことを自力で思いつくようにならなければな。
heapqライブラリの使い方の確認になった。

時間計算量: addNum は O(log n)、findMedian は O(1)

> Find Median from Data Stream の RMQ BIT 平方分割

> データをビンソートし、中央値を求めることを考えました。ビンソートして中央値を求めるとき、 BIT または平方分割すると、 O(log n) や O(sqrt(n)) で求められるという話です。

分からないのでLLMに書かせてみる。要復習。

Binary Indexed Tree (BIT) で、値の区間ごとの個数を持っておくのか。

#### 平方分割:

https://zenn.dev/student_blog/articles/72055eaeb62216

クエリを $O(\sqrt{N})$ の計算量で処理できるため、配列全体を処理する ($O(N)$) よりも効率的。

計算量的にはセグメント木の $O(\log N)$ より劣るが、実装がかなり簡単だ。複数クエリの問題で汎用性が高そう。

https://github.com/shining-ai/leetcode/pull/64

> https://docs.python.org/3/library/bisect.html#bisect.insort_left

bisect.insort_left、こんな便利な関数があるのか

若干速くなった。

https://github.com/potrue/leetcode/pull/75

> 取り合えず入れてしまってからサイズを調整するアプローチ

これも書いてみる。プログラムの実行速度は若干遅くなる。挿入回数が増える場合があるため。

## step3

BITを書いたが短期記憶で書いている感じがする。新しい問題を見た時に適用できるようになるにはまだまだといった感じ。
22 changes: 22 additions & 0 deletions 0295.Find-Median-from-Data-Stream/step1_bisect.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import bisect


class MedianFinder:
    """Report the median of a stream of integers kept in a sorted list.

    Locating the insertion slot is a binary search, but ``list.insert``
    still has to shift the tail of the list, so ``addNum`` is O(n) overall.
    ``findMedian`` only indexes the middle of the list.
    """

    def __init__(self):
        # Every number added so far, in ascending order.
        self.data = []

    def addNum(self, num: int) -> None:
        """Insert ``num`` at the position that keeps ``self.data`` sorted."""
        index = bisect.bisect_left(self.data, num)
        self.data.insert(index, num)

    def findMedian(self) -> float:
        """Return the median of all numbers added so far.

        Returns:
            The middle element for an odd count, or the mean of the two
            middle elements for an even count. Always a ``float``.

        Raises:
            IndexError: if no number has been added yet.
        """
        if not self.data:
            raise IndexError("findMedian() called before any number was added")
        middle_index = len(self.data) // 2
        if len(self.data) % 2 == 0:
            return (self.data[middle_index - 1] + self.data[middle_index]) / 2
        # float() keeps the return type the same for odd and even counts.
        return float(self.data[middle_index])


# Your MedianFinder object will be instantiated and called as such:
# obj = MedianFinder()
# obj.addNum(num)
# param_2 = obj.findMedian()
21 changes: 21 additions & 0 deletions 0295.Find-Median-from-Data-Stream/step2_bisect_insort_left.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import bisect


class MedianFinder:
    """Report the median of a stream of integers kept in a sorted list.

    ``bisect.insort_left`` finds the slot by binary search and inserts in
    one call; the insertion itself still shifts list elements, so
    ``addNum`` remains O(n). ``findMedian`` only indexes the middle.
    """

    def __init__(self):
        # Every number added so far, in ascending order.
        self.data = []

    def addNum(self, num: int) -> None:
        """Insert ``num`` while keeping ``self.data`` sorted."""
        bisect.insort_left(self.data, num)

    def findMedian(self) -> float:
        """Return the median of all numbers added so far.

        Returns:
            The middle element for an odd count, or the mean of the two
            middle elements for an even count. Always a ``float``.

        Raises:
            IndexError: if no number has been added yet.
        """
        if not self.data:
            raise IndexError("findMedian() called before any number was added")
        middle_index, is_odd = divmod(len(self.data), 2)
        if is_odd:
            # float() keeps the return type the same for odd and even counts.
            return float(self.data[middle_index])
        return (self.data[middle_index - 1] + self.data[middle_index]) / 2


# Your MedianFinder object will be instantiated and called as such:
# obj = MedianFinder()
# obj.addNum(num)
# param_2 = obj.findMedian()
53 changes: 53 additions & 0 deletions 0295.Find-Median-from-Data-Stream/step2_bit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
class MedianFinder:
    """Median of an integer stream using a Binary Indexed Tree of value counts.

    Each accepted value is shifted by ``offset`` to a 1-based tree index and
    the tree stores how many times each index has been added. The k-th
    smallest value is then located by descending the tree, so both
    ``addNum`` and ``findMedian`` touch O(log MAX_VAL) nodes.
    """

    def __init__(self):
        # Expected values are -100,000 .. 100,000.
        # The tree is 1-based, so +100001 maps the smallest value to index 1.
        self.offset = 100001
        self.MAX_VAL = 200005  # tree size, a little above the largest used index

        self.bit = [0] * (self.MAX_VAL + 1)
        self.total_count = 0

    def _add(self, idx: int, val: int) -> None:
        """Add ``val`` to the count stored at 1-based tree index ``idx``.

        ``idx`` must be >= 1: for 0 the lowest-set-bit step is 0 and the
        loop would never advance.
        """
        while idx <= self.MAX_VAL:
            self.bit[idx] += val
            idx += idx & -idx  # jump to the next node covering this index

    def _find_kth(self, k: int) -> int:
        """Return the smallest index whose prefix count is at least ``k``.

        Uses binary lifting over the tree: greedily take the largest steps
        whose accumulated count is still below ``k``.
        """
        idx = 0
        # Largest power of two not exceeding MAX_VAL (1 << 17 for 200005).
        shift = 1 << (self.MAX_VAL.bit_length() - 1)

        while shift > 0:
            if idx + shift <= self.MAX_VAL and self.bit[idx + shift] < k:
                idx += shift
                k -= self.bit[idx]  # discount the elements already skipped
            shift >>= 1

        # idx is the last position with prefix count < k, so idx + 1 is
        # the first position where the prefix count reaches k.
        return idx + 1

    def addNum(self, num: int) -> None:
        """Record one occurrence of ``num``.

        Raises:
            ValueError: if ``num`` does not map to a valid tree index.
        """
        # Shift so that negative values land on positive indices.
        bit_idx = num + self.offset
        if not 1 <= bit_idx <= self.MAX_VAL:
            # An index <= 0 would make _add loop forever, and an index above
            # MAX_VAL would bump total_count without updating the tree.
            raise ValueError(
                f"num must be in [{1 - self.offset}, "
                f"{self.MAX_VAL - self.offset}], got {num}"
            )
        self._add(bit_idx, 1)
        self.total_count += 1

    def findMedian(self) -> float:
        """Return the median of all numbers added so far.

        Raises:
            IndexError: if no number has been added yet.
        """
        if self.total_count == 0:
            raise IndexError("findMedian() called before any number was added")
        half = self.total_count // 2
        if self.total_count % 2 == 1:
            # Odd count: the (n // 2 + 1)-th smallest element is the median.
            kth_idx = self._find_kth(half + 1)
            return float(kth_idx - self.offset)
        # Even count: average the (n // 2)-th and (n // 2 + 1)-th elements.
        left_idx = self._find_kth(half)
        right_idx = self._find_kth(half + 1)
        return (left_idx + right_idx - 2 * self.offset) / 2.0


# Your MedianFinder object will be instantiated and called as such:
# obj = MedianFinder()
# obj.addNum(num)
# param_2 = obj.findMedian()
34 changes: 34 additions & 0 deletions 0295.Find-Median-from-Data-Stream/step2_heap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import heapq


class MedianFinder:
    """Median of an integer stream using two balanced heaps.

    ``low`` holds the smaller half and ``high`` the larger half, with
    ``len(low)`` always equal to ``len(high)`` or one more. ``low`` is a
    max-heap emulated by storing negated values in a ``heapq`` min-heap,
    which avoids the ``heapq.*_max`` functions that only exist in
    Python 3.14+.
    """

    def __init__(self):
        # Smaller half, stored NEGATED so that -self.low[0] is its maximum.
        self.low = []
        # Larger half as a plain min-heap; self.high[0] is its minimum.
        self.high = []

    def addNum(self, num: int) -> None:
        """Add ``num`` to the half it belongs to, keeping the sizes balanced."""
        if len(self.low) > len(self.high):
            # low already has the extra element, so high must grow by one.
            if num >= -self.low[0]:
                heapq.heappush(self.high, num)
            else:
                # num belongs in low: it replaces low's maximum, which moves up.
                max_low = -heapq.heappushpop(self.low, -num)
                heapq.heappush(self.high, max_low)
        else:
            # Sizes are equal, so low must grow by one.
            if not self.high or num <= self.high[0]:
                heapq.heappush(self.low, -num)
            else:
                # num belongs in high: it replaces high's minimum, which moves down.
                min_high = heapq.heappushpop(self.high, num)
                heapq.heappush(self.low, -min_high)

    def findMedian(self) -> float:
        """Return the median of all numbers added so far.

        Raises:
            IndexError: if no number has been added yet.
        """
        if not self.low:
            raise IndexError("findMedian() called before any number was added")
        if len(self.low) > len(self.high):
            # Odd count: the extra element on top of low is the median.
            return float(-self.low[0])
        return (-self.low[0] + self.high[0]) / 2


# Your MedianFinder object will be instantiated and called as such:
# obj = MedianFinder()
# obj.addNum(num)
# param_2 = obj.findMedian()
26 changes: 26 additions & 0 deletions 0295.Find-Median-from-Data-Stream/step2_heap_short.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import heapq


class MedianFinder:
    """Median of an integer stream using two heaps, "insert then rebalance".

    ``low`` holds the smaller half and ``high`` the larger half, with
    ``len(low)`` always equal to ``len(high)`` or one more. ``low`` is a
    max-heap emulated by storing negated values in a ``heapq`` min-heap,
    which avoids the ``heapq.*_max`` functions that only exist in
    Python 3.14+.
    """

    def __init__(self):
        # Smaller half, stored NEGATED so that -self.low[0] is its maximum.
        self.low = []
        # Larger half as a plain min-heap; self.high[0] is its minimum.
        self.high = []

    def addNum(self, num: int) -> None:
        """Add ``num``, then restore the size invariant between the heaps."""
        # Route num through high: the smallest of high-plus-num goes to low,
        # so every element of low stays <= every element of high.
        min_high = heapq.heappushpop(self.high, num)
        heapq.heappush(self.low, -min_high)
        # low may now be two larger than high; hand its maximum back.
        if len(self.low) > len(self.high) + 1:
            max_low = -heapq.heappop(self.low)
            heapq.heappush(self.high, max_low)

    def findMedian(self) -> float:
        """Return the median of all numbers added so far.

        Raises:
            IndexError: if no number has been added yet.
        """
        if not self.low:
            raise IndexError("findMedian() called before any number was added")
        if len(self.low) > len(self.high):
            # Odd count: the extra element on top of low is the median.
            return float(-self.low[0])
        return (-self.low[0] + self.high[0]) / 2


# Your MedianFinder object will be instantiated and called as such:
# obj = MedianFinder()
# obj.addNum(num)
# param_2 = obj.findMedian()
58 changes: 58 additions & 0 deletions 0295.Find-Median-from-Data-Stream/step2_sq_decomposition.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import math


class MedianFinder:
    """Median of an integer stream using sqrt decomposition over value counts.

    Values are shifted by ``offset`` to non-negative indices. ``counts``
    stores occurrences per index and ``blocks`` stores the total per run of
    ``block_size`` consecutive indices, so the k-th smallest value is found
    by skipping whole blocks and then scanning inside a single block.
    """

    def __init__(self):
        # Expected values are -100,000 .. 100,000.
        # +100000 shifts negative values to non-negative indices.
        self.offset = 100000
        self.MAX_VAL = 200005

        # Block size just above sqrt(MAX_VAL): int(sqrt(200005)) + 1 == 448.
        self.block_size = int(math.sqrt(self.MAX_VAL)) + 1

        # Occurrences of each shifted value (fine-grained level).
        self.counts = [0] * (self.MAX_VAL + 1)

        # Total occurrences inside each block (coarse level).
        self.blocks = [0] * (self.MAX_VAL // self.block_size + 1)

        self.total_count = 0

    def addNum(self, num: int) -> None:
        """Record one occurrence of ``num``.

        Raises:
            ValueError: if ``num`` does not map to a valid index.
        """
        idx = num + self.offset
        if not 0 <= idx <= self.MAX_VAL:
            # A negative index would silently wrap around to the end of
            # the lists and corrupt the counts.
            raise ValueError(
                f"num must be in [{-self.offset}, "
                f"{self.MAX_VAL - self.offset}], got {num}"
            )

        # Bump both the per-value count and the total of its block.
        self.counts[idx] += 1
        self.blocks[idx // self.block_size] += 1
        self.total_count += 1

    def _find_kth(self, k: int) -> int:
        """Return the shifted index of the k-th smallest element (1-based).

        Raises:
            IndexError: if ``k`` is not in ``1..total_count``.
        """
        if not 1 <= k <= self.total_count:
            raise IndexError(
                f"k must be in [1, {self.total_count}], got {k}"
            )

        # 1. Skip whole blocks until the one containing the k-th element.
        block_idx = 0
        while k > self.blocks[block_idx]:
            k -= self.blocks[block_idx]
            block_idx += 1

        # 2. Scan that block one index at a time.
        start_idx = block_idx * self.block_size
        for idx in range(start_idx, start_idx + self.block_size):
            k -= self.counts[idx]
            if k <= 0:
                return idx

        # Only reachable if blocks and counts were modified inconsistently.
        raise RuntimeError("block totals and per-value counts disagree")

    def findMedian(self) -> float:
        """Return the median of all numbers added so far.

        Raises:
            IndexError: if no number has been added yet.
        """
        if self.total_count == 0:
            raise IndexError("findMedian() called before any number was added")
        half = self.total_count // 2
        if self.total_count % 2 == 1:
            # Odd count: the (n // 2 + 1)-th smallest element is the median.
            kth_idx = self._find_kth(half + 1)
            return float(kth_idx - self.offset)
        # Even count: average the (n // 2)-th and (n // 2 + 1)-th elements.
        left_idx = self._find_kth(half)
        right_idx = self._find_kth(half + 1)
        return (left_idx + right_idx - 2 * self.offset) / 2.0
42 changes: 42 additions & 0 deletions 0295.Find-Median-from-Data-Stream/step3_bit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
class MedianFinder:
    """Median of an integer stream backed by a Binary Indexed Tree.

    The tree counts occurrences per value (shifted by ``offset`` to a
    1-based index), and the median is found by locating the k-th smallest
    index with a top-down descent of the tree.
    """

    def __init__(self):
        # offset maps value -100000 to tree index 1 (the tree is 1-based).
        self.offset = 100001
        # Number of usable tree indices.
        self.MAX_VAL = 200005
        self.bit = [0] * (self.MAX_VAL + 1)
        self.total_count = 0

    def _add(self, idx: int, val: int) -> None:
        """Add ``val`` at 1-based index ``idx``; ``idx`` must be >= 1."""
        while idx <= self.MAX_VAL:
            self.bit[idx] += val
            idx += idx & -idx  # lowest set bit; 0 for idx == 0, hence idx >= 1

    def _find_kth(self, k: int) -> int:
        """Return the smallest index whose prefix count reaches ``k``."""
        idx = 0
        # Start from the largest power of two that fits in MAX_VAL.
        shift = 1 << (self.MAX_VAL.bit_length() - 1)

        while shift > 0:
            if idx + shift <= self.MAX_VAL and self.bit[idx + shift] < k:
                idx += shift
                k -= self.bit[idx]  # these elements all rank below the target
            shift >>= 1
        return idx + 1

    def addNum(self, num: int) -> None:
        """Record one occurrence of ``num``.

        Raises:
            ValueError: if ``num`` does not map to a valid tree index.
        """
        bit_idx = num + self.offset
        if not 1 <= bit_idx <= self.MAX_VAL:
            # Index <= 0 would hang _add; index > MAX_VAL would be counted
            # in total_count but never stored in the tree.
            raise ValueError(
                f"num must be in [{1 - self.offset}, "
                f"{self.MAX_VAL - self.offset}], got {num}"
            )
        self._add(bit_idx, 1)
        self.total_count += 1

    def findMedian(self) -> float:
        """Return the median of all numbers added so far.

        Raises:
            IndexError: if no number has been added yet.
        """
        if self.total_count == 0:
            raise IndexError("findMedian() called before any number was added")
        half, is_odd = divmod(self.total_count, 2)
        if is_odd:
            return float(self._find_kth(half + 1) - self.offset)
        left_idx = self._find_kth(half)
        right_idx = self._find_kth(half + 1)
        return (left_idx + right_idx - 2 * self.offset) / 2.0


# Your MedianFinder object will be instantiated and called as such:
# obj = MedianFinder()
# obj.addNum(num)
# param_2 = obj.findMedian()
38 changes: 38 additions & 0 deletions 0295.Find-Median-from-Data-Stream/step3_sq_decomposition.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import math


class MedianFinder:
    """Median of an integer stream using sqrt decomposition over value counts.

    ``counts`` holds occurrences per shifted value and ``blocks`` holds the
    sum of each run of ``block_size`` consecutive entries of ``counts``.
    """

    def __init__(self):
        # offset shifts value -100000 to index 0.
        self.offset = 100000
        # Largest index that counts can hold.
        self.MAX_VAL = 200005
        self.block_size = int(math.sqrt(self.MAX_VAL)) + 1
        self.counts = [0] * (self.MAX_VAL + 1)
        self.blocks = [0] * (self.MAX_VAL // self.block_size + 1)
        self.total_count = 0

    def addNum(self, num: int) -> None:
        """Record one occurrence of ``num``.

        Raises:
            ValueError: if ``num`` does not map to a valid index.
        """
        idx = num + self.offset
        if not 0 <= idx <= self.MAX_VAL:
            # Negative indices would wrap around instead of failing.
            raise ValueError(
                f"num must be in [{-self.offset}, "
                f"{self.MAX_VAL - self.offset}], got {num}"
            )
        self.counts[idx] += 1
        self.blocks[idx // self.block_size] += 1
        self.total_count += 1

    def _find_kth(self, k: int) -> int:
        """Return the shifted index of the k-th smallest element (1-based).

        Raises:
            IndexError: if ``k`` is not in ``1..total_count``.
        """
        if not 1 <= k <= self.total_count:
            raise IndexError(
                f"k must be in [1, {self.total_count}], got {k}"
            )
        # Coarse pass: skip blocks that end before the k-th element.
        block_idx = 0
        while k > self.blocks[block_idx]:
            k -= self.blocks[block_idx]
            block_idx += 1
        # Fine pass: walk the single block that contains it.
        start_idx = block_idx * self.block_size
        for idx in range(start_idx, start_idx + self.block_size):
            k -= self.counts[idx]
            if k <= 0:
                return idx
        # Only reachable if blocks and counts were modified inconsistently.
        raise RuntimeError("block totals and per-value counts disagree")

    def findMedian(self) -> float:
        """Return the median of all numbers added so far.

        Raises:
            IndexError: if no number has been added yet.
        """
        if self.total_count == 0:
            raise IndexError("findMedian() called before any number was added")
        half, is_odd = divmod(self.total_count, 2)
        if is_odd:
            return float(self._find_kth(half + 1) - self.offset)
        left_idx = self._find_kth(half)
        right_idx = self._find_kth(half + 1)
        return (left_idx + right_idx - 2 * self.offset) / 2.0