-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathClassification_Function.py
More file actions
146 lines (111 loc) · 4.28 KB
/
Classification_Function.py
File metadata and controls
146 lines (111 loc) · 4.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# By : Ayman Mutasim
import numpy as np
import rasterio
import matplotlib.pyplot as plt
# --------------------------------------------------------
# PRINT CENTROIDS
# --------------------------------------------------------
def show_cluster_centroids(centroids):
"""
Prints the final K-means centroids.
"""
print("\nFINAL FLOAT CENTROIDS FROM K-MEANS:")
print(np.array(centroids))
print(f"\nNumber of centroids: {len(centroids)}")
# --------------------------------------------------------
# K-MEANS (FLOAT CENTROIDS)
# --------------------------------------------------------
def kmeans_single_band(data_2d, k, max_iters=100, tolerance=1e-4, seed=42):
"""
Performs K-means clustering on a single-band raster array.
"""
x, y = data_2d.shape
reshaped = data_2d.reshape(-1, 1)
np.random.seed(seed)
centroids = reshaped[
np.random.choice(reshaped.shape[0], k, replace=False)
].flatten().astype(float)
for _ in range(max_iters):
distances = np.abs(reshaped - centroids)
closest = np.argmin(distances, axis=1)
new_centroids = np.zeros(k)
for i in range(k):
pts = reshaped[closest == i]
new_centroids[i] = pts.mean() if pts.size > 0 else centroids[i]
if np.all(np.abs(new_centroids - centroids) < tolerance):
break
centroids = new_centroids
ssd = np.sum(np.min(np.abs(reshaped - centroids), axis=1) ** 2)
return closest.reshape(x, y), centroids, ssd
# --------------------------------------------------------
# CURVATURE-BASED ELBOW DETECTION (KNEEDLE)
# --------------------------------------------------------
def find_elbow_kneedle(k_vals, ssd_vals):
"""
Finds optimal K using curvature-based elbow method.
"""
x = (k_vals - k_vals.min()) / (k_vals.max() - k_vals.min())
y = (ssd_vals - ssd_vals.min()) / (ssd_vals.max() - ssd_vals.min())
x1, y1 = x[0], y[0]
x2, y2 = x[-1], y[-1]
# Compute the norms
distances = np.abs(
(y2 - y1) * x
- (x2 - x1) * y
+ x2 * y1
- y2 * x1
) / np.sqrt((y2 - y1) ** 2 + (x2 - x1) ** 2)
# Plot the SSD for different values of k
plt.plot(k_vals, ssd_vals)
plt.title("Elbow method: optimal k selection")
plt.xlabel("Number of clusters (k)")
plt.ylabel("Sum of Squared Distances (SSD)")
plt.show()
return int(k_vals[np.argmax(distances)])
def save_classified_raster(classified_flat, labels, profile, optimal_k, output_filepath):
classified_raster = classified_flat.reshape(labels.shape)
# Explicit dtype
classified_raster = classified_raster.astype(np.float32)
out_profile = profile.copy()
out_profile.update(dtype="float32", count=1)
with rasterio.open(output_filepath, "w", **out_profile) as dst:
dst.write(classified_raster, 1)
plt.imshow(classified_raster)
plt.title("CLassified raster")
plt.show()
print("Classified raster saved.")
# --------------------------------------------------------
# MAIN CLASSIFICATION FUNCTION
# --------------------------------------------------------
def classify_single_band_raster(input_filepath, output_filepath, max_k=10):
"""
Classifies a single-band raster using K-means and elbow method.
Returns:
- labels (2D array)
- centroids (array)
- optimal_k (int)
- raster profile (dict)
"""
with rasterio.open(input_filepath) as src:
if src.count != 1:
raise ValueError("Raster must be single-band.")
data = src.read(1).astype(float)
profile = src.profile
flat = data.flatten()
ssd_vals = []
k_vals = np.arange(1, max_k + 1)
for k in k_vals:
if k == 1:
ssd = np.sum((flat - flat.mean()) ** 2)
else:
_, _, ssd = kmeans_single_band(data.copy(), k)
ssd_vals.append(ssd)
ssd_vals = np.array(ssd_vals)
optimal_k = find_elbow_kneedle(k_vals, ssd_vals)
labels, centroids, _ = kmeans_single_band(data, optimal_k)
classified_flat = np.zeros(labels.size, float)
labels_flat = labels.flatten()
for i in range(len(classified_flat)):
classified_flat[i] = centroids[labels_flat[i]]
save_classified_raster(classified_flat, labels, profile, optimal_k, output_filepath)
return labels, centroids, optimal_k