-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathk-means.sql
More file actions
70 lines (64 loc) · 1.65 KB
/
k-means.sql
File metadata and controls
70 lines (64 loc) · 1.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
create table points(pid int primary key, x float, y float);
create table centroids(cid int primary key, x float, y float);
create table clusters(pid int references points(pid), cid int);
create or replace function init(k int)
returns void as $$
declare
ctr int = 0;
p points%rowtype;
begin
truncate centroids;
truncate clusters;
loop
insert into centroids values ((ctr+1),(select random()*(10-1)+1),(select random()*(10-1)+1));
ctr = ctr+1;
if ctr=k then
exit;
end if;
end loop;
truncate clusters;
for p in select * from points loop
insert into clusters values(p.pid,0);
end loop;
end;
$$ language plpgsql;
create or replace function kmeans(k int)
returns table(p int, c int) as $$
declare
p points%rowtype;
c centroids%rowtype;
dist float;
temp float;
nearest int;
ctr integer = 0;
Begin
perform init(k);
loop
for p in select * from points
loop
dist = 1000;
for c in select * from centroids
loop
temp = ((p.x-c.x)^2+(p.y-c.y)^2)^.5;
if temp<dist then
dist = temp;
nearest = c.cid;
end if;
end loop;
update clusters set cid=nearest where pid=p.pid;
end loop;
for c in select * from centroids
loop
if c.cid in (select clust.cid from clusters clust) then
update centroids set x=(select avg(po.x) from clusters clust inner join points po on clust.pid=po.pid where clust.cid=c.cid group by clust.Cid), y=(select avg(po.y) from clusters clust inner join points po on clust.pid=po.pid where clust.cid=c.cid group by clust.Cid) where c.cid=cid;
end if;
end loop;
ctr = ctr+1;
if ctr=200 then
exit;
end if;
end loop;
return query
select * from clusters;
end;
$$language plpgsql;