Support_Vector_Machine/linear_svm.py at main · sushantnair/Support_Vector_Machine · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# -*- coding: utf-8 -*-
"""EXPT8_SVM.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1N1k1wfEAKeUjpj2Ri2hnoa3hkGotEIv5
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


# Load the example dataset. Every CSV column is treated as one class; each
# cell is parsed as a comma-separated coordinate list wrapped in parentheses
# (presumably something like "(3,1)" -- confirm against the CSV).
data_points = pd.read_csv('D:/Example1_Finding_Hyperplane_Linear_SVM.csv')
data_classes = data_points.columns.tolist()

# Maps class name -> list of 1-D float arrays, one array per data point.
data_class_values_dict = dict()

for data_class in data_classes:
    class_column = data_points[f'{data_class}']
    print(class_column)
    for data_point in class_column:
        print(f'Original Datapoint: {data_point}')
        # Drop the surrounding parentheses, then convert each coordinate.
        coordinates = data_point.strip('()').split(',')
        horz = np.array(list(map(float, coordinates)))
        print(f'Horizontal NumPy Array:\n{horz}')
        # Create the per-class list on first use, then collect the point.
        data_class_values_dict.setdefault(f'{data_class}', []).append(horz)

print(data_class_values_dict)

# Scatter every parsed point, one marker per point, tagged with its class name.
# Only the first two coordinates of each point are drawn.
for class_name, class_points in data_class_values_dict.items():
    for pt in class_points:
        x_coord, y_coord = pt[0], pt[1]
        plt.plot(x_coord, y_coord, 'o', label=class_name)
plt.show()

# Straight-line (Euclidean) distance between two points
def euclidean(point1, point2):
    """Return the Euclidean distance between ``point1`` and ``point2``.

    Both arguments must support element-wise subtraction (e.g. NumPy arrays
    of the same shape). The result is the square root of the sum of the
    squared coordinate differences.
    """
    delta = point1 - point2
    squared_sum = np.sum(delta * delta)
    return np.sqrt(squared_sum)

# frontier_points: A dictionary with the same structure as data_class_values_dict, except that the values for each class
#                  include only the frontier points.
# min_distances  : For each point of a class, the minimum distance between that point and every point of the other class(es).

def find_frontier_points(data_class_values_dict):
    """Return, for every class, the points that lie closest to another class.

    For each point of a class, the distance to every point of all *other*
    classes is measured with ``euclidean`` and the smallest one is kept.
    The frontier points of the class are the points whose smallest distance
    equals the minimum over the whole class (ties are all kept, in their
    original order).

    Args:
        data_class_values_dict: Mapping of class name -> list of points.
            Points must be accepted by ``euclidean``.

    Returns:
        A dict with the same keys as ``data_class_values_dict`` whose values
        are lists holding only the frontier points of each class. A class
        with no points maps to an empty list.

    Raises:
        ValueError: If a class has points but no other class provides any
            point to measure against.
    """
    frontier_points = dict()
    for data_class, class_points in data_class_values_dict.items():
        # Gather the points of all other classes once per class rather than
        # once per point.
        other_points = [
            other_point
            for other_class, other_class_points in data_class_values_dict.items()
            if other_class != data_class
            for other_point in other_class_points
        ]
        if class_points and not other_points:
            raise ValueError(
                f'Cannot find frontier points for class {data_class!r}: '
                'no points of any other class to compare against.'
            )

        # Minimum distance from each point of this class to the other class(es).
        min_distances = [
            min(euclidean(data_point, other_point) for other_point in other_points)
            for data_point in class_points
        ]

        # Compute the class-wide minimum once instead of once per comparison.
        smallest_distance = min(min_distances, default=None)

        # Exact equality is intentional: smallest_distance is itself one of
        # the values in min_distances.
        frontier_points[data_class] = [
            data_point
            for data_point, dist in zip(class_points, min_distances)
            if dist == smallest_distance
        ]
    return frontier_points

# Determine the frontier points of every class and draw them over the data.
frontier_points = find_frontier_points(data_class_values_dict)
print(frontier_points)

for class_name, class_points in data_class_values_dict.items():
    # Ordinary data points of this class
    for pt in class_points:
        plt.plot(pt[0], pt[1], 'o', label=class_name)
    # Frontier points of this class, drawn as large '+' markers
    for frontier_pt in frontier_points[class_name]:
        plt.plot(
            frontier_pt[0],
            frontier_pt[1],
            '+',
            label=f'{class_name} Frontier',
            markersize=20,
        )
    # Set on every pass; the text never changes.
    plt.title("Frontier Points are marked with '+' symbol.")
plt.show()

# Map every class name to the numeric label used on the right-hand side of
# the SVM equations. The classification step decides by the sign of the
# score, so labels must be +1 / -1.
choice = input('Do you want default values for example or automatic values? Press Y else N: ')
if choice.strip().upper() == 'Y':
    # Fixed labels for a dataset whose classes are named 'Positive' and
    # 'Negative' (presumably the bundled example CSV -- confirm).
    class_number_dict = {'Positive': 1, 'Negative': -1}
else:
    # Automatic labels: the first class (first CSV column) is the positive
    # class (+1); every other class is labelled -1.
    class_number_dict = {
        data_class: (1 if i == 0 else -1)
        for i, data_class in enumerate(data_classes)
    }

print(class_number_dict)

# Turn every frontier point into an augmented column vector:
#   1. reshape the horizontal (1-D) array into a vertical (n x 1) array,
#   2. append a trailing 1 as the bias input.
#
# The result is stored as a list of (augmented_vector, class_name) tuples, e.g.
#   [(array([3., 1., 1.]), 'Positive'), (array([3., -1., 1.]), 'Positive'), (array([1., 0., 1.]), 'Negative')]
# instead of the earlier {'Positive': [...], 'Negative': [...]} layout, so the
# vectors can be addressed as s1_bar, s2_bar, s3_bar in the following steps.
# A dict keyed by the vectors is not an option: NumPy arrays cannot be used as
# dictionary keys unless they are first converted to a hashable form.
temp_frontier_points = list()
for data_class, class_frontier_points in frontier_points.items():
    print(data_class, class_frontier_points)
    for horz_frontier_point in class_frontier_points:
        print(f'Horizontal Frontier Point:\n{horz_frontier_point}')
        vert_frontier_point = horz_frontier_point.reshape(-1, 1)
        print(f'Vertical Frontier Point:\n{vert_frontier_point}')
        bias_row = [[1.]]
        augm_frontier_point = np.vstack((vert_frontier_point, bias_row))
        print(f'Augmented Frontier Point:\n{augm_frontier_point}')
        temp_frontier_points.append((augm_frontier_point, data_class))

# From here on frontier_points is the flat list of (vector, class) tuples.
frontier_points = temp_frontier_points
print(frontier_points)

# Build the linear system for the multipliers: row i holds the dot products
# s_j_bar . s_i_bar for every frontier point j, and the matching right-hand
# side entry is the numeric label of frontier point i.
# Refer to Reference 1 in References Section for explanation
equation_coefficients = list()
rhs_coefficients = list()

for i, (s_i_bar, class_i) in enumerate(frontier_points):
    print(f's{i+1}_bar:\n{s_i_bar}\n{class_i}\n{class_number_dict[class_i]}\n\n')
    sub_equation_coefficients = list()
    for j, (s_j_bar, _) in enumerate(frontier_points):
        print(f's{j+1}_bar . s{i+1}_bar:\n{s_j_bar} . {s_i_bar}')
        # (1 x n) . (n x 1) gives a 1 x 1 array; take out its single scalar.
        coefficient = np.dot(s_j_bar.T, s_i_bar)[0, 0]
        print(coefficient)
        sub_equation_coefficients.append(coefficient)
    print(sub_equation_coefficients)
    rhs_coefficients.append(class_number_dict[class_i])
    equation_coefficients.append(sub_equation_coefficients)
print(f'Equation Coefficients: {equation_coefficients}')
print(f'RHS Coefficients: {rhs_coefficients}')

# Solve the linear system for α (one multiplier per frontier point)
α = np.linalg.solve(equation_coefficients, rhs_coefficients)
print(f'α: {α}')

# Augmented weight vector: the α-weighted sum of the augmented frontier points
w = 0
for alpha_i, (s_bar, _) in zip(α, frontier_points):
    w += alpha_i * s_bar

# Round every component to two decimals, in place (each row is a view of w)
for row in w:
    row[0] = round(row[0], 2)

# The last component belongs to the bias input, so it is the offset b ...
b = w[-1, 0].item()
print(f'b: {b}')

# ... and the remaining components form the weight vector w
w = w[:-1]
print(f'w: {w}')

# Classify a sample point by the sign of w . x + b
point = [4, 1]
horz_point = np.array(point)
vert_point = horz_point[:, np.newaxis]  # column vector, shape (n, 1)
print(vert_point)

dot = np.dot(w.T, vert_point)  # 1 x 1 array
print(dot[0][0])

res = dot + b
print(res[0][0])

# A strictly positive score means the positive class; zero or below is negative.
predicted_label = 'Positive Class' if res > 0 else 'Negative Class'
print(predicted_label)