Skip to content

Commit f3250e1

Browse files
authored
Add files via upload
1 parent da983b6 commit f3250e1

18 files changed

+232
-0
lines changed

Diff for: kMeans/kMeansClustering.py

+232
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
import numpy as np
2+
import cv2
3+
import os,sys,glob,pdb
4+
from matplotlib import pyplot as plt
5+
6+
# Verbosity switch: when True, the clustering loop prints each center's
# shift value and a separator line after every iteration.
disp = True
7+
8+
###
9+
#numpy.random.normal(loc=0.0, scale=1.0, size=None)
10+
###
11+
############################# DATA GENERATION #############################
12+
def gen_data(data_points, loc, scale=100):
    """Draw a 2-D cloud of integer points around a center.

    Each coordinate is sampled from a unit-variance normal distribution
    centered on the matching entry of ``loc``; the sample is then multiplied
    by ``scale`` and truncated to ``int32``. Because the multiplication is
    applied after sampling, it stretches both the spread and the center of
    the cloud (the points end up around ``loc * scale``).

    Args:
        data_points: Number of points to generate.
        loc: Pair ``[x_center, y_center]`` giving the unscaled center.
        scale: Factor applied to every sampled coordinate. Defaults to 100,
            the value that used to be hard-coded.

    Returns:
        A pair ``(x, y)`` of ``int32`` arrays holding the coordinates.
    """
    x_center, y_center = loc[0], loc[1]

    # x is drawn before y so that a seeded run reproduces the same points.
    x = np.random.normal(loc=x_center, size=data_points) * scale
    y = np.random.normal(loc=y_center, size=data_points) * scale

    # Stack, truncate to integers in one go, then split back into x and y.
    x, y = np.vstack((x, y)).astype(np.int32)
    return x, y
36+
37+
# Directory that receives every figure saved by this script.
results_dir = 'results'
# exist_ok avoids the check-then-create race of os.path.exists + os.mkdir
# and also creates missing parent directories if the path is ever nested.
os.makedirs(results_dir, exist_ok=True)
40+
41+
#...................................
42+
# Accumulators for the coordinates of every generated point.
x_data = np.array((), dtype=np.int32)
y_data = np.array((), dtype=np.int32)

# One (point count, unscaled center) entry per synthetic cluster. The clusters
# are generated, plotted and appended in this order, so the random draws and
# the default plot colours come out in the same sequence on every run.
for point_count, cluster_loc in ((100, [5, 5]), (50, [-2, 4]), (300, [2, 3])):
    x, y = gen_data(point_count, cluster_loc)
    plt.scatter(x, y)
    x_data = np.hstack((x_data, x))
    y_data = np.hstack((y_data, y))

# Decorate, save and show the figure holding the raw clusters.
plt.title('k-Means Clustering')
plt.xlabel('Some Feature x')
plt.ylabel('Some Feature y')
plt.grid()
plt.savefig('{}/generated_data.jpg'.format(results_dir))
plt.show()
print('>>>>>>>>>>>>>>>>>>>>>>>>>> DATA POINTS GENERATED')
68+
69+
############################# k-Means #############################
70+
71+
#find out the extremes of our data space
72+
73+
# Bounding box of the data; the initial centers are drawn inside it.
x_min = np.min(x_data)
x_max = np.max(x_data)
y_min = np.min(y_data)
y_max = np.max(y_data)
print('x_min, x_max, y_min, y_max: ', x_min, x_max, y_min, y_max)

# One colour per cluster: the colour name doubles as the cluster label of a
# point, so k can never exceed the number of colours listed here.
color_list = ['Blue', 'Green', 'Yellow', 'Pink', 'Purple']
k = 3  # number of clusters; add colours to color_list before raising it past 5

# Fail early with a clear message instead of an IndexError deep inside the
# clustering loop when a point lands in a cluster that has no colour.
if not 1 <= k <= len(color_list):
    raise ValueError(
        'k must be between 1 and {} (the number of entries in color_list), got {}'.format(
            len(color_list), k))

# Randomly initialize k center points inside the bounding box.
kcenters = []

for i in range(k):
    # randint's upper bound is exclusive, hence the + 1 to include the maximum.
    x_center = np.random.randint(x_min, x_max + 1)
    y_center = np.random.randint(y_min, y_max + 1)
    center = [x_center, y_center]
    kcenters.append(center)

    print('k center point {} initialized at ({}, {})'.format(i+1, x_center, y_center))

print('kcenters: ', kcenters)
96+
97+
def display_kcenters(kcenters):
    """Scatter-plot every cluster center in red via pyplot.

    Args:
        kcenters: Sequence of centers; element 0 of each center is used as
            its x coordinate and element 1 as its y coordinate.
    """
    for center in kcenters:
        center_x, center_y = center[0], center[1]
        plt.scatter(center_x, center_y, color='Red')
101+
102+
# Colour used for points that have not been assigned to any cluster yet.
initial_data_point_color = 'Gray'

# Per-point colour labels; every point starts out unassigned.
color_info_list = [initial_data_point_color for _ in range(len(x_data))]

# Figure of the starting state: the random centers first, then the still
# unlabelled data drawn over them.
display_kcenters(kcenters)
plt.scatter(x_data, y_data, color=initial_data_point_color)
plt.title('before kMeans Clustering')
plt.xlabel('Some Feature x')
plt.ylabel('Some Feature y')
plt.grid()
plt.savefig('{}/before_kmeans.jpg'.format(results_dir))
plt.show()

print('>>>>>>>>>>>>>>>>>>>>>>>>>> KCENTERS INITIALIZED')
118+
119+
def assign_cluster(x, y, kcenters):
    """Return the index of the center closest to the point ``(x, y)``.

    Distances are Euclidean. The comparison is done on squared distances in
    float64: the square root does not change which center is nearest, and
    floating-point arithmetic avoids the silent wrap-around that squaring
    ``int32`` coordinate differences produces for large gaps.

    Args:
        x: x coordinate of the point.
        y: y coordinate of the point.
        kcenters: Non-empty sequence of ``[x, y]`` centers.

    Returns:
        Index into ``kcenters`` of the nearest center (the first one on ties).

    Raises:
        ValueError: If ``kcenters`` is empty.
    """
    centers = np.asarray(kcenters, dtype=np.float64)
    if centers.size == 0:
        raise ValueError('kcenters must contain at least one center')

    # Columns 0 and 1 hold the x and y coordinates of every center.
    dx = centers[:, 0] - x
    dy = centers[:, 1] - y
    return np.argmin(dx * dx + dy * dy)
131+
132+
def get_mean_shit_value(old_point, new_point):
    """Return the Euclidean distance a center moved between two positions.

    The coordinates are converted to float before squaring so that large
    ``int32`` differences cannot wrap around and yield NaN.

    Args:
        old_point: Previous ``[x, y]`` position of the center.
        new_point: Updated ``[x, y]`` position of the center.

    Returns:
        The distance between the two positions as a float64.
    """
    dx = np.float64(old_point[0]) - np.float64(new_point[0])
    dy = np.float64(old_point[1]) - np.float64(new_point[1])
    return np.sqrt(dx * dx + dy * dy)


# Correctly spelled alias; the original name above is kept for existing callers.
get_mean_shift_value = get_mean_shit_value
136+
137+
138+
iterations = 100  # upper bound on the number of assignment/update passes
stop = [False] * k  # stop[i] is True when center i did not move in the latest pass

for n in range(iterations):
    print('iteration number {}'.format(n))

    # Assignment step: label every point with the colour of its nearest center.
    for i, (x, y) in enumerate(zip(x_data, y_data)):
        cluster_index = assign_cluster(x, y, kcenters)
        color_info_list[i] = color_list[cluster_index]

    # Plot the current assignment together with the centers that produced it.
    plt.scatter(x_data, y_data, color=color_info_list)
    display_kcenters(kcenters)
    plt.title('k-Means iter {}'.format(n))
    plt.xlabel('Some Feature x')
    plt.ylabel('Some Feature y')
    plt.grid()
    plt.savefig(results_dir + '/iter{}.jpg'.format(n))
    plt.show()

    # Update step: move every center to the mean of the points assigned to it.
    point_colors = np.array(color_info_list)
    for i in range(len(kcenters)):
        members = point_colors == color_list[i]

        if not members.any():
            # An empty cluster has no mean (np.mean would give NaN, which
            # turns into a garbage integer); leave the center where it is.
            mean_shift_value = 0.0
        else:
            # Means are truncated to integers, so a converged center
            # reproduces its position exactly and the shift becomes 0.
            x_mean = np.mean(x_data[members]).astype(np.int32)
            y_mean = np.mean(y_data[members]).astype(np.int32)
            mean_shift_value = get_mean_shit_value(kcenters[i], [x_mean, y_mean])
            kcenters[i] = [x_mean, y_mean]

        if disp:
            print('mean_shift_value: ', mean_shift_value)

        # Recomputed on every pass: a center that rested once but moved again
        # later must not keep counting as converged.
        stop[i] = bool(mean_shift_value == 0)

    if disp:
        print('...................................')

    # Converged only when no center moved during this pass.
    if all(stop):
        print('all the {} cluster centers have converged on the {}th iteration'.format(k, n))
        # Leave the loop instead of calling exit(), which is an
        # interactive-shell helper and would terminate an importing process.
        break
197+
198+
199+
200+
'''
201+
data = np.array([
202+
[1, 2],
203+
[2, 3],
204+
[3, 6],
205+
])
206+
x, y = data.T
207+
plt.scatter(x,y)
208+
'''
209+
210+
'''
211+
def save_plt(m,c,epoch_id):
212+
213+
214+
# Plot
215+
plt.scatter(x_list, y_list, color = 'cyan')#, s=area, c=colors, alpha=0.5)
216+
plt.title('fake data')
217+
plt.xlabel('x')
218+
plt.ylabel('y')
219+
plt.grid()
220+
#plt.show()
221+
222+
x_temp = x_max - x_min
223+
line_coord1 = [x_min , x_max ]
224+
line_coord2 = [x_min*m_fake + c_fake, m_fake*x_max + c_fake]
225+
plt.plot(line_coord1, line_coord2 , color = 'green')#, 'k-')
226+
227+
line_coord1 = [x_min , x_max]
228+
line_coord2 = [x_min*m + c , m*x_max + c]
229+
plt.plot(line_coord1, line_coord2 , color = 'pink')#, 'k-')
230+
plt.savefig('result/res{}.png'.format(epoch_id))
231+
#plt.show()
232+
'''

Diff for: kMeans/results/before_kmeans.jpg

72.1 KB
Loading

Diff for: kMeans/results/generated_data.jpg

86.5 KB
Loading

Diff for: kMeans/results/iter0.jpg

91.4 KB
Loading

Diff for: kMeans/results/iter1.jpg

90.4 KB
Loading

Diff for: kMeans/results/iter10.jpg

83.8 KB
Loading

Diff for: kMeans/results/iter11.jpg

83.6 KB
Loading

Diff for: kMeans/results/iter12.jpg

83.4 KB
Loading

Diff for: kMeans/results/iter13.jpg

83.4 KB
Loading

Diff for: kMeans/results/iter14.jpg

83.4 KB
Loading

Diff for: kMeans/results/iter2.jpg

89.3 KB
Loading

Diff for: kMeans/results/iter3.jpg

88.7 KB
Loading

Diff for: kMeans/results/iter4.jpg

87.4 KB
Loading

Diff for: kMeans/results/iter5.jpg

86.5 KB
Loading

Diff for: kMeans/results/iter6.jpg

85.5 KB
Loading

Diff for: kMeans/results/iter7.jpg

85 KB
Loading

Diff for: kMeans/results/iter8.jpg

84.4 KB
Loading

Diff for: kMeans/results/iter9.jpg

83.9 KB
Loading

0 commit comments

Comments
 (0)