def kmedoids(X, k,
             starting_medoids=None,
             max_steps=50):
    """Cluster the rows of X around k medoids by iterative refinement.

    Each iteration computes the point-to-medoid distances, assigns every
    point a cluster label, and then lets
    ``update_cluster_medoids_intercost`` propose improved medoids. The loop
    stops when ``has_converged`` reports no change or after ``max_steps``
    iterations, whichever comes first.

    Side effects: prints the medoids on every iteration and rewrites
    ``KMedoids_Results.txt`` in the current working directory with the start
    and completion timestamps (seconds since the epoch).

    Args:
        X: Data set to cluster; passed unchanged to the distance and update
            helpers. Presumably a 2-D array with one row per point.
        k: Number of medoids; only used when ``starting_medoids`` is None.
        starting_medoids: Optional initial medoids. When None they are
            obtained from ``init_medoids(X, k)``.
        max_steps: Upper bound on the number of iterations.

    Returns:
        Tuple ``(labels, medoids)``. ``labels`` is the assignment made at
        the start of the last iteration, i.e. before that iteration's
        medoid update; if no iteration runs it is an all-zero array.
    """
    if starting_medoids is None:
        medoids = init_medoids(X, k)
        print(medoids)
    else:
        # Private copy, so nothing done during the iterations can modify
        # the array the caller passed in.
        medoids = np.copy(starting_medoids)
    converged = False
    labels = np.zeros(len(X))
    i = 1
    # Mode "w" discards a results file left by an earlier run, and the
    # context manager closes the file even if an iteration raises.
    with open("KMedoids_Results.txt", "w") as f:
        start_time = time.time()
        # file.write() accepts a single string, so format the value in.
        f.write('The program starting time {}\n'.format(start_time))
        while (not converged) and (i <= max_steps):
            old_medoids = np.copy(medoids)
            S = compute_distance_1(X, medoids)
            labels = assign_cluster_labels(S)
            medoids = update_cluster_medoids_intercost(X, medoids, S, labels, 2)
            converged = has_converged(old_medoids, medoids)
            print("iteration", i, "Centers are = ", medoids)
            i += 1
        f.write('The program completion time {}\n'.format(time.time()))
    return labels, medoids
def update_cluster_medoids_intercost(X, medoids, S, labels, p):
    """Greedily replace medoids with cluster members that lower the total cost.

    The total cost is the sum, over all rows of the distance matrix, of the
    smallest entry in that row. For every label present in ``labels``, each
    distinct point carrying that label is tried as the medoid of that
    cluster; the swap is kept only if the total cost, recomputed with
    ``compute_distance_1``, strictly decreases. Otherwise the previous
    medoid is restored before the next candidate is tried.

    Args:
        X: 2-D array of data points, one row per point.
        medoids: Current medoids, indexable by cluster label. Not modified.
        S: Distance matrix for the current medoids; presumably the result of
            ``compute_distance_1(X, medoids)`` (one row per point, one
            column per medoid) -- confirm against the caller.
        labels: Array of cluster labels aligned with the rows of ``X``.
        p: Unused; kept for interface compatibility.

    Returns:
        A new array holding the improved medoids.
    """
    # Work on a private copy so the caller's medoids are never altered.
    out_medoids = np.copy(medoids)
    total_cost = np.sum(np.min(S, axis=1))
    for label in np.unique(labels):
        # Labels may arrive as floats; an integer is needed to index rows.
        i = int(label)
        cluster_points = np.unique(X[labels == label], axis=0)
        for datap in cluster_points:
            # Snapshot by value: indexing a row yields a view, which would
            # silently change together with the row and make the rollback
            # below a no-op.
            previous = out_medoids[i].copy()
            out_medoids[i] = datap
            new_cost = np.sum(
                np.min(compute_distance_1(X, out_medoids), axis=1))
            if new_cost < total_cost:
                # Accept the swap and make it the new baseline.
                total_cost = new_cost
            else:
                # Reject the swap: restore the previous medoid.
                out_medoids[i] = previous
    return out_medoids
def update_cluster_medoids_intracost(X, medoids, labels, k, p):
    """Pick, for each cluster, the member with the smallest summed distance.

    For every label ``0 .. k-1`` the distinct points of that cluster are
    compared pairwise (Euclidean distance); the point whose distances to the
    other distinct points sum to the smallest value becomes the medoid.
    Duplicate rows are collapsed first, so multiplicity does not weight the
    cost.

    Args:
        X: 2-D array of data points, one row per point.
        medoids: Ignored; the result is built from scratch. Kept for
            interface compatibility.
        labels: Array of cluster labels aligned with the rows of ``X``.
        k: Number of clusters.
        p: Unused; kept for interface compatibility.

    Returns:
        Float array of shape ``(k, X.shape[1])``. The row of a cluster with
        no members is left as zeros.
    """
    _, n_features = X.shape
    new_medoids = np.zeros((k, n_features))
    for i in range(k):
        cluster = X[labels == i]
        if cluster.size == 0:
            # An empty cluster must not stop the remaining clusters from
            # being processed.
            continue
        candidates = np.unique(cluster, axis=0)
        pairwise = sc.squareform(sc.pdist(candidates, 'euclidean'))
        costs = pairwise.sum(axis=1)
        # The argmin indexes the de-duplicated (and re-sorted) rows, so the
        # medoid has to be taken from that same array, not from `cluster`.
        new_medoids[i] = candidates[np.argmin(costs)]
    return new_medoids
# Ten 2-D sample points used to exercise the clustering routines.
testing_points = np.array([
    [7, 6],
    [2, 6],
    [3, 8],
    [8, 5],
    [7, 4],
    [4, 7],
    [6, 2],
    [7, 3],
    [6, 4],
    [3, 4],
])
# Initial medoids for k = 2; both rows are members of testing_points.
starting_medoids = np.array([[4, 7], [8, 5]])
# Comments
# Post a Comment