Python 安排我的集群显示
我正在对一个三维数组的数据运行 k-means 聚类算法。数据形式如下:从1910年到2000年,每一年都有一个 51x200 的数组,其中 51 行对应各个州(states)(这里给出的是1916年的例子,只取前三行,即 3x200)。目前我对每一年的 50X200 数组单独执行一次算法。我的问题是: 问题1:逐年处理是一件痛苦的事情,能否对所有年份一次性运行算法,并把结果绘制成3D图?我希望第三个维度是年份,并为全部数据的聚类结果绘制一张3D图。 问题2:能否一次性读取所有年份的数据,然后分别得到每一年的结果?目前我必须按年份拆分数据文本文件,这是一件痛苦的事情。 我的代码:Python 安排我的集群显示,python,plot,3d,cluster-analysis,k-means,Python,Plot,3d,Cluster Analysis,K Means,我正在对一个三维数组的数据运行 k-means 聚类算法。数据形式如下:从1910年到2000年,每一年都有一个 51x200 的数组,其中 51 行对应各个州(states)(这里给出的是1916年的例子,只取前三行,即 3x200)。目前我对每一年的 50X200 数组单独执行一次算法。我的问题是: 问题1:逐年处理是一件痛苦的事情,能否对所有年份一次性运行算法,并把结果绘制成3D图?我希望第三个维度是年份,并为全部数据的聚类结果绘制一张3D图。 问题2:能否一次性读取所有年份的数据,然后分别得到每一年的结果?目前我必须按年份拆分数据文本文件,这是一件痛苦的事情。
from __future__ import division
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min
import numpy
from numpy.random import *
import numpy as np
import random
from functools import partial
from sklearn.metrics import pairwise_distances_argmin_min
from scipy.spatial.distance import pdist,squareform
import pandas as pd
from functools import partial
from matplotlib import pyplot
def read_from_file(filename):
    """Parse a whitespace-delimited text file into a list of float rows.

    The first field of every non-blank line is discarded
    (NOTE(review): presumably a row label such as a state name -- confirm
    against the data file) and the remaining fields are converted with
    ``float``.

    Blank or whitespace-only lines are skipped; otherwise they would produce
    empty rows and make the resulting matrix ragged.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list with one entry per non-blank line, each entry being the list
        of floats parsed from that line's second and later fields.

    Raises:
        ValueError: If one of the kept fields cannot be converted to float.
    """
    rows = []
    with open(filename) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Nothing on this line (e.g. a trailing newline at EOF).
                continue
            rows.append([float(token) for token in fields[1:]])
    return rows
def main():
    """Cluster the rows of ``Data_Fcopy.txt`` with k-means and print the result.

    Fits a 9-cluster ``KMeans`` model (``init='random'``) on the matrix
    returned by ``read_from_file``, prints the fitted estimator and the
    per-row labels, then prints a one-column table holding, for every row,
    the index of its closest cluster centre shifted by +1.
    """
    import sys  # local import: only needed for the print threshold below

    # Lift numpy's summarisation limit *before* anything is printed so long
    # arrays are shown in full.  ``threshold=numpy.nan`` is rejected with a
    # ValueError by current numpy; ``sys.maxsize`` is the supported value.
    numpy.set_printoptions(threshold=sys.maxsize)

    data = read_from_file("Data_Fcopy.txt")
    km = KMeans(n_clusters=9, init='random').fit(data)
    print(km)

    centers = km.cluster_centers_
    labels = km.labels_
    print(labels)

    # A single call over the whole matrix replaces the former per-row loop,
    # which passed 1-D samples (rejected by current scikit-learn).  The old
    # extra ``km.fit_transform(data)`` call was dropped: its result was
    # unused and it re-fitted the model a second time.
    closest, _ = pairwise_distances_argmin_min(data, centers)

    # The original column list was ``['x' 'center']``; adjacent string
    # literals concatenate, so the single column has always been 'xcenter'.
    S = pd.DataFrame(closest + 1, columns=['xcenter'])
    print(S.to_string())
if __name__ == "__main__":
    # Run the clustering only when this file is executed as a script,
    # not when it is imported as a module.
    main()
from __future__ import division
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min
import numpy
from numpy.random import *
import numpy as np
import random
from functools import partial
from sklearn.metrics import pairwise_distances_argmin_min
from scipy.spatial.distance import pdist,squareform
import pandas as pd
from functools import partial
from matplotlib import pyplot
def read_from_file(filename):
    """Parse a whitespace-delimited text file into a list of float rows.

    The first field of every non-blank line is discarded
    (NOTE(review): presumably a row label such as a state name -- confirm
    against the data file) and the remaining fields are converted with
    ``float``.

    Blank or whitespace-only lines are skipped; otherwise they would produce
    empty rows and make the resulting matrix ragged.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list with one entry per non-blank line, each entry being the list
        of floats parsed from that line's second and later fields.

    Raises:
        ValueError: If one of the kept fields cannot be converted to float.
    """
    rows = []
    with open(filename) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Nothing on this line (e.g. a trailing newline at EOF).
                continue
            rows.append([float(token) for token in fields[1:]])
    return rows
def main():
    """Cluster the rows of ``Data_Fcopy.txt`` with k-means and print the result.

    Fits a 9-cluster ``KMeans`` model (``init='random'``) on the matrix
    returned by ``read_from_file``, prints the fitted estimator and the
    per-row labels, then prints a one-column table holding, for every row,
    the index of its closest cluster centre shifted by +1.
    """
    import sys  # local import: only needed for the print threshold below

    # Lift numpy's summarisation limit *before* anything is printed so long
    # arrays are shown in full.  ``threshold=numpy.nan`` is rejected with a
    # ValueError by current numpy; ``sys.maxsize`` is the supported value.
    numpy.set_printoptions(threshold=sys.maxsize)

    data = read_from_file("Data_Fcopy.txt")
    km = KMeans(n_clusters=9, init='random').fit(data)
    print(km)

    centers = km.cluster_centers_
    labels = km.labels_
    print(labels)

    # A single call over the whole matrix replaces the former per-row loop,
    # which passed 1-D samples (rejected by current scikit-learn).  The old
    # extra ``km.fit_transform(data)`` call was dropped: its result was
    # unused and it re-fitted the model a second time.
    closest, _ = pairwise_distances_argmin_min(data, centers)

    # The original column list was ``['x' 'center']``; adjacent string
    # literals concatenate, so the single column has always been 'xcenter'.
    S = pd.DataFrame(closest + 1, columns=['xcenter'])
    print(S.to_string())
if __name__ == "__main__":
    # Run the clustering only when this file is executed as a script,
    # not when it is imported as a module.
    main()