Package qubx :: Package fast :: Module kmeans
[hide private]
[frames] | no frames]

Source Code for Module qubx.fast.kmeans

  1  """Compiled routines for sampled data. 
  2   
  3  Copyright 2008-2011 Research Foundation State University of New York  
  4  This file is part of QUB Express.                                           
  5   
  6  QUB Express is free software; you can redistribute it and/or modify           
  7  it under the terms of the GNU General Public License as published by  
  8  the Free Software Foundation, either version 3 of the License, or     
  9  (at your option) any later version.                                   
 10   
 11  QUB Express is distributed in the hope that it will be useful,                
 12  but WITHOUT ANY WARRANTY; without even the implied warranty of        
 13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         
 14  GNU General Public License for more details.                          
 15   
 16  You should have received a copy of the GNU General Public License,    
 17  named LICENSE.txt, in the QUB Express program directory.  If not, see         
 18  <http://www.gnu.org/licenses/>.                                       
 19   
 20  """ 
 21  from qubx.fast.fast_utils import * 
 22  from qubx.util_types import * 
 23  from math import * 
 24   
 25  qubfast.qub_kmeans_create.argtypes = (c_int, c_float_p) 
 26  qubfast.qub_kmeans_create.restype = c_void_p 
 27  qubfast.qub_kmeans_free.argtypes = (c_void_p,) 
 28  qubfast.qub_kmeans_free.restype = None 
 29  qubfast.qub_kmeans_add_series.argtypes = (c_void_p, c_float_p) 
 30  qubfast.qub_kmeans_add_series.restype = None 
 31  qubfast.qub_kmeans_cluster.argtypes = (c_void_p, c_int_p) 
 32  qubfast.qub_kmeans_cluster.restype = c_double 
 33  qubfast.qub_kmeans_cluster_trials.argtypes = (c_void_p, c_int_p, c_int, c_int) 
 34  qubfast.qub_kmeans_cluster_trials.restype = c_double 
 35   
36 -class Clusterer(object):
37 - def __init__(self, series, weights):
38 self.Nseries = len(series) 39 self.Ndata = series and len(series[0]) or 0 40 ww = numpy.array(weights, dtype='float32') 41 self.obj = qubfast.qub_kmeans_create(self.Ndata, cdata(ww, c_float_p)) 42 self.series = series 43 self.weights = weights 44 for ser in series: 45 qubfast.qub_kmeans_add_series(self.obj, cdata(ser, c_float_p))
46 - def __del__(self):
47 qubfast.qub_kmeans_free(self.obj)
48 - def cluster(self, colors):
49 cc = numpy.array(colors, dtype='int32') 50 ssd = qubfast.qub_kmeans_cluster(self.obj, cdata(cc, c_int_p)) 51 colors[:] = cc[:] 52 return ssd
53 - def cluster_trials(self, colors, k, trials_per_k):
54 cc = numpy.array(colors, dtype='int32') 55 ssd = qubfast.qub_kmeans_cluster_trials(self.obj, cdata(cc, c_int_p), k, trials_per_k) 56 colors[:] = cc[:] 57 return ssd
58 - def ll_aicc(self, colors, k):
59 if not len(self.series): return 0.0, 0.0 60 N = len(self.series[0]) 61 if not N: 62 return 0.0, 0.0 63 means = [] 64 for j, ser in enumerate(self.series): 65 means_j = [] 66 means.append(means_j) 67 for c in xrange(1,k+1): 68 if c in colors: 69 tot = 0.0 70 tot_wt = 0.0 71 for i, ci in enumerate(colors): 72 if ci == c: 73 tot += ser[i] 74 tot_wt += self.weights[i] 75 means_j.append( tot / tot_wt ) 76 else: 77 means_j.append( 0.0 ) 78 stds = [] 79 for j, ser in enumerate(self.series): 80 stds_j = [] 81 stds.append(stds_j) 82 for c in xrange(1,k+1): 83 tot = 0.0 84 tot_wt = 0.0 85 for i, ci in enumerate(colors): 86 if ci == c: 87 dev = ser[i] - means[j][c-1] 88 tot += self.weights[i] * dev * dev 89 tot_wt += self.weights[i] 90 if tot_wt: 91 tot /= tot_wt 92 if tot: 93 stds_j.append( sqrt(tot) ) 94 elif means[j][c-1]: 95 stds_j.append( means[j][c-1] ) 96 else: 97 stds_j.append( 1.0 ) 98 LL = 0.0 99 for j, ser in enumerate(self.series): 100 LL -= N*0.5*log(2*pi) 101 tot = 0.0 102 for i,c in enumerate(colors): 103 dev = ser[i] - means[j][c-1] 104 tot += self.weights[i] * (dev*dev) / (2*stds[j][c-1]*stds[j][c-1]) 105 std = stds[j][c-1] 106 if std > 0.0: 107 tot += log(stds[j][c-1]) 108 LL -= tot 109 nf = (k+1)*len(self.series) 110 aic = 2*nf - 2*LL 111 if (N - nf - 1): 112 return LL, (2*nf*(nf+1)*(1.0/(N-nf-1)) + aic) 113 else: 114 return LL, aic
115