1 """
2 Curve Fitting.
3
4 Copyright 2007-2011 Research Foundation State University of New York
5 This file is part of QUB Express.
6
7 QUB Express is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 QUB Express is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License,
18 named LICENSE.txt, in the QUB Express program directory. If not, see
19 <http://www.gnu.org/licenses/>.
20
21 """
22
23 from itertools import izip, count, repeat
24
25 try:
26 import multiprocessing
27 cpu_count = multiprocessing.cpu_count()
28 except:
29 cpu_count = 1
30
31 import random
32 import re
33 import numpy
34 import scipy
35 import scipy.special
36 import scipy.optimize
37 import scipy.linalg
38 import scipy.integrate
39 import sys
40 import linecache
41 import traceback
42 import qubx.fast.fit
43 erfc = scipy.special.erfc
44 from math import ceil, pi, sqrt, log, exp, log10
45 from numpy import sin, mean
46 from qubx.accept import acceptF, acceptODEs, acceptODEorF
47 from qubx.maths import FracPos
48 from qubx.util_types import UNSET_VALUE, memoize
49 from scipy.optimize import *
50
51 USE_POOL_THRESHOLD = 1024
52
53 Curves = {}
54 Fitters = {}
55
57 """Defines the interface of a curve object; curves need not subclass, but they must behave like it.
58 Including optional first constructor argument "expr"
59
60 @ivar name: index (string) in Curves
61 @ivar expr: str visible to user, hopefully python-evaluable
62 @ivar params: list of parameter names
63 @ivar lo: list of parameter minimum values, or UNSET_VALUE
64 @ivar hi: list of parameter maximum values, or UNSET_VALUE
65 @ivar can_fit: list of bool: True to include this param in fitting
66 @ivar param_defaults: list of parameter default values, or empty list
67 @ivar pool: multiprocessing.pool or None, possibly assigned by controlling thread
68 @ivar can_resample: True unless resampling for display is a bad idea
69 """
70 - def __init__(self, expr="0.0", locals={}):
71 """
72 @param expr: optional string to further specify curve
73 @param locals: optional dict of names available in expr
74 """
75 self.name = 'BaseCurve'
76 self.expr = expr
77 self.params = []
78 self.lo = []
79 self.hi = []
80 self.can_fit = []
81 self.param_defaults = []
82 self.pool = None
83 self.can_resample = True
84 self.last_fit = numpy.array([])
86 """Prepares the curve to eval() with extra named data series.
87 @param var_names: list of str; named vars available for eqn, corresponding to vvv
88 """
89 pass
90 - def eval(self, param_vals, xx, vvv=[]):
91 """Returns yy = f[param_vals](xx, *vvv). For vvv, you should previously have set_vars(v_names).
92 @param param_vals: list of float, corresponding to self.params
93 @param xx: numpy.array of x values
94 @param vvv: variable series corresponding to set_vars; list of numpy.array
95 @return: numpy.array of y values
96 """
97 return numpy.zeros(shape=xx.shape)
98
100 """Default fitter iteration callback; does nothing."""
101 return 1
103 """Default fitter status callback; prints to stdout."""
104 print msg

class BaseFitter(object):
    """Defines the interface of a fitter object; fitters need not subclass, but they must behave like it.
    @ivar max_iter: maximum iterations
    @ivar toler: how close to get before stopping
    @ivar name: index (string) in Fitters
    """
    def __init__(self, max_iter=200, toler=1e-10):
        # (reconstructed minimal initializer)
        self.name = 'BaseFitter'
        self.max_iter = max_iter
        self.toler = toler
    def __call__(self, curve, param_vals, xx, yy, vvv=[], ww=None,
                 on_iter=default_on_iter, on_status=default_on_status):
        """Returns improved param_vals, which minimize the sum-squared residual between yy and curve.eval(param_vals, xx, vvv).
        @param curve: L{BaseCurve}
        @param param_vals: list of float corresponding to curve.params
        @param xx: numpy.array(dtype='float32') of x coords
        @param yy: numpy.array(dtype='float32') of y coords
        @param vvv: list of numpy.array(dtype='float32') of additional named data series, available to the curve expr
        @param ww: numpy.array(dtype='float32') of weighting factors, per sample, or None
        @param on_iter: on each improvement in param_vals, calls on_iter(param_vals, iteration) -> True to continue iterating
        @param on_status: text output callback: on_status(msg)
        @return: (fitted param_vals, sum_sqr_residual, iterations, fit curve array)
        """
        return param_vals, 0.0, 0, yy
    def stop(self, cancel_exception):
        """By default, fitting is stopped by KeyboardInterrupt; this method can do additional work, and can call
        cancel_exception() to inhibit the default behavior."""
        pass
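
# Typical use of a fitter (illustrative sketch, mirroring main() at the bottom of this module):
# any curve can be paired with any fitter through the common __call__ signature.
#
#     curve = Curve("m*x + b")
#     fitter = Fitters['Simplex-LM']()
#     params, ssr, iters, ff = fitter(curve, [1.0, 1.0], xx, yy)
#     # params: fitted values, ssr: sum-squared residual, iters: iterations, ff: fitted y values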

class Curve(BaseCurve):
    """Fittable curve from a parsed Python expression; either simple algebraic, or a system of ODEs; see L{acceptODEs}."""
    def __init__(self, expr="a * x**k + b", locals=None, allow_params=True, allow_ode=True):
        """
        @param expr: either the right-hand side of "y = whatever*stuff(x)", or a system of ODEs; see L{acceptODEs}
        @param allow_params: whether to look for undefined names and add them to self.params
        @param allow_ode: whether to allow a system of ODEs
        """
        BaseCurve.__init__(self)
        self.name = 'Custom'
        self.expr = expr
        self.allow_params = allow_params
        self.allow_ode = allow_ode
        self.locals = locals or {}
        self.avail = []
        self.func = self.ode = None
        self.setup_f()
    def set_vars(self, var_names):
        """Tells the names and positions of extra data series; if these names appear in expr they refer to vvv instead of params.
        @param var_names: list of str, one per vvv you will be passing to eval()
        """
        self.avail = var_names[:]
        self.setup_f()
    def setup_f(self):
        """Parses expr; called automatically when needed."""
        accept = (self.allow_ode and acceptODEorF) or acceptF
        obj = accept(static=["x"], avail=self.avail, custom=self.allow_params, locals=self.locals)(self.expr)
        params, lo, hi, can_fit = self.params, self.lo, self.hi, self.can_fit
        param_by_name = dict([(nm, (l, h, c)) for nm, l, h, c in izip(params, lo, hi, can_fit)])
        self.params = []
        self.lo = []
        self.hi = []
        self.can_fit = []
        def add_param(nm):
            self.params.append(nm)
            if nm in param_by_name:
                l, h, c = param_by_name[nm]
            else:
                l, h, c = UNSET_VALUE, UNSET_VALUE, True
            self.lo.append(l)
            self.hi.append(h)
            self.can_fit.append(c)
        if obj.f:
            self.func = obj
            self.ode = None
            self.can_resample = True
        else:
            self.func = None
            self.ode = obj
            self.can_resample = False
            for nm in obj.ynames:
                add_param('%s0' % nm)
        for nm in obj.args:
            if (nm != 'x') and not (nm in self.avail):
                add_param(nm)
    def eval(self, param_vals, xx, vvv=[]):
        """Returns the numpy.array(dtype='float32', shape=xx.shape) calculated for xx, given param_vals and extra named series.
        @param param_vals: list of float corresponding to L{BaseCurve.params}
        @param xx: numpy.array(dtype='float32') of x coords
        @param vvv: list of numpy.array(dtype='float32', shape=xx.shape); extra data series, whose names must already have been declared using set_vars()
        """
        N = len(xx)
        pval_by_name = dict([(nm, val) for nm, val in izip(self.params, param_vals)])
        if self.func:
            success = False
            if self.pool and (N > USE_POOL_THRESHOLD) and (cpu_count > 1):
                try:
                    self.last_fit = ff = self.pool_eval(xx, vvv, self.func, pval_by_name, self.avail)
                    success = True
                except:
                    traceback.print_exc()
            if not success:
                self.last_fit = ff = sub_eval(xx, vvv, self.func, pval_by_name, self.avail)
            return ff
        elif self.ode:
            Na = len(self.ode.args)
            Ny = len(self.ode.ynames)
            args = [None] * (Na + Ny)
            for i in xrange(1, Na):
                if self.ode.args[i] in pval_by_name:
                    args[i] = pval_by_name[ self.ode.args[i] ]

            frac_pos = FracPos(xx)
            def subsampler(dd):
                def subsample():
                    ix = frac_pos.ix
                    fi = frac_pos.frac_ix
                    if (ix < (frac_pos.n - 1)) and fi:
                        return dd[ix] + fi*(dd[ix+1] - dd[ix])
                    else:
                        return dd[ix]
                return subsample
            sers = [None] * Na
            for i in xrange(1, Na):
                if args[i] is None:
                    sers[i] = subsampler( vvv[self.avail.index( self.ode.args[i] )] )
            def dyy(yy, x):
                args[0] = x
                if sers:
                    frac_pos.find(x)
                for i in xrange(Na):
                    if sers[i]:
                        args[i] = sers[i]()
                args[-Ny:] = yy
                return numpy.array(self.ode.dy(*args))
            y0 = numpy.array(param_vals[:Ny])
            self.last_fit = numpy.array(scipy.integrate.odeint(dyy, y0, xx)[:, 0], dtype='float32')
            return self.last_fit
        else:
            self.last_fit = numpy.zeros(shape=xx.shape, dtype='float32')
            return self.last_fit
    def pool_eval(self, xx, vvv, func, pval_by_name, avail):
        try:
            N = len(xx)
            Nsub = N / cpu_count + 1
            argses = []
            for i in xrange(0, N, Nsub):
                i2 = min(i + Nsub, N)
                argses.append( (xx[i:i2], [vv[i:i2] for vv in vvv], func, pval_by_name, avail) )
            ff = numpy.zeros(shape=xx.shape, dtype='float32')
            i = 0
            for fsub, info, err in self.pool.imap(pool_sub_eval, argses):
                if info:
                    print info
                if err:
                    print 'Pool Error: ', err
                    break
                ff[i:i+len(fsub)] = fsub
                i += len(fsub)
            else:
                return ff
        except:
            traceback.print_exc()
        # fall back to single-process evaluation if the pool failed
        return sub_eval(xx, vvv, func, pval_by_name, avail)

Curves['Custom'] = Curve
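
# Illustrative usage (a sketch, not part of the original module): free names in the
# expression become fit parameters, in the order reported by curve.params.
#
#     curve = Curve("m*x + b")            # curve.params will contain 'm' and 'b'
#     xx = numpy.arange(10, dtype='float32')
#     yy = curve.eval([2.0, -4.0], xx)    # values given in curve.params order
#
# Extra data series must be named with set_vars() before they are passed as vvv:
#
#     curve2 = Curve("gain * drive")
#     curve2.set_vars(["drive"])
#     yy2 = curve2.eval([0.5], xx, [numpy.ones(10, dtype='float32')])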


def pool_sub_eval(args):
    """Evaluates one chunk of the curve in a worker process; args is one tuple built by Curve.pool_eval().
    Returns (yy_chunk, info, error_text)."""
    try:
        info = ""
        return sub_eval(*args), info, ""
    except:
        return numpy.zeros(shape=args[0].shape, dtype='float32'), "", traceback.format_exc()


def sub_eval(xx, vvv, func, pval_by_name, avail):
    N = len(xx)
    argses = []
    for a in func.args:
        if a in pval_by_name:
            argses.append([pval_by_name[a]]*N)
        elif a == 'x':
            argses.append(xx)
        else:
            argses.append( vvv[avail.index(a)] )
    try:
        ff = map(func, *argses)
        ff = numpy.array(ff, dtype='float32')
        ff[numpy.isnan(ff)] = 0.0
        ff[numpy.isinf(ff)] = 0.0
    except (ValueError, OverflowError, ZeroDivisionError):
        # retry sample-by-sample so one bad point doesn't spoil the whole chunk
        ff = [0.0] * N
        for i in xrange(N):
            iargs = [arg[i] for arg in argses]
            try:
                ff[i] = func(*iargs)
            except:
                pass
        ff = numpy.array(ff, dtype='float32')
    return ff

class LinearCurve(BaseCurve):
    def __init__(self, expr="0.0", locals={}):
    def eval(self, param_vals, xx, vvv=[]):
Curves['Linear'] = LinearCurve

class GaussianCurve(BaseCurve):
    def __init__(self, expr="0.0", locals={}):
        BaseCurve.__init__(self)
        self.name = 'Gaussian'
        self.expr = "Amp * (1/(Std * sqrt(2*pi))) * exp( -(x - Mean)**2 / (2 * Std**2) ) + Baseline"
        self.params = ['Amp', 'Mean', 'Std', 'Baseline']
        self.lo = [UNSET_VALUE]*len(self.params)
        self.hi = [UNSET_VALUE]*len(self.params)
        self.can_fit = [True, True, True, False]
        self.param_defaults = [1.0, 0.0, 1.0, 0.0]
    def eval(self, param_vals, xx, vvv=[]):
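        # Body reconstructed as a minimal sketch from self.expr and self.params
        # (['Amp', 'Mean', 'Std', 'Baseline']); the original implementation may differ.
        amp, mean_, std, baseline = param_vals
        std = std or 1e-10   # guard against zero width
        yy = amp * (1.0/(std * sqrt(2*pi))) * numpy.exp( -(xx - mean_)**2 / (2 * std**2) ) + baseline
        self.last_fit = yy
        return yy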
Curves['Gaussian'] = GaussianCurve

class DblGaussianCurve(BaseCurve):
    def __init__(self, expr="0.0", locals={}):
        BaseCurve.__init__(self)
        self.name = 'Gaussian (2)'
        self.expr = "Amp_i * (1/(Std_i * sqrt(2*pi))) * exp( -(x - Mean_i)**2 / (2 * Std_i**2) ) + Baseline"
        self.params = ['Amp1', 'Mean1', 'Std1', 'Amp2', 'Mean2', 'Std2', 'Baseline']
        self.lo = [UNSET_VALUE]*len(self.params)
        self.hi = [UNSET_VALUE]*len(self.params)
        self.can_fit = [True, True, True, True, True, True, False]
        self.param_defaults = [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0]
    def eval(self, param_vals, xx, vvv=[]):
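        # Body reconstructed as a minimal sketch: the sum of the two Gaussian components
        # plus the shared Baseline; the original implementation may differ.
        baseline = param_vals[-1]
        yy = numpy.zeros(shape=xx.shape, dtype='float32') + baseline
        for i in xrange(2):
            amp, mean_, std = param_vals[3*i : 3*i+3]
            std = std or 1e-10   # guard against zero width
            yy = yy + amp * (1.0/(std * sqrt(2*pi))) * numpy.exp( -(xx - mean_)**2 / (2 * std**2) )
        self.last_fit = yy
        return yy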
Curves['Gaussian (2)'] = DblGaussianCurve

class TriGaussianCurve(BaseCurve):
    def __init__(self, expr="0.0", locals={}):
        BaseCurve.__init__(self)
        self.name = 'Gaussian (3)'
        self.expr = "Amp_i * (1/(Std_i * sqrt(2*pi))) * exp( -(x - Mean_i)**2 / (2 * Std_i**2) ) + Baseline"
        self.params = ['Amp1', 'Mean1', 'Std1', 'Amp2', 'Mean2', 'Std2', 'Amp3', 'Mean3', 'Std3', 'Baseline']
        self.lo = [UNSET_VALUE]*len(self.params)
        self.hi = [UNSET_VALUE]*len(self.params)
        self.can_fit = [True, True, True, True, True, True, True, True, True, False]
        self.param_defaults = [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0]
    def eval(self, param_vals, xx, vvv=[]):
Curves['Gaussian (3)'] = TriGaussianCurve

class QuadGaussianCurve(BaseCurve):
    def __init__(self, expr="0.0", locals={}):
        BaseCurve.__init__(self)
        self.name = 'Gaussian (4)'
        self.expr = "Amp_i * (1/(Std_i * sqrt(2*pi))) * exp( -(x - Mean_i)**2 / (2 * Std_i**2) ) + Baseline"
        self.params = ['Amp1', 'Mean1', 'Std1', 'Amp2', 'Mean2', 'Std2', 'Amp3', 'Mean3', 'Std3', 'Amp4', 'Mean4', 'Std4', 'Baseline']
        self.lo = [UNSET_VALUE]*len(self.params)
        self.hi = [UNSET_VALUE]*len(self.params)
        self.can_fit = [True, True, True, True, True, True, True, True, True, True, True, True, False]
        self.param_defaults = [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 0.0]
    def eval(self, param_vals, xx, vvv=[]):
Curves['Gaussian (4)'] = QuadGaussianCurve

class QuintGaussianCurve(BaseCurve):
    def __init__(self, expr="0.0", locals={}):
        BaseCurve.__init__(self)
        self.name = 'Gaussian (5)'
        self.expr = "Amp_i * (1/(Std_i * sqrt(2*pi))) * exp( -(x - Mean_i)**2 / (2 * Std_i**2) ) + Baseline"
        self.params = ['Amp1', 'Mean1', 'Std1', 'Amp2', 'Mean2', 'Std2', 'Amp3', 'Mean3', 'Std3', 'Amp4', 'Mean4', 'Std4', 'Amp5', 'Mean5', 'Std5', 'Baseline']
        self.lo = [UNSET_VALUE]*len(self.params)
        self.hi = [UNSET_VALUE]*len(self.params)
        self.can_fit = [True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, False]
        self.param_defaults = [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 1.0, 4.0, 1.0, 0.0]
    def eval(self, param_vals, xx, vvv=[]):
Curves['Gaussian (5)'] = QuintGaussianCurve

GaussianCurves = [GaussianCurve, DblGaussianCurve, TriGaussianCurve, QuadGaussianCurve, QuintGaussianCurve]


class GaussianUnitCurve(BaseCurve):
    def __init__(self, expr="0.0", locals={}):
        BaseCurve.__init__(self)
        self.name = 'GaussianUnit'
        self.expr = "normalized Amp * (1/(Std * sqrt(2*pi))) * exp( -(x - Mean)**2 / (2 * Std**2) ) + Baseline"
        self.params = ['Amp', 'Mean', 'Std', 'Baseline']
        self.lo = [UNSET_VALUE]*len(self.params)
        self.hi = [UNSET_VALUE]*len(self.params)
        self.can_fit = [True, True, True, False]
        self.can_resample = False
        self.param_defaults = [1.0, 0.0, 1.0, 0.0]
    def eval(self, param_vals, xx, vvv=[]):
Curves['GaussianUnit'] = GaussianUnitCurve

class DblGaussianUnitCurve(BaseCurve):
    def __init__(self, expr="0.0", locals={}):
        BaseCurve.__init__(self)
        self.name = 'GaussianUnit (2)'
        self.expr = "normalized Amp_i * (1/(Std_i * sqrt(2*pi))) * exp( -(x - Mean_i)**2 / (2 * Std_i**2) ) + Baseline"
        self.params = ['Amp1', 'Mean1', 'Std1', 'Amp2', 'Mean2', 'Std2', 'Baseline']
        self.lo = [UNSET_VALUE]*len(self.params)
        self.hi = [UNSET_VALUE]*len(self.params)
        self.can_fit = [True, True, True, True, True, True, False]
        self.can_resample = False
        self.param_defaults = [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0]
    def eval(self, param_vals, xx, vvv=[]):
Curves['GaussianUnit (2)'] = DblGaussianUnitCurve

class TriGaussianUnitCurve(BaseCurve):
    def __init__(self, expr="0.0", locals={}):
        BaseCurve.__init__(self)
        self.name = 'GaussianUnit (3)'
        self.expr = "normalized Amp_i * (1/(Std_i * sqrt(2*pi))) * exp( -(x - Mean_i)**2 / (2 * Std_i**2) ) + Baseline"
        self.params = ['Amp1', 'Mean1', 'Std1', 'Amp2', 'Mean2', 'Std2', 'Amp3', 'Mean3', 'Std3', 'Baseline']
        self.lo = [UNSET_VALUE]*len(self.params)
        self.hi = [UNSET_VALUE]*len(self.params)
        self.can_fit = [True, True, True, True, True, True, True, True, True, False]
        self.can_resample = False
        self.param_defaults = [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0]
    def eval(self, param_vals, xx, vvv=[]):
Curves['GaussianUnit (3)'] = TriGaussianUnitCurve

class QuadGaussianUnitCurve(BaseCurve):
    def __init__(self, expr="0.0", locals={}):
        BaseCurve.__init__(self)
        self.name = 'GaussianUnit (4)'
        self.expr = "normalized Amp_i * (1/(Std_i * sqrt(2*pi))) * exp( -(x - Mean_i)**2 / (2 * Std_i**2) ) + Baseline"
        self.params = ['Amp1', 'Mean1', 'Std1', 'Amp2', 'Mean2', 'Std2', 'Amp3', 'Mean3', 'Std3', 'Amp4', 'Mean4', 'Std4', 'Baseline']
        self.lo = [UNSET_VALUE]*len(self.params)
        self.hi = [UNSET_VALUE]*len(self.params)
        self.can_fit = [True, True, True, True, True, True, True, True, True, True, True, True, False]
        self.can_resample = False
        self.param_defaults = [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 0.0]
    def eval(self, param_vals, xx, vvv=[]):
Curves['GaussianUnit (4)'] = QuadGaussianUnitCurve

class QuintGaussianUnitCurve(BaseCurve):
    def __init__(self, expr="0.0", locals={}):
        BaseCurve.__init__(self)
        self.name = 'GaussianUnit (5)'
        self.expr = "normalized Amp_i * (1/(Std_i * sqrt(2*pi))) * exp( -(x - Mean_i)**2 / (2 * Std_i**2) ) + Baseline"
        self.params = ['Amp1', 'Mean1', 'Std1', 'Amp2', 'Mean2', 'Std2', 'Amp3', 'Mean3', 'Std3', 'Amp4', 'Mean4', 'Std4', 'Amp5', 'Mean5', 'Std5', 'Baseline']
        self.lo = [UNSET_VALUE]*len(self.params)
        self.hi = [UNSET_VALUE]*len(self.params)
        self.can_fit = [True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, False]
        self.can_resample = False
        self.param_defaults = [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 1.0, 4.0, 1.0, 0.0]
    def eval(self, param_vals, xx, vvv=[]):
Curves['GaussianUnit (5)'] = QuintGaussianUnitCurve

GaussianUnitCurves = [GaussianUnitCurve, DblGaussianUnitCurve, TriGaussianUnitCurve, QuadGaussianUnitCurve, QuintGaussianUnitCurve]


class DecExpoCurve(BaseCurve):
    def __init__(self, expr="0.0", locals={}):
        BaseCurve.__init__(self)
        self.name = 'Declining Exponential'
        self.expr = "a + (b-a)*(1 - exp(-(x-x0) / Tau)) + Slope*(x-x0)"
        self.params = ['a', 'b', 'Tau', 'Slope', 'x0']
        self.lo = [UNSET_VALUE]*len(self.params)
        self.hi = [UNSET_VALUE]*len(self.params)
        self.can_fit = [True, True, True, False, False]
        self.param_defaults = [1.0, 0.0, 0.1, 0.0, 0.0]
    def eval(self, param_vals, xx_in, vvv=[]):
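        # Body reconstructed as a minimal sketch of self.expr, measuring x from x0;
        # the original implementation may differ.
        a, b, tau, slope, x0 = param_vals
        tau = tau or 1e-10   # guard against zero time constant
        dx = xx_in - x0
        yy = a + (b - a)*(1.0 - numpy.exp(-dx / tau)) + slope*dx
        self.last_fit = yy
        return yy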
Curves['Declining Exponential'] = DecExpoCurve


class DblDecExpoCurve(BaseCurve):
    def __init__(self, expr="0.0", locals={}):
        BaseCurve.__init__(self)
        self.name = 'Double Declining Exponential'
        self.expr = "a + (b-c)*(1 - exp(-(x-x0) / Tau1)) + c*(1-exp(-(x-x0) / Tau2)) + Slope*(x-x0)"
        self.params = ['a', 'b', 'c', 'Tau1', 'Tau2', 'Slope', 'x0']
        self.lo = [UNSET_VALUE]*len(self.params)
        self.hi = [UNSET_VALUE]*len(self.params)
        self.can_fit = [True, True, True, True, True, False, False]
        self.param_defaults = [10.0, 1.0, 5.0, 1.0, 0.1, 0.0, 0.0]
    def eval(self, param_vals, xx_in, vvv=[]):
Curves['Double Declining Exponential'] = DblDecExpoCurve


def calc_r(xx, logbase):
    if len(xx) < 2:
        return 1.0
    min_dx = xx[1] - xx[0]
    for i in xrange(2, len(xx)):
        dx = xx[i] - xx[i-1]
        if dx < min_dx:
            min_dx = dx
    return numpy.exp( min_dx * numpy.log(logbase) )
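
# The log-binned exponential curves below integrate Amp*exp(-t/Tau) over each histogram
# bin rather than sampling it at a point.  With x in log_{LogBase} units, calc_r() gives
# r = LogBase**min_dx (min_dx = smallest spacing of xx), so each x corresponds to a bin
# [X/r, X] on the linear axis, where X = LogBase**x.  The bin area is then
#     Amp*Tau * (exp(-(X/r)/Tau) - exp(-X/Tau)),
# which is exactly what each eval() below accumulates per component.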

class LogExpoCurve(BaseCurve):
    def __init__(self, expr="0.0", locals={}):
        BaseCurve.__init__(self)
        self.name = 'Exponential (log x)'
        self.expr = "area[x, r*x] under Amp * exp( -x / Tau ) with LogBase(x) binning"
        self.params = ['Amp', 'Tau', 'LogBase']
        self.lo = [UNSET_VALUE]*len(self.params)
        self.hi = [UNSET_VALUE]*len(self.params)
        self.can_fit = [True, True, False]
        self.param_defaults = [1.0, 1.0, 10.0]
        self.can_resample = False
    def eval(self, param_vals, xx, vvv=[]):
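        # Body reconstructed (hedged): the single-component case of the same per-bin
        # area formula used by the multi-component versions below.
        amp, tau, logbase = param_vals
        r = calc_r(xx, logbase)
        xp = numpy.power(logbase, xx)
        xp /= r
        xc = xp / (- tau)
        yy = numpy.exp(xc)
        xc *= r
        yy -= numpy.exp(xc)
        yy *= amp * tau
        self.last_fit = yy
        return yy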
Curves['Exponential (log x)'] = LogExpoCurve


class LogExpoCurve2(BaseCurve):
    def __init__(self, expr="0.0", locals={}):
        BaseCurve.__init__(self)
        self.name = 'Exponential (log x) (2)'
        self.expr = "area[x, r*x] under Amp * exp( -x / Tau ) with LogBase(x) binning"
        self.params = ['Amp', 'Tau', 'Amp2', 'Tau2', 'LogBase']
        self.lo = [UNSET_VALUE]*len(self.params)
        self.hi = [UNSET_VALUE]*len(self.params)
        self.can_fit = [True, True, True, True, False]
        self.param_defaults = [1.0, 1.0, 0.1, 10.0, 10.0]
        self.can_resample = False
    def eval(self, param_vals, xx, vvv=[]):
        amp, tau, amp2, tau2, logbase = param_vals
        r = calc_r(xx, logbase)
        xp = numpy.power(logbase, xx)
        xp /= r
        xc = xp / (- tau)
        yy = numpy.exp(xc)
        xc *= r
        yy -= numpy.exp(xc)
        yy *= amp * tau
        yy_all = yy

        xc = xp / (- tau2)
        yy = numpy.exp(xc)
        xc *= r
        yy -= numpy.exp(xc)
        yy *= amp2 * tau2
        yy_all += yy

        self.last_fit = yy_all
        return yy_all
Curves['Exponential (log x) (2)'] = LogExpoCurve2


class LogExpoCurve3(BaseCurve):
    def __init__(self, expr="0.0", locals={}):
        BaseCurve.__init__(self)
        self.name = 'Exponential (log x) (3)'
        self.expr = "area[x, r*x] under Amp * exp( -x / Tau ) with LogBase(x) binning"
        self.params = ['Amp', 'Tau', 'Amp2', 'Tau2', 'Amp3', 'Tau3', 'LogBase']
        self.lo = [UNSET_VALUE]*len(self.params)
        self.hi = [UNSET_VALUE]*len(self.params)
        self.can_fit = [True, True, True, True, True, True, False]
        self.param_defaults = [1.0, 1.0, 0.1, 10.0, 0.05, 20.0, 10.0]
        self.can_resample = False
    def eval(self, param_vals, xx, vvv=[]):
        amp, tau, amp2, tau2, amp3, tau3, logbase = param_vals
        r = calc_r(xx, logbase)
        xp = numpy.power(logbase, xx)
        xp /= r
        xc = xp / (- tau)
        yy = numpy.exp(xc)
        xc *= r
        yy -= numpy.exp(xc)
        yy *= amp * tau
        yy_all = yy

        xc = xp / (- tau2)
        yy = numpy.exp(xc)
        xc *= r
        yy -= numpy.exp(xc)
        yy *= amp2 * tau2
        yy_all += yy

        xc = xp / (- tau3)
        yy = numpy.exp(xc)
        xc *= r
        yy -= numpy.exp(xc)
        yy *= amp3 * tau3
        yy_all += yy

        self.last_fit = yy_all
        return yy_all
Curves['Exponential (log x) (3)'] = LogExpoCurve3


class LogExpoCurve4(BaseCurve):
    def __init__(self, expr="0.0", locals={}):
        BaseCurve.__init__(self)
        self.name = 'Exponential (log x) (4)'
        self.expr = "area[x, r*x] under Amp * exp( -x / Tau ) with LogBase(x) binning"
        self.params = ['Amp', 'Tau', 'Amp2', 'Tau2', 'Amp3', 'Tau3', 'Amp4', 'Tau4', 'LogBase']
        self.lo = [UNSET_VALUE]*len(self.params)
        self.hi = [UNSET_VALUE]*len(self.params)
        self.can_fit = [True, True, True, True, True, True, True, True, False]
        self.param_defaults = [10.0, 0.1, 1.0, 1.0, 0.1, 10.0, 0.05, 20.0, 10.0]
        self.can_resample = False
    def eval(self, param_vals, xx, vvv=[]):
        amp, tau, amp2, tau2, amp3, tau3, amp4, tau4, logbase = param_vals
        r = calc_r(xx, logbase)
        xp = numpy.power(logbase, xx)
        xp /= r
        xc = xp / (- tau)
        yy = numpy.exp(xc)
        xc *= r
        yy -= numpy.exp(xc)
        yy *= amp * tau
        yy_all = yy

        xc = xp / (- tau2)
        yy = numpy.exp(xc)
        xc *= r
        yy -= numpy.exp(xc)
        yy *= amp2 * tau2
        yy_all += yy

        xc = xp / (- tau3)
        yy = numpy.exp(xc)
        xc *= r
        yy -= numpy.exp(xc)
        yy *= amp3 * tau3
        yy_all += yy

        xc = xp / (- tau4)
        yy = numpy.exp(xc)
        xc *= r
        yy -= numpy.exp(xc)
        yy *= amp4 * tau4
        yy_all += yy

        self.last_fit = yy_all
        return yy_all
Curves['Exponential (log x) (4)'] = LogExpoCurve4

class LogExpoCurve5(BaseCurve):
    def __init__(self, expr="0.0", locals={}):
        BaseCurve.__init__(self)
        self.name = 'Exponential (log x) (5)'
        self.expr = "area[x, r*x] under Amp * exp( -x / Tau ) with LogBase(x) binning"
        self.params = ['Amp', 'Tau', 'Amp2', 'Tau2', 'Amp3', 'Tau3', 'Amp4', 'Tau4', 'Amp5', 'Tau5', 'LogBase']
        self.lo = [UNSET_VALUE]*len(self.params)
        self.hi = [UNSET_VALUE]*len(self.params)
        self.can_fit = [True, True, True, True, True, True, True, True, True, True, False]
        self.param_defaults = [10.0, 0.1, 5.0, 0.2, 1.0, 1.0, 0.1, 10.0, 0.05, 20.0, 10.0]
        self.can_resample = False
    def eval(self, param_vals, xx, vvv=[]):
        amp, tau, amp2, tau2, amp3, tau3, amp4, tau4, amp5, tau5, logbase = param_vals
        r = calc_r(xx, logbase)
        xp = numpy.power(logbase, xx)
        xp /= r
        xc = xp / (- tau)
        yy = numpy.exp(xc)
        xc *= r
        yy -= numpy.exp(xc)
        yy *= amp * tau
        yy_all = yy

        xc = xp / (- tau2)
        yy = numpy.exp(xc)
        xc *= r
        yy -= numpy.exp(xc)
        yy *= amp2 * tau2
        yy_all += yy

        xc = xp / (- tau3)
        yy = numpy.exp(xc)
        xc *= r
        yy -= numpy.exp(xc)
        yy *= amp3 * tau3
        yy_all += yy

        xc = xp / (- tau4)
        yy = numpy.exp(xc)
        xc *= r
        yy -= numpy.exp(xc)
        yy *= amp4 * tau4
        yy_all += yy

        xc = xp / (- tau5)
        yy = numpy.exp(xc)
        xc *= r
        yy -= numpy.exp(xc)
        yy *= amp5 * tau5
        yy_all += yy

        self.last_fit = yy_all
        return yy_all
Curves['Exponential (log x) (5)'] = LogExpoCurve5


class HillCurve(BaseCurve):
    def __init__(self, expr="0.0", locals={}):
    def eval(self, param_vals, xx, vvv=[]):
Curves['Hill'] = HillCurve

class HillCurveNorm(Curve):
    def __init__(self, eqn=None, locals={}):
        Curve.__init__(self, eqn or '10**(x*Hill) / (10**(x*Hill) + EC50**Hill) * hi("y")', locals)
Curves['Hill (normalized)'] = HillCurveNorm


class LorentzianCurve(BaseCurve):
    def __init__(self, expr="0.0", locals={}):
    def eval(self, param_vals, xx, vvv=[]):
Curves['Lorentzian'] = LorentzianCurve


class Stop(Exception):
    def __init__(self):
        Exception.__init__(self, 'stop requested')

class OutOfBounds(Exception):
    def __init__(self, ix, val):
        Exception.__init__(self, 'out of bounds: param[%i] = %f' % (ix, val))


class SimplexFitter(BaseFitter):
    """Minimizes the sum-squared residual using scipy.optimize.fmin."""
    def __init__(self, max_iter=200, toler=1e-10):
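        # The remainder of __init__ and the beginning of __call__ below are a hedged
        # reconstruction (assumed, not taken from the original); they supply the names
        # (weights, useCpar, params, params_out, apply_xk, ff, iter) that the surviving
        # code further down expects.
        BaseFitter.__init__(self, max_iter, toler)
        self.name = 'Simplex'
    def __call__(self, curve, param_vals, xx, yy, vvv=[], ww=None,
                 on_iter=default_on_iter, on_status=default_on_status):
        if ww is None:
            weights = numpy.ones(shape=yy.shape, dtype='float32')
        else:
            weights = ww
        useCpar = [i for i, c in enumerate(curve.can_fit) if c]   # indices of adjustable params
        params = [param_vals[i] for i in useCpar]                 # the simplex only sees these
        params_out = list(param_vals)                             # full param vector, updated in place
        lo, hi = curve.lo, curve.hi
        ff = [yy]            # last evaluated curve, boxed so the closures can rebind it
        iter = [0]           # iteration counter, likewise
        ssr, iterations = 0.0, 0
        def apply_xk(xk):
            # copy a simplex vertex into params_out, raising OutOfBounds for any
            # value outside the optional [lo, hi] limits
            for xi, i in enumerate(useCpar):
                val = xk[xi]
                if (lo[i] != UNSET_VALUE) and (val < lo[i]):
                    raise OutOfBounds(i, val)
                if (hi[i] != UNSET_VALUE) and (val > hi[i]):
                    raise OutOfBounds(i, val)
                params_out[i] = val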
        def simplex_func(xk):
            ssd = 0.0
            try:
                apply_xk(xk)
                ff[0] = curve.eval(params_out, xx, vvv)
                ssd += sum((weights * (yy - ff[0]))**2)
            except Stop:
                raise
            except OverflowError:
                ssd = 1e1024
            except OutOfBounds, oob:
                ssd = 1e1024
            except KeyboardInterrupt:
                raise Stop()
            except:
                traceback.print_exc()
            return ssd
        def simplex_iter(xk):
            iter[0] += 1
            try:
                apply_xk(xk)
            except OutOfBounds:
                pass
            if not on_iter(params_out, iter[0]):
                raise Stop()
        try:
            params_opt, ssr, iterations, funcalls, warnflag = \
                fmin(simplex_func, params, maxfun=10*self.max_iter, xtol=self.toler, ftol=self.toler, maxiter=self.max_iter,
                     full_output=1, disp=1, retall=0, callback=simplex_iter)
            try:
                apply_xk(params_opt)
            except OutOfBounds:
                pass
        except Stop:
            iterations = iter[0]
            for xi, i in enumerate(useCpar):
                params_out[i] = params[xi]
        except:
            traceback.print_exc()
        return params_out, ssr, iterations, ff[0]
Fitters['Simplex'] = SimplexFitter

class LMFitter(BaseFitter):
    """Minimizes sum-squared residual using lmmin from L{qubx.fast}."""
    def __init__(self, max_iter=200, toler=1e-10):
        # (reconstructed minimal initializer; the __call__ wrapping qubx.fast.fit's lmmin is not shown)
        BaseFitter.__init__(self, max_iter, toler)
        self.name = 'Levenburg-Marquardt'
Fitters['Levenburg-Marquardt'] = LMFitter

class Simplex_LM_Fitter(BaseFitter):
    """Minimizes sum-squared residual using L{SimplexFitter} then L{LMFitter}, giving half of max_iter to each algorithm."""
    def __init__(self, max_iter=400, toler=1e-10):
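        # Hedged reconstruction (assumed, not taken from the original) of the rest of
        # __init__ and the __call__ header; it supplies self.simplex, self.lm and the
        # private state used by the surviving code below.
        BaseFitter.__init__(self, max_iter, toler)
        self.name = 'Simplex-LM'
        self.simplex = SimplexFitter()
        self.lm = LMFitter()
        self.__on_iter_user = None
        self.__phase = -1
        self.__simplex_iter = 0
    def __call__(self, curve, param_vals, xx, yy, vvv=[], ww=None,
                 on_iter=default_on_iter, on_status=default_on_status):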
        self.__on_iter_user = on_iter
        self.__phase = 0
        self.__simplex_iter = 0
        self.simplex.max_iter = self.lm.max_iter = self.max_iter / 2
        self.simplex.toler = self.lm.toler = self.toler
        result = self.simplex(curve, param_vals, xx, yy, vvv, ww, self.__on_iter, on_status)
        if self.__phase != -1:
            self.__phase = 1
            param_vals, ssr, iterations, ff = result
            result = self.lm(curve, param_vals, xx, yy, vvv, ww, self.__on_iter, on_status)
        self.__on_iter_user = None
        self.__phase = -1
        return result
    def __on_iter(self, param_vals, iter):
        if self.__phase == 1:
            rtn = self.__on_iter_user(param_vals, self.__simplex_iter + iter)
        else:
            self.__simplex_iter = iter
            rtn = self.__on_iter_user(param_vals, iter)
        return (rtn and (self.__phase >= 0)) and 1 or 0
    def stop(self, cancel_exception):
        self.__phase = -1  # (reconstructed) a stop request cancels the remaining phase

Fitters['Simplex-LM'] = Simplex_LM_Fitter


D1_DIFF_CENTRAL = False
D2_DIFF_CENTRAL = True
DIFF_DEV = 1e-7

1147 """Returns the numpy.array(shape=(i,j)) of d(dResidual)/dP_i,dP_j, for i,j in len(non-const-params).
1148
1149 @return: (hessian_matrix, sum_sqr_residual)
1150 """
1151
1152 usePar = [i for i,c in enumerate(curve.can_fit) if c]
1153 m = len(usePar)
1154
1155
1156 savePars = tuple([param_vals[i] for i in usePar])
1157 thisPars = param_vals[:]
1158
1159
1160 steps = [dev * max(1.0, abs(savePars[u])) for u in xrange(m)]
1161 def step(u, pars, sgn=1):
1162 spars = [p for p in pars]
1163 spars[u] += sgn*steps[u]
1164 return tuple(spars)
1165
1166 def setPars(pars):
1167 for u,p in enumerate(pars):
1168 thisPars[usePar[u]] = p
1169
1170 def ssr_f(pars):
1171 setPars(pars)
1172 ff = curve.eval(thisPars, xx, vvv)
1173 try:
1174 if not (ww is None):
1175 residual = sum( (ww * (yy - ff))**2 )
1176 else:
1177 residual = sum( (yy - ff)**2 )
1178 except KeyboardInterrupt:
1179 raise
1180 except:
1181 residual = 1e1024
1182 return residual
1183 ssr = memoize(ssr_f, 'ssr')
1184 d1_fwd_one = lambda u, pars: ((ssr(step(u, pars)) - ssr(pars)) / steps[u])
1185
1186 d1_central_one = lambda u, pars: ((ssr(step(u, pars)) - ssr(step(u, pars, -1))) / (2*steps[u]))
1187
1188 d1 = memoize( D1_DIFF_CENTRAL and d1_central_one or d1_fwd_one, 'd1' )
1189
1190
1191 h = memoize(
1192 (D2_DIFF_CENTRAL and (lambda u,v: ((d1(u, step(v, savePars)) - d1(u, step(v, savePars, -1))) / (2*steps[v]))))
1193 or (lambda u,v: ((d1(u, step(v, savePars)) - d1(u, savePars)) / steps[v])),
1194 'hessian' )
1195
1196 H = numpy.array([ [(u<=v) and h(u,v) or h(v,u) for v in xrange(m)] for u in xrange(m) ])
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206 return H, ssr_f(savePars)
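
# Finite-difference scheme used by Hessian() above, in the notation of the code:
# with per-parameter step s_u = dev * max(1, |p_u|),
#     d1(u, p) ~= (ssr(p + s_u e_u) - ssr(p)) / s_u                      (forward; central if D1_DIFF_CENTRAL)
#     H[u][v]  ~= (d1(u, p + s_v e_v) - d1(u, p - s_v e_v)) / (2 s_v)    (central if D2_DIFF_CENTRAL)
# so H approximates the matrix of second partial derivatives of the weighted
# sum-squared residual at the current parameter values.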

def Covariance(curve, param_vals, xx, yy, vvv, ww):
    """Returns the covariance matrix[i][j] (inverse Hessian), for curve param indices i,j."""
    usePar = [i for i, c in enumerate(curve.can_fit) if c]
    m = len(usePar)
    cov, ss = Hessian(curve, param_vals, xx, yy, vvv, ww)
    if m: cov = numpy.linalg.pinv(0.5 * cov)

    # expand to a full Np x Np matrix, with zero rows/columns for params held constant
    Np = len(curve.params)
    all = numpy.matrix(numpy.zeros(shape=(Np, Np)))
    for u in xrange(m):
        for v in xrange(m):
            i, j = usePar[u], usePar[v]
            all[i, j] = cov[u, v]
    return all

def pseudovar(var):
    """Minimally after Gill-Murray:
    if the covariance matrix is not positive definite, some diagonals are negative,
    with unknown meaning.  Here we look for trouble and, if necessary, replace
    the covariance matrix with a generalized Cholesky factorization."""
    min_eig = min(numpy.linalg.eigh(var)[0])
    if min_eig < 1e-10: min_eig = -1e-10
    diff = -1.1 * min_eig
    pvar = var
    if (min_eig > 0.0) and (min(var.diagonal()) >= 0.0):
        return var, False
    while min_eig < 0.0:
        d = diff * numpy.identity(var.shape[0])
        pvar = var + d
        min_eig = min(numpy.linalg.eig(pvar)[0])
        diff *= 2

    v = numpy.linalg.cholesky(pvar)
    return numpy.dot(v.T, v), True


def Correlation(curve, param_vals, xx, yy, vvv, ww):
    """Returns (cross_correlation, is_pseudo, std_err_est, ssr, r2):
      - cross_correlation: matrix[i][j] for curve param indices i,j
      - is_pseudo: whether the Hessian was non-positive-definite, requiring Cholesky factorization
      - std_err_est: error estimate for each curve param
      - ssr: sum-squared residual
      - r2: R^2
    """
    usePar = [i for i, c in enumerate(curve.can_fit) if c]
    m = len(usePar)
    Np = len(curve.params)
    if not m:
        return numpy.matrix(numpy.zeros(shape=(Np, Np))), False, [0.0]*Np, 0.0, 0.0
    cov, ssr = Hessian(curve, param_vals, xx, yy, vvv, ww)
    ymean = mean(yy)
    sstot = sum((yy - ymean)**2)
    if sstot:
        r2 = 1.0 - (ssr / sstot)
    else:
        r2 = 0.0

    if not numpy.isfinite(cov).all():
        return None, False, [0.0]*Np, ssr, r2
    if m: cov = numpy.linalg.pinv(cov)
    try:
        cov, pseudo = pseudovar(cov)
    except numpy.linalg.LinAlgError:
        pseudo = 2

    std_err_est = [x for x in numpy.sqrt(cov.diagonal())]
    std_err_all = numpy.zeros(shape=(Np,))
    corr = numpy.matrix(numpy.zeros(shape=(Np, Np)))
    for u in xrange(m):
        i = usePar[u]
        for v in xrange(m):
            j = usePar[v]
            denom = sqrt(abs(cov[u, u] * cov[v, v]))
            corr[i, j] = (denom and (cov[u, v] / denom)) or cov[u, v]
        std_err_all[i] = std_err_est[u]

    return corr, pseudo, std_err_all, ssr, r2


def RunsProb(yy, ff):
    """Counts the number of runs of y>f and y<f; returns the probability that the residuals are random noise."""
    n = min(len(yy), len(ff))
    if n < 2: return 0.0
    sgn = (yy > ff)
    count = 0
    nplus = 0
    last = None
    for s in sgn:
        if s:
            nplus += 1
        if s != last:
            last = s
            count += 1
    nminus = n - nplus
    mean = nplus*nminus*2.0/n + 1
    # standard deviation of the number of runs: sqrt((mean-1)*(mean-2)/(n-1))
    sd = sqrt((mean-1)*(mean-2)/(n-1))
    z = (sd and abs((mean - count) / sd)) or 0.0
    return erfc(z/sqrt(2))
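
# Wald-Wolfowitz runs statistics used by RunsProb(): with n+ points above the fit and
# n- below (n = n+ + n-), the expected number of runs is mean = 2*n+*n-/n + 1 and its
# variance is (mean-1)(mean-2)/(n-1).  The returned erfc(z/sqrt(2)) is the two-sided
# normal tail probability for z = |mean - runs| / sd; small values suggest the residuals
# are not random noise around the fit.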


def main():
    def on_iter(params, iterations):
        print params, iterations
        return 1
    fit = Simplex_LM_Fitter()
    m, b = 2.0, -4.0
    xx = numpy.arange(10, dtype='float32')
    yy = numpy.arange(10, dtype='float32')
    yy *= m
    yy += b
    curve = Curve("m*x + b", allow_params=True)
    params = [1.0, 1.0]
    params, ssr, iterations, ff = fit(curve, params, xx, yy, on_iter=on_iter)
    print params, ssr, iterations
    print ff
    print yy
    print
    weigh = Curve('1.0/(x+1)')
    ww = weigh.eval([], xx)
    params, ssr, iterations, ff = fit(curve, params, xx, yy, ww=ww, on_iter=on_iter)
    print params, ssr, iterations
    print ff
    print yy
    print

    xx *= 2*pi/10.0
    yy = sin(xx)
    curve = Curve("a' = b; b' = -a", allow_ode=True)
    params = [2.0, -1.0]
    print
    print Correlation(curve, params, xx, yy, [], None)
    print
    params, ssr, iterations, ff = fit(curve, params, xx, yy, on_iter=on_iter)
    print params, ssr, iterations
    print ff
    print yy
    print
    print 'Correlation:'
    print Correlation(curve, params, xx, yy, [], None)
    print 'P(runs):', RunsProb(yy, ff)
    print

if __name__ == '__main__':
    main()