1 """
2 Classes to represent tabular information.
3 - L{Table} defines the interface
4 - L{SimpleTable} lets you define fields and store information
5 - L{ObjectTable} presents a collection of existing objects
6 - L{TableUndo} helps interface with L{qubx.undo}.
7
8 All tables have fields 'Index' (maintained automatically as the row number)
9 and 'Group' (default 0, meant for future classification purposes, see QUB:Select).
10
11 Copyright 2008-2015 Research Foundation State University of New York
12 This file is part of QUB Express.
13
14 QUB Express is free software; you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation, either version 3 of the License, or
17 (at your option) any later version.
18
19 QUB Express is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
23
24 You should have received a copy of the GNU General Public License,
25 named LICENSE.txt, in the QUB Express program directory. If not, see
26 <http://www.gnu.org/licenses/>.
27
28 """
29
30 import collections
31 import cStringIO
32 from itertools import *
33 from math import *
34 import numpy
35 from qubx.util_types import *
36 from qubx.accept import *
37 import qubx.notebook
38 import qubx.pyenv
39 import traceback
40
# Default limits; each Table copies them into its own max_fields / max_bools.
MAX_FIELDS = 256
MAX_BOOLS = 16

# Row-selection modes: all rows, one group, rows matching criteria, checked rows.
# NOTE(review): not referenced in the visible part of this file -- confirm where they are consumed.
COPY_ROWS_ALL, COPY_ROWS_GROUP, COPY_ROWS_CRITERIA, COPY_ROWS_CHECKED = (0, 1, 2, 3)
45
50
53
54
55
# run_later is bound to the very same callable as run_now, so code that asks
# to "run later" (e.g. Table.select) goes through run_now.
# NOTE(review): run_now is defined outside the visible part of this file.
run_later = run_now
57
59 """Represents the data in one row of a L{Table}, with .field_name access, and a dict of fields.
60
61 @ivar fields: dict: field_name -> value
62 """
def __init__(self, fields, safenames=False):
    """Builds a row from a dict of field values.

    @param fields: dict: field_name -> value; kept (not copied) as self.fields
    @param safenames: if True, each field whose name differs from SafeName(name)
                      is also exposed under the safe name, unless that attribute
                      already exists
    """
    Anon.__init__(self, **fields)
    # Write through __dict__ directly: normal attribute assignment is
    # rejected by this class ("TableRow is read-only").
    attrs = self.__dict__
    attrs['fields'] = fields
    if not safenames:
        return
    for name, value in fields.items():
        alias = SafeName(name)
        if name == alias or alias in attrs:
            continue
        attrs[alias] = value
72 raise Exception('TableRow is read-only.')
73
74
76 """Base class for info that can be represented as a table.
77
78 @ivar label: the table's name
79 @ivar global_name: how to locate self in global namespace, if possible
80 @ivar sortable: (def False) whether the user should be able to sort by column
81 @ivar size: the number of rows
82 @ivar fields: list of field names
83 @ivar fields_independent: list of field names marked independent e.g. 'Index'
84 @ivar default: dict: field name -> default value
85 @ivar accept: dict: field name -> conversion_function(string)->value; can raise Exception on invalid input
86 @ivar format: dict: field name -> format_function(value)->string
87 @ivar units: dict: field name -> units_string
88 @ivar choices: dict: field name -> (list of values to choose among) or None
89 @ivar checked: list[row index] of bool, should row be included in batch actions
90 @ivar user_can_remove: list[row index] of bool, can remove via gui; def True
91 @ivar mean: {field_name : f(group=None) -> mean value of field for all segments or group index}
92 @ivar std: {field_name : f(group=None) -> std deviation of field for all segments or group index}
93 @ivar median: {field_name : f(group=None) -> median value of field for all segments or group index}
94 @ivar mode: {field_name : f(group=None) -> most common value of field for all segments or group index}
95 @ivar groups_occupied: f(include_zero=False) -> sorted list of 'Group' values, no duplicates, possibly no 0
96 @ivar rows_in_group: f(group) -> list of row indices with Group==group
97 @ivar count_in_group: f(group) -> number of rows with Group==group
98 @ivar field_stats: {field_name : [mean, std, median, mode]}
99 @ivar group_stats: [groups_occupied, rows_in_group, count_in_group]
100 @ivar all_stats: field_stats + group_stats
101 @ivar OnAddField: L{WeakEvent}(name) called when a field is added
102 @ivar OnRemovingField: L{WeakEvent}(name) called when a field is being removed
103 @ivar OnInsert: L{WeakEvent}(index, undoing) called when a row is added (read-only please)
104 @ivar OnAfterInsert: L{WeakEvent}(index, undoing) called after insertion and all OnInsert callbacks (.set(...) if you like)
105 @ivar OnRemoving: L{WeakEvent}(index, undoing) called when a row is about to be removed
106 @ivar OnRemoved: L{WeakEvent}(index, undoing) called when a row was just removed
107 @ivar OnSet: L{WeakEvent}(index, field_name, val, prev, undoing) called when one field of a row is changed
108 @ivar OnSelect: L{WeakEvent}(index, field_name or None, sender or None) called when a row is highlighted
109 @ivar OnDoubleClick: L{WeakEvent}(index, field_name or None) called (e.g. by TableView) when a row is double-clicked
110 @ivar OnChecked: L{WeakEvent}(index, checked) called on set_checked
111 """
112 __slots__ = ['__ref', 'label', '__global_name', 'sortable', 'fields', 'fields_independent', 'default', 'accept', 'format',
113 'units', 'choices', 'OnAddField', 'OnRemovingField', 'OnInsert', 'OnAfterInsert', 'OnRemoving', 'OnRemoved',
114 'OnSet', 'OnSelect', 'OnDoubleClick', 'OnChecked', 'max_fields', 'max_bools', 'checked', 'user_can_remove',
115 'mean', 'std', 'median', 'mode', 'groups_occupied', 'rows_in_group', 'count_in_group', 'field_stats',
116 'group_stats', 'all_stats', 'notebook', 'notebook_items', 'custom']
118 self.__ref = Reffer()
119 self.label = ''
120 self.__global_name = ''
121 self.sortable = sortable
122 self.fields = ['Index', 'Group']
123 self.fields_independent = ['Index']
124 self.default = {'Group' : 0}
125 self.accept = {'Index' : acceptNothing, 'Group' : acceptIntGreaterThanOrEqualTo(0)}
126 self.format = {'Index' : str, 'Group' : str}
127 self.units = {'Index' : "", 'Group' : ""}
128 self.choices = {'Index' : None, 'Group' : None}
129 self.OnAddField = WeakEvent()
130 self.OnRemovingField = WeakEvent()
131 self.OnInsert = WeakEvent()
132 self.OnAfterInsert = WeakEvent()
133 self.OnRemoving = WeakEvent()
134 self.OnRemoved = WeakEvent()
135 self.OnSet = WeakEvent()
136 self.OnSelect = WeakEvent()
137 self.OnDoubleClick = WeakEvent()
138 self.OnChecked = WeakEvent()
139 self.max_fields = MAX_FIELDS
140 self.max_bools = MAX_BOOLS
141 self.checked = []
142 self.user_can_remove = []
143 self.mean = {}
144 self.std = {}
145 self.median = {}
146 self.mode = {}
147 self.groups_occupied = memoize(self.__groups_occupied)
148 self.rows_in_group = memoize(self.__rows_in_group)
149 self.count_in_group = memoize(self.__count_in_group)
150 self.field_stats = collections.defaultdict(lambda: [])
151 self.group_stats = [self.groups_occupied, self.rows_in_group, self.count_in_group]
152 self.all_stats = [x for x in self.group_stats]
153 self.add_field_to_stats('Index')
154 self.add_field_to_stats('Group')
155 self.OnInsert += self.__ref(self.__onInsert)
156 self.OnRemoved += self.__ref(self.__onRemoved)
157 self.notebook = {}
158 self.notebook_items = []
159 self.add_notebook_item('Default', qubx.notebook.NbTable('Table', self.global_name and ('%s.notebook["Default"]'%self.global_name) or '',
160 self.nb_get_caption,
161 self.nb_get_shape, self.nb_get_headers,
162 self.nb_get_row, self.nb_get_col, self.nb_get_col_format,
163 self.nb_get_type))
164 self.custom = Anon()
165
# global_name: reading returns the private name; assignment is routed through set_global_name().
global_name = property(lambda self: self.__global_name, lambda self, x: self.set_global_name(x))
# size: read-only, computed by get_size() on every access.
size = property(lambda self: self.get_size())
180 if hasattr(key, '__iter__'):
181 r,f = key
182 return self.get(r, f)
183 else:
184 return self.get_row(key)
195 if not self.size: return ""
196 def format_item(item):
197 if isinstance(item, float):
198 return '%.6g' % item
199 else:
200 return str(item)
201 def format_row(items):
202 return ' '.join(['%10s'%format_item(item) for item in items])
203 return '\n'.join([' '.join([('%10s' % s) for s in self.fields])] +
204 [format_row([self.get(r, field) for field in self.fields]) for r in xrange(self.size)])
def to_text(self, separator='\t'):
    """Returns the table as delimited text: one header line, then one line per row.

    Floats are written with 12 significant digits ('%.12g'); every other
    value goes through self.format[field].  An empty table yields "" (no
    header line).

    @param separator: string placed between the cells of a line
    @return: the lines joined with newlines, without a trailing newline
    """
    if not self.size:
        return ""

    def render(field, value):
        if isinstance(value, float):
            return '%.12g' % value
        return self.format[field](value)

    lines = [separator.join(self.fields)]
    for row in range(self.size):
        # fetch the whole row first, then format it
        cells = [(name, self.get(row, name)) for name in self.fields]
        lines.append(separator.join([render(name, value) for name, value in cells]))
    return '\n'.join(lines)
def from_text(self, txt, keep_fields=False):
    """Replaces the table's contents with rows parsed from delimited text.

    @param txt: text as understood by read_table_text
    @param keep_fields: True keeps every existing field; a collection of names
                        keeps just those; anything false removes all fields
                        except 'Index' and 'Group'
    @return: False (table untouched) if no column data was parsed, else True
    """
    headers, types, columns = read_table_text(txt)
    if not (columns and columns[0]):
        return False
    self.clear()
    self.max_fields = max(self.max_fields, 2*len(headers))
    # unnamed columns get generated names
    for pos, header in enumerate(headers):
        if not header:
            headers[pos] = default_column_name(pos)
    if keep_fields is not True:
        for field in reversed(self.fields):
            if (field in ('Index', 'Group')) or (keep_fields and (field in keep_fields)):
                continue
            self.remove_field(field)
    for header, typ in izip(headers, types):
        if header not in self.fields:
            fmt = '%.6g' if (typ == float) else str
            self.add_field(header, typ(), typ, fmt, '')
    # columns -> rows
    for values in izip(*columns):
        self.append(dict(izip(headers, values)))
    return True
246 """equivalent to table.insert(table.size, entry)"""
247 self.insert(self.size, entry)
def insert(self, i, entry, undoing=False):
    """Abstract: subclasses insert entry as row i.

    @raise NotImplementedError: always, in this base implementation
    """
    # NotImplementedError derives from Exception, so handlers written for
    # the former plain Exception still catch it.
    raise NotImplementedError('abstract method needs definition')
def remove(self, i, undoing=False):
    """Abstract: subclasses remove row i.

    @raise NotImplementedError: always, in this base implementation
    """
    # NotImplementedError derives from Exception, so handlers written for
    # the former plain Exception still catch it.
    raise NotImplementedError('abstract method needs definition')
def get(self, i, field_name):
    """Abstract: subclasses return the value of field_name in row i.

    @raise NotImplementedError: always, in this base implementation
    """
    # NotImplementedError derives from Exception, so handlers written for
    # the former plain Exception still catch it.
    raise NotImplementedError('abstract method needs definition')
def set(self, i, field_name, val, undoing=False):
    """Abstract: subclasses store val in field_name of row i.

    @raise NotImplementedError: always, in this base implementation
    """
    # NotImplementedError derives from Exception, so handlers written for
    # the former plain Exception still catch it.
    raise NotImplementedError('abstract method needs definition')
def select(self, i, field_name=None, sender=None):
    """Announces, via OnSelect, that row i (and optionally one field) is highlighted.

    Lets several views of one table stay in sync: selecting in one view
    selects in the others.  A view passes sender=self so that it can
    recognize, and ignore, its own selection activity.

    When the environment's 'DEBUG' global is set, the call and the current
    stack are printed first.
    """
    debugging = qubx.pyenv.env.globals['DEBUG']
    if debugging:
        report = 'select\t%d\t%s\t%s\t%s' % (i, self.label, field_name, sender)
        print(report)
        traceback.print_stack()
        print('')
    run_later(self.OnSelect, i, field_name, sender)
269 """Removes all rows."""
270 for i in reversed(xrange(self.size)):
271 self.remove(i)
272 - def get_row(self, i, safenames=False):
279 """Returns a list of L{TableRow}."""
280 all = []
281 for i in xrange(self.size):
282 all.append(self.get_row(i))
283 return all
def index(self, name, raise_error=True):
    """Finds the first row whose "Name" field equals name.

    @param name: value to look for in field 'Name'
    @param raise_error: what to do when no row matches
    @return: the row index; or -1 if nothing matches and raise_error is false
    @raise KeyError: if nothing matches and raise_error is true
    """
    for row in range(self.size):
        if self.get(row, 'Name') == name:
            break
    else:
        # ran off the end without a match
        if raise_error:
            raise KeyError(name)
        return -1
    return row
306 """Returns the value of field, in the first row with "Name"=name; or raises KeyError."""
307 return self.get(self.index(name), field)
309 """Returns a L{TableRow} for the first row with "Name"=name; or raises KeyError."""
310 return self.get_row(self.index(name))
326 return [i for i in xrange(row_count or self.size) if test_row(self.get_row(i, safenames=True))]
372 return [calc_row(self.get_row(r, safenames=True)) for r in ixs]
373
def __mean(self, field_name, group=None):
    """Returns the arithmetic mean of a field over the rows of a group.

    Rows whose value cannot be read or added (e.g. text or None) are left
    out of both the sum and the count.

    @param field_name: field to average
    @param group: passed to rows_in_group to choose the rows
    @return: the mean as a float, or 0.0 if no row contributed
    """
    tot = 0.0
    cnt = 0
    for i in self.rows_in_group(group):
        try:
            tot += self.get(i, field_name)
            cnt += 1
        except Exception:
            # Best effort: skip values that aren't numbers.  Deliberately not
            # a bare except, so KeyboardInterrupt / SystemExit still propagate.
            pass
    return (tot / cnt) if cnt else 0.0
def __std(self, field_name, group=None):
    """Returns the standard deviation of a field over the rows of a group.

    Uses the population formula, sqrt(sum((mean - x)**2) / n), with the mean
    taken from self.mean[field_name](group).  Rows whose value cannot be read
    or subtracted (e.g. text or None) are left out.

    @param field_name: field to measure
    @param group: passed to rows_in_group to choose the rows
    @return: the deviation as a float, or 0.0 if no row contributed
    """
    tot = 0.0
    cnt = 0
    mean = self.mean[field_name](group)
    for i in self.rows_in_group(group):
        try:
            tot += (mean - self.get(i, field_name))**2
            cnt += 1
        except Exception:
            # Best effort: skip values that aren't numbers.  Deliberately not
            # a bare except, so KeyboardInterrupt / SystemExit still propagate.
            pass
    return sqrt(tot / cnt) if cnt else 0.0
def __mode(self, field_name, group=None):
    """Returns the most common value of a field over the rows of a group.

    @param field_name: field to examine
    @param group: passed to rows_in_group to choose the rows
    @return: the single most frequent value; if several values tie, their
             sum divided by how many there are; None if there are no rows
    """
    tally = collections.defaultdict(int)
    for row in self.rows_in_group(group):
        tally[self.get(row, field_name)] += 1
    if not tally:
        return None
    top = max(tally.values())
    winners = [value for value, seen in tally.items() if seen == top]
    if len(winners) > 1:
        return sum(winners) / len(winners)
    return winners[0]
438 groups = [self.get(i, 'Group') for i in xrange(self.size)]
439 return sorted(list(set([g for g in groups if include_zero or (g > 0)])))
446
458 field = self.fields[c]
459 return [self.get(r, field) for r in xrange(self.size)]
463 check_fields = self.fields if (fields is None) else fields
464 try:
465 for f in check_fields:
466 float(self.get(0, f))
467 return float
468 except:
469 return str
473
474
475
477 """Binds a L{Table} to a L{qubx.undo.UndoStack}.
478
479 Simply create a TableUndo object, and all edits are pushed to the undo stack.
480 You still have to seal_undo to mark each restore point (that way several edits
481 can be sealed as a single undo).
482
483 To disassociate the table and the undoStack, call .dispose().
484 """
485 __slots__ = ['table', 'undoStack', 'ref']
512 - def onSet(self, i, field, val, prev, undoing):
516
517
523
525 """Implements L{Table} by storing all rows and fields internally.
526
527 @ivar entries: list of entries; each entry is a dict from field_name to value
528 """
529 __slots__ = ['auto_add_fields', 'auto_default', 'auto_accept', 'auto_format', 'entries']
530 - def __init__(self, label, auto_add_fields=False, auto_default=0.0, auto_accept=acceptEval(), auto_format=auto_format, global_name="", sortable=False):
541 - def add_field(self, name, default, accept, format, units, choices=None, independent=False):
def insert(self, i, entry, undoing=False):
    """Inserts a new row at position i (clamped to 0..size).

    The stored row takes each known field from entry, or the field's default
    if entry lacks it; 'Index' is always i.  Afterwards: all stats are reset,
    OnInsert fires, the rows below get their 'Index' re-set, OnAfterInsert
    fires, and -- with auto_add_fields -- every other key of entry (except
    'fields') is written through self.set.

    @param entry: mapping: field_name -> value
    @param undoing: forwarded to OnInsert and OnAfterInsert
    """
    i = min(self.size, max(0, i))
    row = {'Index': i}
    for name in self.fields[1:]:
        row[name] = entry[name] if (name in entry) else self.default[name]
    self.entries.insert(i, row)
    for stat in self.all_stats:
        stat.reset()
    self.OnInsert(i, undoing)
    # everything below the new row moved down by one
    for later in range(i + 1, self.size):
        self.set(later, 'Index', later)
    self.OnAfterInsert(i, undoing)
    if self.auto_add_fields:
        for key in sorted(entry.keys()):
            if (key == 'fields') or (key in self.fields):
                continue
            self.set(i, key, entry[key])
594 - def remove(self, i, undoing=False, renumber=True):
605 - def get(self, i, field_name):
607 - def set(self, i, field_name, val, undoing=False):
def get_row(self, i, safenames=False):
    """Returns the i'th row as a L{TableRow} wrapping the stored entry dict.

    @param safenames: passed on to L{TableRow}
    """
    entry = self.entries[i]
    return TableRow(entry, safenames)
627 clone = SimpleTable(self.label, self.auto_add_fields, self.auto_default, self.auto_accept, self.auto_format, self.global_name, self.sortable)
628 for field in self.fields:
629 if not (field in ['Index', 'Group']):
630 clone.add_field(field, self.default[field], self.accept[field], self.format[field],
631 self.units[field], self.choices[field])
632 for i in xrange(self.size):
633 clone.append(self.get_row(i).__dict__)
634 return clone
635
636
638 """Implements L{Table} as a list of pre-existing objects. You define a field with get and set functions on an object."""
639 __slots__ = ['__ref', 'entries', 'getter', 'setter', '__setting_field']
640 - def __init__(self, label, global_name=""):
651 - def add_field(self, name, default, accept, format, units, choices=None, get=get_none, set=set_none, independent=False):
def insert(self, i, obj, undoing=False):
    """Inserts obj as the row at position i (clamped to 0..size).

    Gives obj a 'group' attribute of 0 if reading obj.group fails, resets
    all stats, fires OnInsert, fires OnSet('Index') for each row that moved
    down, subscribes to obj.OnChangeField, then fires OnAfterInsert.

    @param obj: the object to present as a row; must have an OnChangeField event
    @param undoing: forwarded to the events
    """
    i = min(self.size, max(0, i))
    self.entries.insert(i, obj)
    try:
        obj.group
    except Exception:
        # No usable group yet.  Not a bare except: KeyboardInterrupt and
        # SystemExit must not be mistaken for "attribute missing".
        obj.group = 0
    for stat in self.all_stats:
        stat.reset()
    self.OnInsert(i, undoing)
    # rows below the new one now have an index one higher than before
    for j in range(i+1, self.size):
        self.OnSet(j, 'Index', j, j-1, undoing)
    obj.OnChangeField += self.__ref(self._onChangeField)
    self.OnAfterInsert(i, undoing)
689 - def remove(self, i, undoing=False):
def get(self, i, field_name):
    """Returns the value of field_name for the i'th object.

    'Index' is the position itself, 'Group' is the object's group attribute,
    and any other field is read through its registered getter.

    @raise IndexError: if there is no i'th entry (even for 'Index')
    """
    obj = self.entries[i]
    if field_name == 'Index':
        return i
    if field_name == 'Group':
        return obj.group
    read = self.getter[field_name]
    return read(obj)
def set(self, i, field_name, val, undoing=False):
    """Changes one field of the i'th object and fires OnSet.

    'Group' is stored directly on the object.  Any other field goes through
    its registered setter, after conversion by self.accept[field_name]; a
    FieldAcceptException is swallowed and leaves the field unchanged.
    Fields whose setter is false are silently ignored.

    @param val: the new value (OnSet receives it unconverted)
    @param undoing: forwarded to OnSet
    """
    # The flag marks that a change is being made from here.
    # NOTE(review): presumably consulted by _onChangeField to ignore the echo -- confirm.
    self.__setting_field = True
    try:
        prev = self.get(i, field_name)
        obj = self.entries[i]
        if field_name == 'Group':
            obj.group = val
            for stat in chain(self.group_stats, self.field_stats[field_name]):
                stat.reset()
            self.OnSet(i, 'Group', val, prev, undoing)
        elif self.setter[field_name]:
            try:
                self.setter[field_name](obj, self.accept[field_name](val))
                for stat in self.field_stats[field_name]:
                    stat.reset()
                self.OnSet(i, field_name, val, prev, undoing)
            except FieldAcceptException:
                pass
    finally:
        # Always clear the flag, even if get(), the setter or a handler
        # raised; otherwise it would stay set for good.
        self.__setting_field = False
733
734
735 -class TextStats(object):
736 __slots__ = ['tokenize', 'convert', 'debug', 'min_fields', 'max_fields', 'numbers', 'headers', 'skip',
737 'col_count', 'signal_count']
def __init__(self, tokenize, convert, debug=False):
    """Starts with empty statistics for one candidate text format.

    @param tokenize: function(line) -> list of field strings
    @param convert: function(string) -> number; raises ValueError if not numeric
    @param debug: if true, add() prints the fields of each line
    """
    # what the caller supplied
    self.tokenize, self.convert, self.debug = tokenize, convert, debug
    # nothing seen yet
    self.min_fields, self.max_fields = None, 0
    self.numbers = self.skip = 0
    self.headers = []
748 self.col_count = self.max_fields
749 self.signal_count = self.col_count
750 return (self.max_fields - (self.min_fields or 0)) + self.numbers + int(bool(self.headers))
754 - def add(self, line):
755 fields = self.tokenize(line)
756 if len(fields) and fields[0]:
757 if (self.min_fields is None) or (self.min_fields > len(fields)):
758 self.min_fields = len(fields)
759 self.max_fields = max(self.max_fields, len(fields))
760 if self.debug:
761 print fields
762 anything = False
763 anynum = 0
764 nonempty = [x for x in [xx.strip() for xx in fields] if x]
765 for x in nonempty:
766 anything = True
767 try:
768 self.convert(x)
769 anynum += 1
770 except ValueError:
771 pass
772 if not anything:
773 if (not self.numbers) and (not self.headers):
774
775 self.skip += 1
776 elif (anynum < len(nonempty)) and (self.numbers == 0):
777 if not self.headers:
778
779 self.headers = fields
780 self.skip += 1
781 self.numbers += anynum
782 elif not self.numbers:
783
784 self.skip += 1
785
786
788 return float(s.replace(',', '.'))
789
791 """Returns headers, types, columns."""
792 tok_tab = lambda s: s.split('\t')
793 re_comma = re.compile(r", *")
794 tok_comma = lambda s: re_comma.split(s)
795 formats = [TextStats(tok_tab, float), TextStats(tok_tab, float_decimal_comma),
796 TextStats(tok_comma, float), TextStats(tok_comma, float_decimal_comma)]
797 lines = txt.split('\n')
798
799 for line in lines[:min(len(lines), 10)]:
800 for format in formats:
801 format.add(line)
802 if not [format for format in formats if format.numbers]:
803 return [], [], []
804 scores = numpy.array([format.score() for format in formats])
805 format = formats[numpy.argmax(scores)]
806 iline = 0
807 tokenize = format.tokenize
808 convert = format.convert
809
810 headers = format.headers
811 while len(headers) < format.max_fields:
812 headers.append("")
813 columns = [[] for x in headers]
814 typmap = collections.defaultdict(lambda: float)
815 typmap['Index'] = typmap['Group'] = int
816 types = [typmap[x] for x in headers]
817 for line in lines[format.skip:]:
818 tokens = [t.strip() for t in tokenize(line)]
819 if not any(tokens):
820 continue
821 while len(tokens) < len(columns):
822 tokens.append('')
823 for i, tok in enumerate(tokens):
824 if not tok.strip():
825 columns[i].append(types[i]())
826 else:
827 try:
828 num = convert(tok)
829
830
831 columns[i].append(convert(tok))
832 except ValueError:
833 types[i] = str
834 columns[i].append(tok)
835 columns = [ [typ(val) for val in col] for col,typ in izip(columns, types) ]
836 return headers, types, columns
837
839 carry = i / 26
840 ones = i % 26
841 toBase26 = lambda d: chr(ord('A')+d)
842 if carry:
843 return toBase26(carry-1)+toBase26(ones)
844 else:
845 return toBase26(ones)
846