1 """Classes for reading and writing all QuB binary files.
2
3 A QuB Tree is a hierarchy of Nodes. Each node may have a name and some text or numeric data.
4 Specific QuB file types, e.g. QMF (qub model files), expect specific hierarchies and data types.
5
6 History:
7
8 In the late 1990s Dr. Lorin Milescu defined the binary layout and used it for QuB data files (QDF).
9 He wrote a class in Delphi pascal called "QFS" to read and modify open files on disk.
10
11 By the early 2000s we were collaborating on a DLL interface between his GUI, written in Delphi, and
12 older algorithms written in C. We found ourselves spending a lot of time defining struct- and pointer-based
13 containers for all the varieties of information shared between GUI and DLL. At the same time,
14 folks were asking us for new extensible formats for models and idealized data. Lorin naturally proposed
15 QFS. I was feeling some pressure to use human-readable ASCII text, so I proposed a file that could
16 be stored as binary or text, depending whether efficiency or transparency was foremost. Then I went
17 ahead and implemented it in C/C++, with bindings to Delphi. I never really got the text part right, but
18 no-one really cared.
19
20 So now we had two implementations: QFS, and "qubtree" or "native". We had some idea of changing
21 everything over to the newer one, but it would have meant a ton of extra work re-implementing the core of QuB.
22 When I made the Delphi bindings, I had our DLL quagmire in mind. qubtree.dll took care of memory management
23 and reference counting so you could pass trees willy-nilly between the EXE and DLL without headaches. And now
24 that everything boiled down to one or two ref-counted qubtree pointers, we weren't constantly breaking binary
25 compatibility between DLL and EXE. Similarly, I added Python bindings and rudimentary callback support, so we
26 could do a lot of stuff across the three languages without too much intricate boilerplate.
27
28 This unit can interface to the DLL/.so, and it introduces a third implementation -- in Python, using numpy and mmap.
29 This 'numpy' flavor behaves exactly like the 'native' one, except:
30 - it doesn't even try to ReadString or ReadText; like I said, always broken anyway
31 - it ignores lock/unlock since python's single-threaded for ref-counting purposes
32 - you can't edit string data one character at a time, due to a limitation in module mmap
33 - you can get direct access to numeric data as numpy.array: node.storage.data
34 - you can resize a node with empty data, by specifying a default type
35
36
37 Flavors:
38
39 By default this module still uses the 'native' flavor, so there are no surprises for existing programs.
40 If you don't have the compiled _qubtree library, it falls back on the 'numpy' flavor. Of course,
41 you must have numpy, available separately. If you have both, you can switch the default for new nodes:
42
43 >>> qubx.tree.CHOOSE_FLAVOR('numpy')
44
45 and even convert a tree between the two flavors:
46
47 >>> as_numpy = qubx.tree.AS_FLAVOR(native_node, 'numpy')
48
49
50 Limitations:
51
52 32 bits, except when signed/unsigned are treated inconsistently; then it's more like 31.
53 (max safe file size 2GB - 1)
54
55 Supported by no-one else
56
57 Not very efficient when you have lots of nodes.
58
59
60 U{Older qubtree documentation<http://www.qub.buffalo.edu/qubdoc>}
61
62
63 Copyright 2007-2014 Research Foundation State University of New York
64 This file is part of QUB Express.
65
66 QUB Express is free software; you can redistribute it and/or modify
67 it under the terms of the GNU General Public License as published by
68 the Free Software Foundation, either version 3 of the License, or
69 (at your option) any later version.
70
71 QUB Express is distributed in the hope that it will be useful,
72 but WITHOUT ANY WARRANTY; without even the implied warranty of
73 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
74 GNU General Public License for more details.
75
76 You should have received a copy of the GNU General Public License,
77 named LICENSE.txt, in the QUB Express program directory. If not, see
78 <http://www.gnu.org/licenses/>.
79
80 """
81
82 import struct
83 import array
84 import weakref
85 import mmap
86 import cStringIO as StringIO
87 import operator
88 import time
89 import traceback
90 import os
91 from itertools import izip
92 import collections
93 import sys
94
# Name of the flavor preferred by default, the list of flavors that could
# actually be imported, and the flavor currently selected for new nodes.
FLAVOR_DEFAULT = 'native'
FLAVORS_AVAIL = []
FLAVOR = FLAVOR_DEFAULT

# Try the compiled library first, then the qubx.tree_native binding.
# `except Exception` (rather than a bare except) keeps the best-effort
# fallback for any loader error but lets KeyboardInterrupt/SystemExit through.
have_lib = False
try:
    import _qubtree
    have_lib = True
except Exception:
    try:
        import qubx.tree_native as _qubtree
        have_lib = True
        # PNode is only bound when the tree_native binding is the one in use.
        PNode = _qubtree.PNode
    except Exception:
        # Neither native implementation is usable; report why and carry on.
        traceback.print_exc()
if have_lib:
    # Re-export the native implementation under this module's public names.
    FLAVORS_AVAIL.append('native')
    Node = _qubtree.Node
    NullNode = _qubtree.NullNode
    Open = _qubtree.Open
    ReadBytes = _qubtree.ReadBytes

# The 'numpy' flavor is available whenever numpy can be imported.
try:
    import numpy
except Exception:
    have_numpy = False
else:
    have_numpy = True
    FLAVORS_AVAIL.append('numpy')
125
140
167
168
169
# Codes identifying the element type of a node's data.  They are small
# consecutive integers, so they double as indices into the tuples below.
(QTR_TYPE_EMPTY, QTR_TYPE_UNKNOWN, QTR_TYPE_POINTER, QTR_TYPE_STRING,
 QTR_TYPE_UCHAR, QTR_TYPE_CHAR, QTR_TYPE_USHORT, QTR_TYPE_SHORT,
 QTR_TYPE_UINT, QTR_TYPE_INT, QTR_TYPE_ULONG, QTR_TYPE_LONG,
 QTR_TYPE_FLOAT, QTR_TYPE_DOUBLE, QTR_TYPE_LDOUBLE) = range(15)

# One row per type code: (code, description, default element, bytes per element).
_TYPE_TABLE = (
    (QTR_TYPE_EMPTY,   "empty",                    None, 0),
    (QTR_TYPE_UNKNOWN, "unknown",                  None, 0),
    (QTR_TYPE_POINTER, "pointer",                  None, 4),
    (QTR_TYPE_STRING,  "string",                   ' ',  1),
    (QTR_TYPE_UCHAR,   "unsigned 8-bit int",       0,    1),
    (QTR_TYPE_CHAR,    "signed 8-bit int",         0,    1),
    (QTR_TYPE_USHORT,  "unsigned 16-bit int",      0,    2),
    (QTR_TYPE_SHORT,   "signed 16-bit int",        0,    2),
    (QTR_TYPE_UINT,    "unsigned int",             0,    4),
    (QTR_TYPE_INT,     "int",                      0,    4),
    (QTR_TYPE_ULONG,   "unsigned 64-bit int",      0,    8),
    (QTR_TYPE_LONG,    "signed 64-bit int",        0,    8),
    (QTR_TYPE_FLOAT,   "single-precision float",   0.0,  4),
    (QTR_TYPE_DOUBLE,  "float",                    0.0,  8),
    (QTR_TYPE_LDOUBLE, "extended-precision float", 0.0,  10),
)

# All type codes, in numeric order.
TYPES = tuple(row[0] for row in _TYPE_TABLE)

# Human-readable description of each type code.
TYPENAMES = dict((row[0], row[1]) for row in _TYPE_TABLE)

# Default element value for each type code (None where there is no element).
TYPE_DEFAULTS = tuple(row[2] for row in _TYPE_TABLE)

# Size in bytes of one element of each type code.
SIZE_OF_TYPE = tuple(row[3] for row in _TYPE_TABLE)
224
225
226
239
241 d = dict( (child.name, tree_to_dict(child, False)) for child in children(tree) )
242 data = treedata_to_dict(tree.data)
243 if not (data is None):
244 d['__data'] = data
245 if is_root:
246 d['__root_name'] = tree.name
247 return d
248 elif is_root or not tree.child.isNull:
249 return d
250 elif '__data' in d:
251 return d['__data']
252 else:
253 return None
254
264
279
280
295
296
305
306
307
308
309
# struct layout of one node frame (little-endian), in field order:
# flags, 2 reserved bytes, type, size, count, dataPos, childPos, siblingPos,
# 4 reserved bytes, nameLen.  The node's name follows the frame.
framedef = "<BBBBiiiiiiBBBB"
framelen = struct.Struct(framedef).size

# Offset of the dataPos field within a frame (after 4 bytes + size + count);
# nodes flagged "data in node" keep their data at this position instead.
frame_dataoff = struct.calcsize("<BBBBii")

# Bits of the frame's flags byte.
QTR_FLAG_PRELOAD = 1 << 1
QTR_FLAG_DATA_IN_NODE = 1 << 2

# Signature written at the start of every qubtree file or buffer.
QTR_MAGIC = "QUB_(;-)_QFS"
317
319 """A light intermediary between a file and a reader of qubtree nodes, for debugging."""
320
322 """Sets up a dtree reader given an open file fi."""
323 self.fi = fi
325 """Returns the tuple of all fields of a qubtree node, located at offset off, except the name.
326
327 @return: (flags, a, b, type, size, count, dataPos, childPos, siblingPos, c, d, e, f, nameLen)
328 where a thru f are reserved (ignored) bytes.
329 """
330 self.fi.seek(off)
331 dat = self.fi.read(framelen)
332 fdef = framedef
333 while fdef:
334 try:
335 return struct.unpack(fdef, dat)
336 except:
337 traceback.print_exc()
338 return tuple()
340 """Returns the tuple of all fields of a qubtree.Node including the name, with flags unpacked.
341
342 @return: (preload, data_in_node, type, size, count, dataPos, childPos, siblingPos, name)
343 """
344 raw = list(self.rawframe(off))
345 raw = raw + [None] * (14-len(raw))
346 flags, a, b, type, size, count, dPos, cPos, sPos, c, d, e, f, nameLen = raw
347 preload = bool(flags and (flags & 2))
348 dataIn = bool(flags and (flags & 4))
349 if not (type is None):
350 type = [None, None, None, "s", "B", 'b', 'H', 'h', 'I', 'i', 'L', 'l', 'f', 'd', None][type]
351 name = None
352 if nameLen == 0:
353 name = ""
354 elif nameLen:
355 try:
356 self.fi.seek(off+framelen)
357 name = self.fi.read(nameLen)
358 except:
359 pass
360 return (preload, dataIn, type, size, count, dPos, cPos, sPos, name)
def hier(self, off=12, indent=""):
    """Prints the tree of frames rooted at offset off, with indentation to show hierarchy.

    Each output line holds the indent, the frame's offset and the tuple
    returned by self.frame(off).  A frame's children are printed below it
    with a longer indent; its siblings follow at the same indent.

    @param off: byte offset of the first frame to print
    @param indent: prefix printed in front of every line at this level
    """
    # Follow the sibling chain with a loop and recurse only into children, so
    # the recursion depth tracks the tree's depth rather than the number of
    # siblings (a long sibling list used to exhaust the recursion limit).
    while True:
        frame = self.frame(off)
        print('%s %s %s' % (indent, '%d:' % off, frame))
        child_pos = frame[6]    # childPos field of the frame tuple
        sibling_pos = frame[7]  # siblingPos field of the frame tuple
        if child_pos:
            self.hier(child_pos, indent+' ')
        if not sibling_pos:
            break
        off = sibling_pos
def data(self, off, count, format):
    """Reads count items starting at file offset off and returns them.

    @param off: byte offset in self.fi where the items start
    @param count: number of items to read
    @param format: type code of one item, as accepted by array.array
    @return: an array.array of the given format holding the items read
    """
    source = self.fi
    source.seek(off)
    items = array.array(format)
    items.fromfile(source, count)
    return items
375
376
377
378
379 if have_numpy:
380
381
# numpy dtype for the data of each QTR_TYPE_* code, indexed by the code;
# None for the empty and unknown types.
TYPES_NUMPY = [None, None] + list(map(numpy.dtype, (
    '<u4',         # QTR_TYPE_POINTER
    '<u1',         # QTR_TYPE_STRING
    '<u1',         # QTR_TYPE_UCHAR
    '<i1',         # QTR_TYPE_CHAR
    '<u2',         # QTR_TYPE_USHORT
    '<i2',         # QTR_TYPE_SHORT
    '<u4',         # QTR_TYPE_UINT
    '<i4',         # QTR_TYPE_INT
    '<u8',         # QTR_TYPE_ULONG
    '<i8',         # QTR_TYPE_LONG
    '<f4',         # QTR_TYPE_FLOAT
    '<f8',         # QTR_TYPE_DOUBLE
    'complex128',  # QTR_TYPE_LDOUBLE
)))

# Reverse lookup from str(dtype) to a QTR_TYPE_* code; dtypes not listed map
# to QTR_TYPE_DOUBLE.  Entries are added in code order from QTR_TYPE_STRING
# up, so when two codes share a dtype the higher code is the one kept.
TYPE_OF_NUMPY = collections.defaultdict(lambda: QTR_TYPE_DOUBLE)
TYPE_OF_NUMPY.update((str(TYPES_NUMPY[code]), code)
                     for code in xrange(QTR_TYPE_STRING, len(TYPES_NUMPY)))
TYPE_OF_NUMPY['bool'] = QTR_TYPE_INT

# Size thresholds and buffer sizes, in bytes.
MAX_QUICK_READ_BYTES = 8 * 256 * 256
MAX_QUICK_WRITE_BYTES = 10 * 16 * 256 * 256
QTR_INITIAL_CAP = MAX_QUICK_READ_BYTES
# Largest run of zero bytes written at once when zero-filling a file.
ZERO_BATCH = 4 * 256 * 256
408
413
414
416 """Represents the data of a null node, which is empty and will stay empty."""
418 return '<QUBTree NULL data>'
429 node = property(get_node, doc="the null node whose data this is.")
432 preload = property(lambda self: True, set_none)
433 count = property(__len__)
434 rows = property(lambda self: 0)
435 cols = property(lambda self: 0)
436 type = property(lambda self: QTR_TYPE_EMPTY)
437 size = property(lambda self: 0)
438 loaded = property(lambda self: (-1, -1))
439 loadedRows = property(lambda self: (-1, -1))
440 - def setup(self, type, rows, cols):
446 - def loadRows(self, first, last, do_read=True):
454
456 """Represents the absence of a qubtree node.
457
458 Has all the properties and methods, so you can e.g. ask blithely if it has any data or children (no).
459 """
460 __slots__ = ['__data', '__weakref__']
464 return '<QUBTree Node : NULL>'
468 try:
469 if other.isNull:
470 return 0
471 else:
472 return -1
473 except KeyboardInterrupt:
474 raise
475 except:
476 return -1
485 name = property(lambda self: '', set_none)
486 data = property(lambda self: self.__data, set_none)
487 lineComment = property(lambda self: '', set_none)
488 isNull = property(lambda self: True)
489 child = property(lambda self: self)
490 sibling = property(lambda self: self)
491 parent = property(lambda self: self)
492 path = property(lambda self: '')
493 modified = property(lambda self: False)
494 root = property(lambda self: self)
495 - def clone(self, deep=True):
499 - def saveAsText(self, path):
509 - def find(self, name):
511 - def next(self, name):
515 - def append(self, childOrName):
519 - def insert(self, child, after=None):
527 - def lock(self, timeoutMS=None):
531
532
533 NullNode_numpy_instance = NullNode_numpy()
534
535
536
537
538
539 - class Data(object):
540 __slots__ = ['__node']
541 """A handler for the data in one qubtree node. Don't construct one for yourself; use node.data"""
543 """Constructs a data handler for a qubtree node. Don't call this directly."""
544 self.__node = weakref.ref(node)
546 return '<QUBTree data for %s>' % str(self.node).strip()
592 node = property(get_node, doc="the qubtree node whose data this is")
593 preload = property(lambda self: self.node.storage.preload, lambda self, x: self.node.storage.set_preload(x),
594 doc="whether all data is loaded into memory on file open")
595 count = property(__len__, doc="total data elements (rows * cols)")
596 rows = property(lambda self: self.count and self.node.storage.rows, doc="number of rows")
597 cols = property(lambda self: self.count and self.node.storage.cols, doc="number of columns")
598 type = property(lambda self: self.count and self.node.storage.dataType, doc="type of data elements, in QTR_TYPE_*")
599 size = property(lambda self: self.count and self.node.storage.dataSize, doc="size of one data element in bytes")
600 loaded = property(lambda self: self.node.storage.loaded, doc="(first, last) element loaded into memory")
601 loadedRows = property(lambda self: self.node.storage.loadedRows, doc="(first, last) row loaded into memory")
602 - def setup(self, type, rows, cols):
def loadRows(self, first=-2, last=-2, do_read=True):
    """Loads rows first through last (inclusive) into memory; all rows by default.

    A value of -2 for first or last means "not given": first then becomes 0
    and last becomes the final row, or -1 for either when there is no data.

    In this numpy flavor, all data is always loaded (memory mapped), and
    loadRows just adjusts .loadedRows.
    """
    start = first
    if first == -2:
        start = -1
        if self.rows:
            start = 0
    stop = last
    if last == -2:
        stop = -1
        if self.count:
            stop = self.rows - 1
    self.node.storage.load_rows(start, stop, do_read)
631 """Unloads any loaded data.
632
633 In this numpy flavor, all data is always loaded (memory mapped), and unloadRows just adjusts .loadedRows."""
634 self.node.storage.unload_rows(do_write)
644 """Returns a slice into the first..last rows of data. If possible, it will be a direct, mutable reference.
645 This may not be possible with some string data."""
646 return self.node.storage.get_rows(first, last)
648 """Returns an object that can manipulate the data in row i, and acts like a list."""
649 if 0 <= i < self.rows:
650 return DataRow(self, i)
651 raise IndexError()
653 """Returns an object that can manipulate the data in column j, and acts like a list."""
654 if 0 <= self.cols:
655 return DataCol(self, j)
656 raise IndexError()
657
659 """A handler for the data in one row of a qubtree node. Don't construct one for yourself; use node.data.row(i)"""
660 __slots__ = ['data', 'r']
662 """Don't call this directly."""
663 self.data = data
664 self.r = r
666 return '<QUBTree data for %s, row %i>' % (self.data.node.name, self.r)
668 return ' '.join([x for x in self])
672 r = self.r
673 d = self.data
674 l = d.cols
675 s = r * l
676 if isinstance(key, slice):
677 return [d[s+i] for i in xrange(*key.indices(l))]
678 elif -l <= key < 0:
679 return d[s+l+key]
680 elif 0 <= key < l:
681 return d[s+key]
682 else:
683 raise IndexError()
685 r = self.r
686 d = self.data
687 l = d.cols
688 s = r * l
689 if isinstance(key, slice):
690 for i,v in izip(xrange(*key.indices(l)), val):
691 d[s+i] = v
692 elif -l <= key < 0:
693 d[s+l+key] = val
694 elif 0 <= key < l:
695 d[s+key] = val
696 else:
697 raise IndexError()
698
700 """A handler for the data in one column of a qubtree node. Don't construct one for yourself; use node.data.col(j)"""
701 __slots__ = ['data', 'c']
703 """Don't call this directly."""
704 self.data = data
705 self.c = c
707 return '<QUBTree data for %s, col %i>' % (self.data.node.name, self.c)
709 return ' '.join([x for x in self])
713 c = self.c
714 d = self.data
715 l = d.rows
716 nc = d.cols
717 if isinstance(key, slice):
718 return [d[i*nc+c] for i in xrange(*key.indices(l))]
719 elif -l <= key < 0:
720 return d[(l+key)*nc+c]
721 elif 0 <= key < l:
722 return d[key*nc+c]
723 else:
724 raise IndexError()
726 c = self.c
727 d = self.data
728 l = d.rows
729 nc = d.cols
730 if isinstance(key, slice):
731 for i,v in izip(xrange(*key.indices(l)), val):
732 d[i*nc+c] = v
733 elif -l <= key < 0:
734 d[(l+key)*nc+c] = val
735 elif 0 <= key < l:
736 d[key*nc+c] = val
737 else:
738 raise IndexError()
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
757
758
760 """Abstract base class for managing a concrete representation of one qubtree node.
761
762 Subclasses NodeInFile and NodeInMem are instantiated as needed by class Node."""
763 __slots__ = ['__clean', 'OnDirtied', '__dirtied']
764 clean = property(lambda self: self.__clean, lambda self, x: self.set_clean(x),
765 doc="whether this node has an up-to-date representation on disk.")
766 OnDirtied = property(lambda self: self.get_ondirtied(), lambda self, x: self.set_ondirtied(x),
767 "client-provided f(storage) is called when clean becomes False.")
768 name = property(lambda self: self.get_name(), lambda self, x: self.set_name(x), doc="the node's name")
769 preload = property(lambda self: self.get_preload(), lambda self, x: self.set_preload(x),
770 doc="whether to load all data elements on file open")
771 dataCount = property(lambda self: self.get_dataCount(), doc="total number of data elements; rows*cols")
772 dataSize = property(lambda self: self.get_dataSize(), doc="size of one data element, in bytes")
773 dataType = property(lambda self: self.get_dataType(), doc="type of data, in QTR_TYPE_*")
774 dataPos = property(lambda self: self.get_dataPos(), doc="position of data in a file, in bytes")
775 childPos = property(lambda self: self.get_childPos(), lambda self, x: self.set_childPos(x),
776 doc="position of child node in file, in bytes, or 0")
777 siblingPos = property(lambda self: self.get_siblingPos(), lambda self, x: self.set_siblingPos(x),
778 doc="position of sibling node in file, in bytes, or 0")
779 rows = property(lambda self: self.get_rowcount(), doc="number of rows of data")
780 cols = property(lambda self: self.get_colcount(), doc="number of columns of data")
# The two doc strings used to be swapped: `loaded` counts data elements and
# `loadedRows` counts rows, matching the same-named properties of Data.
loaded = property(lambda self: self.get_loaded(), doc="indices of (first, last) loaded data elements")
loadedRows = property(lambda self: self.get_loadedRows(), doc="indices of (first, last) loaded rows")
783 data = property(lambda self: self.get_data(), doc="the actual data, mutable if possible. Depending on data type and storage type, can be an mmap.mmap, a slice of an mmap.mmap, a numpy.array, a numpy array onto a slice of an mmap.mmap, or a plain string.")
784 file = property(lambda self: self.get_file(), doc="the NodeFile containing this node, or None")
785 offset = property(lambda self: self.get_offset(), doc="the location of this node (frame) in file, in bytes")
790 """Called by subclass methods when they make a change to the node structure from what's on disk."""
791 if x == self.__clean: return
792 self.__clean = x
793 if not x:
794 self.OnDirtied(self)
796 return self.__dirtied() or ignore_dirtied
850
852 """Manages the attributes and data of a Node that's not in a file."""
853 __slots__ = ['__name', '__dataType', '__dataSize', '__dataCount', '__rows', '__cols', '__preload',
854 '__loadedFirst', '__loadedLast', '__data']
892
893
894
989 if (self.cols > 1 and isinstance(arr, list)) or (arr.shape[1] != self.cols):
990 raise ValueError()
991 if len(arr) != (last - first + 1)*self.cols:
992 raise ValueError()
993 self.__data[first:last+1] = arr
996
998 """Manages a memory-mapped qubtree file, either on disk or an anonymous buffer."""
1000 """
1001 Opens a file for qubtree use::
1002
1003 if path:
1004 opens the file, creating if write and not exists, and memory-maps it
1005 elif buf:
1006 copies buf into a new anonymous memory map
1007 else:
1008 sets up a new anonymous memory map with capacity in bytes
1009 """
1010 self.path = path
1011 self.buf = buf
1012 self.write = write
1013 self.mmap = None
1014 self.fileno = 0
1015 self.size = 0
1016 self.cap = 0
1017 if path:
1018 exists = os.path.exists(path)
1019 size = 0
1020 if exists:
1021
1022 size = os.stat(path)[6]
1023 if write:
1024 if not exists:
1025 size = len(QTR_MAGIC)
1026 cap = capacity
1027 open(path, 'wb').write(QTR_MAGIC)
1028 self.fileno = os.open(path, os.O_RDWR | os.O_CREAT)
1029 os.lseek(self.fileno, 0, os.SEEK_END)
1030 nzero = cap - size
1031 while nzero:
1032 batch = min(ZERO_BATCH, nzero)
1033 os.write(self.fileno, numpy.zeros(shape=(batch,), dtype='int8'))
1034 nzero -= batch
1035 else:
1036 self.fileno = os.open(path, os.O_RDWR)
1037 cap = size
1038 self.mmap = mmap.mmap(self.fileno, cap, access=mmap.ACCESS_WRITE)
1039 elif size:
1040 cap = size
1041 self.fileno = os.open(path, os.O_RDONLY)
1042 self.mmap = mmap.mmap(self.fileno, cap, access=mmap.ACCESS_READ)
1043 if self.mmap is None:
1044 size = cap = 0
1045 self.size = size
1046 self.cap = cap
1047 if self.mmap:
1048 return
1049 if buf and (len(buf) >= len(QTR_MAGIC)):
1050 self.size = len(buf)
1051 self.cap = max(capacity, self.size)
1052 self.mmap = mmap.mmap(-1, self.cap, access=mmap.ACCESS_WRITE)
1053 self.mmap[:len(buf)] = buf
1054 else:
1055 self.size = len(QTR_MAGIC)
1056 self.cap = max(capacity, self.size)
1057 self.mmap = mmap.mmap(-1, self.cap, access=mmap.ACCESS_WRITE)
1058 self.mmap[:len(QTR_MAGIC)] = QTR_MAGIC
1060 if self.mmap:
1061 self.mmap.close()
1062 self.mmap = None
1063 if self.fileno:
1064 print 'closing',self.path
1065 os.close(self.fileno)
1066 self.fileno = None
1068 """Flushes the memory map."""
1069 if self.mmap:
1070 self.mmap.flush()
def alloc(self, block_size):
    """Reserves block_size bytes after the used region and returns their offset.

    The capacity is doubled until the block fits.  When it has to grow, a
    file-backed map is closed, the file is extended with zero bytes and then
    mapped again; an anonymous map is replaced by a larger one that receives
    a copy of the bytes in use.  Either way self.mmap is a new object
    afterwards.

    @param block_size: number of bytes to reserve
    @return: offset of the reserved block, i.e. self.size before the call
    """
    offset = self.size
    oldcap = self.cap
    while (offset + block_size) > self.cap:
        self.cap *= 2
    if self.cap != oldcap:
        if self.fileno:
            self.mmap.close()
            os.lseek(self.fileno, 0, os.SEEK_END)
            # Zero-fill in pieces of at most ZERO_BATCH bytes instead of
            # building one string as large as the whole growth, and count
            # what os.write reports so a short write is not lost.
            remaining = self.cap - oldcap
            zeros = b"\0" * min(ZERO_BATCH, remaining)
            while remaining:
                remaining -= os.write(self.fileno, zeros[:min(len(zeros), remaining)])
            self.mmap = mmap.mmap(self.fileno, self.cap, access=mmap.ACCESS_WRITE)
        else:
            newmap = mmap.mmap(-1, self.cap, access=mmap.ACCESS_WRITE)
            newmap[:self.size] = self.mmap[:self.size]
            self.mmap.close()
            self.mmap = newmap

    self.size += block_size
    return offset
1091
1098
1100 """Manages the attributes and data of a Node inside a NodeFile."""
1101 __slots__ = ['__file', '__offset', '__data', '__dataType', '__dataSize', '__dataCount', '__rows', '__cols', '__preload',
1102 '__loadedFirst', '__loadedLast', '__dataPos', '__childPos', '__siblingPos', '__data_in_node', '__name']
1103 - def __init__(self, file, offset=0, template=None, name=""):
1317 if (self.__offset+framelen) > len(self.__file.mmap):
1318 print 'truncated or missing node at offset %i' % self.__offset
1319 raise Truncation(self.__offset)
1320 flags, a, b, type, size, count, dPos, cPos, sPos, c, d, e, f, nameLen \
1321 = struct.unpack(framedef, self.__file.mmap[self.__offset:self.__offset+framelen])
1322
1323 if (self.__offset+framelen+nameLen) > len(self.__file.mmap):
1324 print 'truncated or missing node name at offset %i' % (self.__offset+framelen)
1325 raise Truncation(self.__offset)
1326 self.__name = self.__file.mmap[self.__offset+framelen:self.__offset+framelen+nameLen]
1327
1328
1329 self.__dataType = min(type, QTR_TYPE_LDOUBLE-1)
1330
1331 if (type == QTR_TYPE_LONG) and (size == 4):
1332 type = self.__dataType = QTR_TYPE_INT
1333
1334
1335 self.__dataSize = SIZE_OF_TYPE[type]
1336 self.__rows = count
1337 self.__cols = self.__dataSize and (size / self.__dataSize) or 0
1338 self.__dataCount = self.__rows * self.__cols
1339 self.__childPos = cPos
1340 self.__siblingPos = sPos
1341 self.__preload = bool(flags and (flags & 2))
1342 if self.__preload and self.__dataCount:
1343 self.__loadedFirst = 0
1344 self.__loadedLast = self.__rows - 1
1345 else:
1346 self.__loadedFirst = self.__loadedLast = -1
1347
1348
1349 self.__data_in_node = bool(flags and (flags & 4))
1350 if self.__data_in_node:
1351 self.__dataPos = 0
1352 else:
1353 self.__dataPos = dPos
1354
1355 if type == QTR_TYPE_STRING:
1356 if ((self.__data_in_node and (self.__file.mmap[self.__offset+frame_dataoff+count-1] == '\0')) or
1357 (self.__file.mmap[self.__dataPos+count-1] == '\0')):
1358 self.__rows -= 1
1359 self.__dataCount -= 1
1360 self.map_data(read_only=True)
1361 self.clean = True
1363 dataPos = self.dataPos
1364 if self.__data_in_node:
1365 dataPos = struct.unpack("i", self.__file.mmap[self.__offset+frame_dataoff:self.__offset+frame_dataoff+4])[0]
1366 a = b = c = d = e = f = 0
1367 flags = (self.preload and 2 or 0) + (self.__data_in_node and 4 or 0)
1368
1369
1370 correction = 0
1371 if self.dataType == QTR_TYPE_STRING:
1372 correction = 1
1373 self.__file.mmap[self.__offset:self.__offset+framelen] = \
1374 struct.pack(framedef, flags, a, b, self.dataType, self.dataSize*self.cols, self.rows+correction,
1375 dataPos, self.childPos, self.siblingPos, c, d, e, f, min(255, len(self.__name)))
1376 self.clean = True
1377
1378
1380 __slots__ = ['dirty', 'on_set', 'on_clear']
1391
1392
1394 """Represents the (sub-)tree under a particular node in a qub tree, in an open file or in memory.
1395
1396 This is the 'numpy' flavor of qubx.tree.Node
1397 """
1398 __slots__ = ['__data', 'storage', '__parent', '__child', '__sibling', '__dirtee', '__weakref__']
1399 - def __init__(self, name="", storage=None):
1428
1430 return '<QUBTree Node : %s>' % self.name
1432 buf = StringIO.StringIO()
1433 self.__re_str(buf)
1434 return buf.getvalue()
1595
1596
1597
1598
1599
1600
1601
1602 name = property(lambda self: self.storage.name, lambda self, x: self.set_name(x), doc="the label")
1603 data = property(lambda self: self.__data, lambda self, x: self.set_data(x),
1604 doc="""
1605 read: the L{Data} object managing this node's data
1606
1607 write: replaces this node's data with the contents of a string,
1608 numpy.array (len(shape) <= 2),
1609 [list of] list of numbers,
1610 another Node.data, or None.""")
1611 lineComment = property(lambda self: self.get_lineComment(), lambda self, x: self.set_lineComment(x),
1612 doc="read/write the string data of the child named QTR_LINE_COMMENT")
1613 isNull = property(lambda self: False)
1614 child = property(lambda self: self.__child, doc="first child Node, or the NullNode")
1615 sibling = property(lambda self: self.__sibling, doc="sibling Node, or the NullNode")
1616 parent = property(lambda self: self.get_parent(), doc="parent Node, or the NullNode")
1617 path = property(lambda self: self.get_path(), doc="path of the open file containing this node, or ''")
1618 modified = property(lambda self: self.get_modified(), doc="whether this or any child... has changed since Open or save(as)")
1619 root = property(lambda self: self.get_root(), doc="the head of the tree, with no parent; possibly self.")
1624 if operator.isNumberType(x) and not operator.isSequenceType(x):
1625 x = numpy.array([x])
1626 if x.dtype == numpy.int64:
1627 x = numpy.array(x, dtype=numpy.int32)
1628 if x is None:
1629 self.data.setup(QTR_TYPE_EMPTY, 0, 0)
1630 elif isinstance(x, Data):
1631
1632 if x.type == QTR_TYPE_STRING:
1633 self.storage.set_string_data(str(x))
1634 else:
1635 self.data.setup(x.type, x.rows, x.cols)
1636 self.data[:] = x[:]
1637 elif isinstance(x, numpy.ndarray):
1638 if any(sh == 0 for sh in x.shape):
1639 self.data.clear()
1640 elif len(x.shape) == 1:
1641 self.data.setup(TYPE_OF_NUMPY[str(x.dtype)], x.shape[0], 1)
1642 self.storage.data[:,0] = x
1643 else:
1644 self.data.setup(TYPE_OF_NUMPY[str(x.dtype)], x.shape[0], x.shape[1])
1645 if self.storage.data.shape != x.shape:
1646 print self.storage.data.shape, x.shape, x
1647 traceback.print_stack()
1648 self.storage.data[:] = x
1649 elif (not isinstance(x, str)) \
1650 and operator.isSequenceType(x) and all(operator.isNumberType(z) for z in x):
1651
1652 if len(x):
1653 type = all(isinstance(k, int) or isinstance(k, numpy.int32) for k in x) and QTR_TYPE_INT or QTR_TYPE_DOUBLE
1654 self.data.setup(type, len(x), 1)
1655 self.storage.data[:,0] = x
1656 else:
1657 self.data.clear()
1658 elif (not isinstance(x, str)) \
1659 and operator.isSequenceType(x) \
1660 and all(operator.isSequenceType(z) and all(operator.isNumberType(zz) for zz in z) for z in x):
1661
1662 type = all(all(isinstance(k, int) for k in z) for z in x) and QTR_TYPE_INT or QTR_TYPE_DOUBLE
1663 self.data.setup(type, len(x), max(len(z) for z in x))
1664 for i,z in enumerate(x):
1665 self.data.row(i)[:len(z)] = z
1666 else:
1667 self.storage.set_string_data(str(x))
1684 - def clone(self, deep=True, file=None):
1708 """Returns whether anything's different from on disk. Adjusts child and sibling positions first in case anything's moved."""
1709 return self.__pre_save()
1710 - def saveAs(self, path, as_copy=False):
def saveAsText(self, path):
    """Writes a text representation of the (sub-)tree rooted here, to path on disk.

    @param path: name of the file to create or overwrite with str(self)
    """
    # Build the text before opening the file, so that a failure while
    # formatting the tree doesn't leave an existing file truncated.
    text = str(self)
    # The with-block closes the file even if the write fails.
    with open(path, 'w') as out:
        out.write(text)
1733 """When this node is the root of an open file (self.storage.file != None), writes any modifications to disk."""
1734 if self != self.root:
1735 raise Exception("Attempt to save non-root node.")
1736 if not self.storage.file:
1737 raise Exception('save() without saveAs()')
1738
1739 if self.__pre_save():
1740 self.__re_save()
1741 self.storage.file.flush()
1742 self.re_map_data()
1743 return True
1783 """If this node is the root of an open file, moves it into memory and close the file."""
1784 if not self.storage.file: return True
1785 if self != self.root:
1786 raise Exception("Attempt to close non-root node.")
1787 file = self.storage.file
1788 self.__re_store()
1789 file.close()
1790 return True
1798 """Returns the number of bytes needed to store the (sub-)tree rooted here."""
1799 return self.storage.measure() + sum(c.__re_measure() for c in children(self))
1805 """Returns the names of all child Nodes."""
1806 return [c.name for c in children(self)]
1807 - def find(self, name):
1815 - def next(self, name):
1824 """Returns the next sibling with name == self.name, or the NullNode."""
1825 return self.next(self.name)
1826 - def append(self, childOrName):
def insert(self, childOrName, after=None):
    """insert(str, after): inserts a new child node after the given child or first, with the given name, or '', and returns it
    insert(Node, after): inserts an existing node after the given child or first, first removing it from its former parent if any, and returns it

    @param childOrName: a Node_numpy to insert, or anything else, whose str() names a new node
    @param after: the child to insert behind; if omitted (or false), the node becomes the first child
    @return: the inserted node; childOrName itself if it is this node or a NullNode_numpy
    """
    # NOTE(review): the double-underscore attributes below are name-mangled,
    # so this code only works inside the class that defines them.
    # Inserting a node into itself, or inserting the null node, does nothing.
    if childOrName == self: return self
    if isinstance(childOrName, NullNode_numpy): return childOrName
    if isinstance(childOrName, Node_numpy):
        c = childOrName
        if not c.parent.isNull:
            # Detach from the former parent first.
            c.parent.remove(c, False)
        elif c.storage.file and (c.storage.file != self.storage.file):
            # A parentless node stored in some other file: close it before adopting it.
            c.close()
        if c.storage.file != self.storage.file:
            # presumably moves c's storage to wherever this node is stored -- confirm against __re_store
            c.__re_store(self.storage.file)
        # The parent is held through a weak reference.
        c.__parent = weakref.ref(self)
        if after:
            # Splice c into the sibling chain right behind `after`.
            c.__sibling = after.sibling
            after.__sibling = c
        else:
            # No `after`: c becomes the first child, ahead of the current one.
            c.__sibling = self.child
            self.__child = c
        return c
    else:
        # Anything else is taken as the name of a new, empty node.
        return self.insert(Node_numpy(name=str(childOrName)), after)
1856 """Inserts a copy of other, after the last child, and returns it. If not deep, skip other's children."""
1857 return self.append( other.clone(deep, self.storage.file) )
1859 """Inserts a copy of other, after the given child or first, and returns it. If not deep, skip other's children."""
1860 return self.insert( other.clone(deep, self.storage.file), after )
1861 - def remove(self, child, re_store=True):
1884 """artifact from when data edits didn't go memmap straight to disk; had to be marked changed; does nothing."""
1885 pass
def lock(self, timeoutMS=None):
    """Does nothing in this flavor.

    The native flavor has a mutex, mainly to protect the ref-count; this
    method exists so code written for it keeps working.  timeoutMS is ignored.
    """
    return None
1890 """the native flavor has a mutex, mainly to protect the ref-count. This flavor does nothing."""
1891 pass
1892
1893
1895 """Manages a non-memory-mapped qubtree file on disk. Read-only.
1896
1897 Solves the (mainly 32-bit) problem of opening files that don't fit in address space, by
1898 leaving non-preload data on disk until node.data.read_rows().
1899 """
1901 """
1902 Opens a file for qubtree use.
1903
1904 """
1905 self.path = path
1906 self.write = write
1907 self.file = None
1908 self.size = 0
1909 try:
1910 exists = os.path.exists(path)
1911 size = 0
1912 if exists:
1913
1914 size = os.stat(path)[6]
1915 self.file = open(path, 'rb')
1916 self.size = size
1917 except KeyboardInterrupt:
1918 raise
1919 except:
1920 traceback.print_exc()
1922 if self.file:
1923 print 'closing',self.path
1924 self.file = None
1925
1926
1928 """Manages the attributes and data of a Node inside a NodeFile_NoMap. Read-only.
1929
1930 Solves the (mainly 32-bit) problem of opening files that don't fit in address space, by
1931 leaving non-preload data on disk until node.data.read_rows().
1932 """
1933 __slots__ = ['__file', '__offset', '__name', '__preload', '__data', '__dataCount', '__dataSize', '__dataType', '__dataPos',
1934 '__childPos', '__siblingPos', '__rows', '__cols', '__loadedFirst', '__loadedLast', '__data_in_node']
1936 """
1937 Instantiated by Node to either manage an existing node or create a new one within file:
1938
1939 reads in the node structure at offset bytes and set up access to its data.
1940 """
1941 NodeStorage.__init__(self)
1942 self.__file = file
1943 self.__offset = offset
1944 self.read()
1952 raise Exception('read only')
1956 raise Exception('read only')
1970 raise Exception('read only')
1974 raise Exception('read only')
2017 raise Exception('read only')
2019 raise Exception('read only')
2021 raise Exception('read only')
2023 raise Exception('use node.data.read()')
2025 raise Exception('use node.data.read()')
2040 raise Exception('read only')
2044 if (self.__offset+framelen) > self.__file.size:
2045 print 'truncated or missing node at offset %i' % self.__offset
2046 raise Truncation(self.__offset)
2047 fi = self.__file.file
2048 fi.seek(self.__offset)
2049 dat = fi.read(framelen)
2050 flags, a, b, type, size, count, dPos, cPos, sPos, c, d, e, f, nameLen \
2051 = struct.unpack(framedef, dat)
2052
2053 if (self.__offset+framelen+nameLen) > self.__file.size:
2054 print 'truncated or missing node name at offset %i' % (self.__offset+framelen)
2055 raise Truncation(self.__offset)
2056 fi.seek(self.__offset+framelen)
2057 self.__name = fi.read(nameLen)
2058
2059
2060 self.__dataType = min(type, QTR_TYPE_LDOUBLE-1)
2061
2062 if (type == QTR_TYPE_LONG) and (size == 4):
2063 type = self.__dataType = QTR_TYPE_INT
2064
2065
2066 self.__dataSize = SIZE_OF_TYPE[type]
2067 self.__rows = count
2068 self.__cols = self.__dataSize and (size / self.__dataSize) or 0
2069 self.__dataCount = self.__rows * self.__cols
2070 self.__childPos = cPos
2071 self.__siblingPos = sPos
2072 self.__preload = bool(flags and (flags & 2))
2073 if self.__preload and self.__dataCount:
2074 self.__loadedFirst = 0
2075 self.__loadedLast = self.__rows - 1
2076 else:
2077 self.__loadedFirst = self.__loadedLast = -1
2078
2079 self.__data_in_node = bool(flags and (flags & 4))
2080 if self.__data_in_node:
2081 self.__dataPos = 0
2082 else:
2083 self.__dataPos = dPos
2084
2085 if type == QTR_TYPE_STRING:
2086 if self.__data_in_node:
2087 fi.seek(self.__offset+frame_dataoff+count-1)
2088 else:
2089 fi.seek(self.__dataPos+count-1)
2090 if fi.read(1) == '\0':
2091 self.__rows -= 1
2092 self.__dataCount -= 1
2093 self.map_data()
2094 self.clean = True
2096 raise Exception('read only')
2097
2098
2099
2101 """Represents the (sub-)tree under a particular node in a qub tree. Read-only.
2102
2103 Solves the (mainly 32-bit) problem of opening files that don't fit in address space, by
2104 leaving non-preload data on disk until node.data.read_rows().
2105
2106 Instantiated by Open_numpy when memory runs out.
2107 """
2108 __slots__ = ['__data', 'storage', '__parent', '__child', '__sibling', '__dirtee', '__weakref__']
2126 return '<QUBTree Node : %s>' % self.name
2128 buf = StringIO.StringIO()
2129 self.__re_str(buf)
2130 return buf.getvalue()
2293 name = property(lambda self: self.storage.name, lambda self, x: self.set_name(x), doc="the label")
2294 data = property(lambda self: self.__data, lambda self, x: self.set_data(x),
2295 doc="""read: the L{Data} object managing this node's data.""")
2296 lineComment = property(lambda self: self.get_lineComment(), lambda self, x: self.set_lineComment(x),
2297 doc="read/write the string data of the child named QTR_LINE_COMMENT")
2298 isNull = property(lambda self: False)
2299 child = property(lambda self: self.__child, doc="first child Node, or the NullNode")
2300 sibling = property(lambda self: self.__sibling, doc="sibling Node, or the NullNode")
2301 parent = property(lambda self: self.get_parent(), doc="parent Node, or the NullNode")
2302 path = property(lambda self: self.get_path(), doc="path of the open file containing this node, or ''")
2303 modified = property(lambda self: self.get_modified(), doc="whether this or any child... has changed since Open or save(as)")
2304 root = property(lambda self: self.get_root(), doc="the head of the tree, with no parent; possibly self.")
2309 raise Exception('read only')
2323 - def clone(self, deep=True, file=None):
2336 """Returns whether anything's different from on disk. Adjusts child and sibling positions first in case anything's moved."""
2337 return False
def saveAs(self, path, as_copy=False):
    """Hands off to self.save() and returns whatever it returns.

    This method itself does nothing with path or as_copy; both are unused here.

    @param path: unused by this implementation
    @param as_copy: (def False) unused by this implementation
    @return: the result of self.save()
    """
    outcome = self.save()
    return outcome
def saveAsText(self, path):
    """Writes a text representation of the (sub-)tree rooted here, to path on disk.

    The text written is str(self). The file is opened in mode 'w', so existing
    content at path is replaced.

    @param path: name of the file to write
    @return: None
    """
    # Render first: if str(self) fails, the destination has not been truncated yet.
    text = str(self)
    # The context manager closes (and therefore flushes) the handle deterministically,
    # instead of relying on garbage collection of an anonymous file object.
    with open(path, 'w') as out:
        out.write(text)
2348 print 'ignoring save of Node_numpy_NoMap'
2349 return True
2373 print 'ignoring close of Node_numpy_NoMap'
2374 return True
2382 """Returns the number of bytes needed to store the (sub-)tree rooted here."""
2383 return self.storage.measure() + sum(c.__re_measure() for c in children(self))
2387 """Returns the names of all child Nodes."""
2388 return [c.name for c in children(self)]
2389 - def find(self, name):
2397 - def next(self, name):
2406 """Returns the next sibling with name == self.name, or the NullNode."""
2407 return self.next(self.name)
def append(self, childOrName):
    """Unsupported here: always raises.

    @param childOrName: ignored
    @raise Exception: 'read only', unconditionally
    """
    reason = 'read only'
    raise Exception(reason)
def insert(self, childOrName, after=None):
    """Unsupported here: always raises.

    @param childOrName: ignored
    @param after: (def None) ignored
    @raise Exception: 'read only', unconditionally
    """
    reason = 'read only'
    raise Exception(reason)
2413 raise Exception('read only')
2415 raise Exception('read only')
def remove(self, child, re_store=True):
    """Unsupported here: always raises.

    @param child: ignored
    @param re_store: (def True) ignored
    @raise Exception: 'read only', unconditionally
    """
    reason = 'read only'
    raise Exception(reason)
2419 """artifact from when data edits didn't go memmap straight to disk; had to be marked changed; does nothing."""
2420 pass
def lock(self, timeoutMS=None):
    """No-op counterpart of the native flavor's mutex.

    The native flavor has a mutex, mainly to protect the ref-count. This flavor
    has no such mutex, so the call does nothing.

    @param timeoutMS: (def None) ignored
    @return: None
    """
    return None
2425 """the native flavor has a mutex, mainly to protect the ref-count. This flavor does nothing."""
2426 pass
2427
2428
2472
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
if not have_lib:
    # have_lib is falsy (presumably the native DLL/.so was not loaded -- confirm
    # where have_lib is set): publish the numpy flavor under the generic names.
    FLAVOR = 'numpy'
    ReadBytes = ReadBytes_numpy
    Open = Open_numpy
    NullNode = NullNode_numpy
    Node = Node_numpy
2508
2509
2510
2511 if __name__ == '__main__':
2512 CHOOSE_FLAVOR('numpy')
2513
def presume(condition, comment=''):
    """Raises Exception(comment) when condition is falsy; otherwise returns None.

    @param condition: any value; only its truthiness is tested
    @param comment: (def '') passed as the exception's argument
    @raise Exception: when condition is falsy
    """
    if condition:
        return
    raise Exception(comment)
2517
2519 adat = open(apath, 'rb').read()
2520 bdat = open(bpath, 'rb').read()
2521 if len(adat) != len(bdat):
2522 print lbl,"Length diff: A:%d B:%d" % (len(adat), len(bdat))
2523 for i in xrange(min(len(adat), len(bdat))):
2524 if adat[i] != bdat[i]:
2525 print lbl,'%i:\t%s\t%s' % (i, adat[i], bdat[i])
2526
2560
2562 if not root: return
2563 root['str'].data = 'foobar'
2564 presume(root['str'].data.type == QTR_TYPE_STRING)
2565 presume(str(root['str'].data) == 'foobar')
2566 root['str2'].data = root['str'].data
2567 presume(str(root['str2'].data) == 'foobar')
2568 root['str2'].data = None
2569 presume(root['str2'].data.count == 0)
2570 root.remove(root['str2'])
2571 root['float'].data.setup(QTR_TYPE_FLOAT, 1, 1)
2572 root['float'].data[0] = 4.5
2573 presume(root['float'].data[0] == 4.5)
2574 presume(str(root['float'].storage.data.dtype) == 'float32')
2575 root['double'].data = [3, 4.0, 5]
2576 presume(root['double'].data.type == QTR_TYPE_DOUBLE)
2577 presume(root['double'].data[2] == 5.0)
2578 root['1d'].data = (1, 2, 3)
2579 presume(root['1d'].data[0] == 1)
2580 root['1d'].data = [1.0, 2.0]
2581 presume(root['1d'].data.type == QTR_TYPE_DOUBLE)
2582 presume(root['1d'].data.rows == 2)
2583 presume(root['1d'].data.rows == root['1d'].data.count)
2584 presume(root['1d'].data[1] == 2.0)
2585 root['2d'].data = ((1,2,3),(4,5,6))
2586 presume(1 < root['2d'].data.rows < root['2d'].data.cols < 4)
2587 presume(root['2d'].data.row(1)[0] == 4)
2588 presume(root['2d'].data.type == QTR_TYPE_INT)
2589 presume(root['2d'].storage.data[0,1] == 2)
2590 root['2d2'].data = root['2d'].data
2591 open('/tmp/qtr_2d', 'wb').write(root['2d'].getBytes())
2592 _2dc = root['2d2'].clone()
2593 _2dc.name = '2d'
2594 open('/tmp/qtr_2d2', 'wb').write(_2dc.getBytes())
2595 bin_compare('2d vs 2d2', '/tmp/qtr_2d', '/tmp/qtr_2d2')
2596 root['2d'].data.resize(5)
2597 root['2d2'].data.resize(1)
2598 presume(root['2d'].data.count == 15)
2599 presume(root['2d2'].data.count == 3)
2600 presume(tuple(root['2d'].data[:3]) == (1,2,3))
2601 root['2d'].data[12:15] = (6, 6, 6)
2602 presume(tuple(root['2d'].storage.data[4,:]) == (6, 6, 6))
2603 root['2d'].data.col(2)[2:4] = (7, 8)
2604 presume(tuple(root['2d'].storage.data[2:4,2]) == (7, 8))
2605 root['clr'].data = 'foobar'
2606 presume(str(root['clr'].data) == 'foobar')
2607 root['clr'].data.clear()
2608 presume(root['clr'].data.type == QTR_TYPE_EMPTY)
2609 presume(root.data.node == root)
2610
2611
2612 presume(NullNode() == NullNode())
2613
2614 root = Node('root')
2615 build_links(root)
2616 build_data(root)
2617 open('/tmp/qtr_gb_root', 'wb').write(root.getBytes())
2618 assert(not root.path)
2619 root.saveAs('/tmp/qtr_sa_root')
2620 assert(root.path == '/tmp/qtr_sa_root')
2621 root.close()
2622 assert(root.path == '')
2623 froot = Node('root')
2624 froot.saveAs('/tmp/qtr_inf_root')
2625 build_links(froot)
2626 build_data(froot)
2627 froot.save()
2628 froot.close()
2629 assert(root.getBytes() == froot.getBytes())
2630
2631 del root
2632 del froot
2633
2634 bin_compare('getbytes vs saveas', '/tmp/qtr_gb_root', '/tmp/qtr_sa_root')
2635
2636
2637
2638
# Round trip 1: Open() the file saved earlier and re-serialize it with getBytes().
f = Open('/tmp/qtr_inf_root', True)
presume(f)
presume(not f.modified)
# Close each handle explicitly so the bytes are on disk before bin_compare reads them.
with open('/tmp/qtr_infb_root', 'wb') as out:
    out.write(f.getBytes())
del f
bin_compare('getbytes vs buildin/Open/getbytes', '/tmp/qtr_gb_root', '/tmp/qtr_infb_root')
# Round trip 2: parse the same file from an in-memory byte string and re-serialize it.
with open('/tmp/qtr_inf_root', 'rb') as src:
    f = ReadBytes(src.read())
presume(f)
with open('/tmp/qtr_infbb_root', 'wb') as out:
    out.write(f.getBytes())
del f
# Compare the file just written (infbb). The earlier code re-compared the Open()
# output (infb), so the ReadBytes result was never actually checked.
bin_compare('saveas vs buildin/ReadBytes/getbytes', '/tmp/qtr_gb_root', '/tmp/qtr_infbb_root')
2650
2651
2652 f = Open('/tmp/qtr_gb_root')
2653 f.remove(f['left'])
2654 f['child'].data = 4.2
2655 f['int'].data = 21
2656 presume(f.modified)
2657 f.save()
2658 f.close()
2659 f = Open('/tmp/qtr_gb_root', True)
2660 presume(not ('left' in f.list()))
2661 presume(f['child'].data[0] == 4.2)
2662 presume(f['int'].data[0] == 21)
2663 del f
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675