QUBTree Python tutorial

This document shows how to build the following tree, then how to save, read, and modify it.

Sample
{
	array of ints = 1 2 3 5
	MATRIX matrix of ints = (1 2 3
				1 2 3)
	some string = blah blah blah
	sample subtree
	{
		#\ a comment
		element
		{
			name = foo
		}
		#\ another comment
		element
		{
			name = bar
		}
	}
	a double = 0.3 #\ with a comment
}

# note: at any time, try "print node" or "print node.data" to double-check what's there.

------- Building the tree -------------------------

>>> import qubtree

#        creating the root node:
>>> tree = qubtree.Node("Sample")

#        accessing a child node by name -- it will be created and appended if it doesn't exist
>>> child = tree["array of ints"]

#        putting data in the child node
>>> child.data = [1, 2, 3, 5]

#        adding a node with matrix data (reusing the "child" variable)
>>> child = tree["matrix of ints"]
>>> child.data.setup( qubtree.QTR_TYPE_INT, 2, 3 )

#        filling the data
>>> for row in [child.data.row(i) for i in range( child.data.rows )]:
...    for j in range( len(row) ):
...       row[j] = j+1

#        adding a node with string data
>>> tree["some string"].data = "blah blah blah"

#        adding a subtree
>>> subtree = tree["sample subtree"]
>>> child = subtree.append("element")
>>> child["name"].data = "foo"

#        cloning an "element"
>>> child = subtree.appendClone( child );
>>> child["name"].data = "bar"

#        inserting a comment line (which is represented as a node)
#        as the first child
>>> subtree.insert( qubtree.CommentNode( " a comment" ) )
#        and after the first "element"
>>> subtree.insert( subtree["element"], qubtree.CommentNode( " another comment" ) )

#        adding a node with one double as data
>>> child = tree["a double"];
>>> child.data = 0.3
>>> child.lineComment = " with a comment"


-------- Saving and reopening ---------------------------------

#        saving in binary -- the file remains open until you close it
>>> if not tree.saveAs("sample.qtr"):
...    print "can't save"

#        saving a copy as text -- the binary file remains open
>>> if not tree.saveTextCopy("sample.qtt"):
...    print "can't save text"

#        do some modifications here ...
>>> tree.insert("abc").data = 123

#        saving changes to disk
>>> tree.save()

#        closing the binary file
>>> tree.close()

#        reading the text file
>>> tree = qubtree.ReadText("sample.qtt")

#        opening the binary file for reading and writing
>>> tree = qubtree.Open("sample.qtr")
>>> if tree.isNull:
...    print "couldn't open"


-------- Reading and modifying data ---------------------------------

#        reading some data
>>> print tree["some string"].data
blah blah blah
>>> print tree["a double"].data
0.3

#        data as a list
>>> idata = tree["array of ints"].data
>>> for i in idata:
...    print i,
1 2 3 5
>>> idata[0] = 0
>>> print idata
0 2 3 5

#        data as a matrix
#        by row
>>> child = tree["matrix of ints"]
>>> row = child.data.row(0)
>>> row[2] = 6
#        by column
>>> col = child.data.col(1)
>>> col[0] = 8
>>> print child.data
(1   8   6
 1   2   3)

#        replacing data
>>> child = tree["pqz"];
>>> child.data = "foo"
>>> child.data.setup( qubtree.QTR_TYPE_DOUBLE, 3, 2 )
(0.0    0.0
 0.0    0.0
 0.0    0.0)

#        resizing the number of rows (so it becomes 4x2)
>>> child.data.resize( 4 )

#        removing data
>>> child.data.clear()


-------- Working with large data --------------------------------------

In this section we create a QDF file without ever holding the entire data set in memory at once.

>>> qdf = qubtree.Node("DataFile")
>>> if qdf.saveAs("sample.qdf"):
...
...    # note that saveAs is the first step;
...    # this establishes storage so we can selectively load data
...
...    # set up structure and metadata
...    segment = qdf["Segments"]["Segment"]
...    segment["StartTime"].data = 0.0
...    channels = segment["Channels"]
...
...    qdf["Sampling"].data = 5.0e-5        # in seconds
...    qdf["Scaling"].data = 320.0          # the decimal point matters, since floating point is expected
...    qdf["ADChannelCount"].data = 2
...    qdf["ADDataSize"].data = 2           # in bytes
...    qdf["ADDataType"].data = qubtree.QTR_TYPE_SHORT
...
...    # fill channels with 2 channels of 5 megapoints each;
...    # each row is a sample; each column is a channel;
...    # set PRELOAD to false first, so no RAM is allocated.
...
...    channels.data.preload = 0
...    channels.data.setup( qubtree.QTR_TYPE_SHORT, 5000000, 2 )
...
...    # in a fit of lameness, just set the data points to zero, 100000 samples at a time
...    firstChan = channels.data.col(0)
...    secondChan = channels.data.col(1)
...
...    for firstRow in range(0, 5000000, 100000):
...       channels.data.loadRows(firstRow, firstRow + 100000 - 1)
...       for i in range(firstRow, firstRow + 100000):
...          firstChan[i], secondChan[i] = 0.0, 0.0
...
...    channels.data.unload()
...
...    qdf.save()
...
... else:
...    print "can't save as sample.qdf"


----------- Working with child nodes ----------------------------------

#       tree.find(name) vs. tree[name]
tree[name] looks for the first child with that name; if none, it creates one automatically.
tree.find(name) acts identically if such a child exists, but if none, it returns a null node.

#       seeing if tree has a child named 'qaz'
>>> if tree.find('qaz').isNull:
...    print "there is no child named 'qaz'"

#       printing the name of each child node:
>>> child = tree.child
>>> while not child.isNull:
...   print child.name
...   child = child.sibling

#       printing the "name" of each "element" in "sample subtree"
>>> child = tree['sample subtree'].find('element')
>>> while not child.isNull:
...   print child['name'].data
...   child = child.sibling('element')


---------------------------------------------------------------------------


#        Making a copy of the whole tree
>>> treecopy = tree.clone()

#        or just the node and its data
>>> treecopy = tree.clone( 0 )

1

Recall that nodes are reference-counted: they are released when the last reference expires.
The flip side is that nodes stay valid as long as they are referenced, even if the
file has been closed.  Not a big deal when all the data is in memory anyway, but
for files with large partially-loaded data you could end up with the whole thing in RAM by mistake.

Calling tree.close() guarantees the whole thing will stay in memory until "tree" goes out of scope.

To avoid this problem, don't call close().  Files are closed automatically when no
variables reference them, so either let the variables go out of scope, or "del tree"

>>> del child
>>> del subtree
>>> del tree

Notice that the root was the last to go.  Otherwise subtree would still have been referenced
when the file closed, and would have jumped into RAM in order to stay valid.

The old python qubtree tutorial:

  not as complete, but maybe more appropriate for the interpreter

*** this document may contain out-of-date API calls.  Please double-check
*** methods and attributes before using them

* creating nodes
	>>> import qubtree
	>>> A = qubtree.Node('A')		# makes a node named 'A'

* manipulating a node's name
	>>> print A.name
	'A'
	>>> A.name = 'AA'

* linking nodes
	>>> B = qubtree.Node('B')
	>>> A.append(B)			# makes B a child of A
	>>> BB = A.child
	>>> BB.name = 'BB'
	>>> print B.name
	'BB'				# notice that B and A.child refer to the same node
	>>> print B.parent.name
	'AA'

* siblings
	>>> C = qubtree.Node('C')
	>>> A.append(C)
	>>> print A
	AA
	{
		BB
		C
	}
	>>> print B.sibling.name
	'C'

* inserting a new child
	>>> A.list()			# list the names of A's children
	['BB', 'C']
	>>> A.insert( qubtree.Node('X') )	# insert as the first child
	>>> A.list()
	['X', 'BB', 'C']
	>>> A.insert( qubtree.Node('Y'), B ) # insert after B
	>>> A.list()
	['X', 'BB', 'Y', 'C']

* removing children
	>>> A.remove( A['X'] )
	>>> A.list()
	['BB', 'Y', 'C']

* duplicating nodes and trees
	>>> A2 = A.clone()		# deep-clone default: true: copy the tree rooted at A
	>>> A2.list()
	['BB', 'Y', 'C']
	>>> A3 = A.clone(0)		# deep-clone = false: copy only this node and its data
	>>> A3.list()
	[]

* null nodes
	a node object is null if it does not refer to a valid node:
	>>> C.isNull
	0
	>>> C.sibling.isNull		# C has no sibling (it is the last child of A)
	1

* iterating over the children of a node
	>>> child = A.child		# the first child of A
	>>> while not child.isNull:
	...	print child.name,
	...	child = child.sibling
	BB Y C

* working with child nodes by name
	>>> D = A.find('D')		# returns the first child of A named 'D', or a null node
	>>> D.isNull
	1
	>>> D = A['D']			# like A.find('D'), but if there is no 'D' it creates one and appends it, then returns it
	>>> D.isNull
	0
	>>> print C.sibling.name
	'D'

* working with child nodes by number
	>>> A[1].name
	'Y'

	please don't do this in for loops.
	A[3] is equivalent to A.child.sibling.sibling.sibling and takes just as long.

* iterating over child nodes with the same name
	>>> child = A.find('X')
	>>> while not child.isNull:
	...	# do something with child
	...	child = child.next('X')

* node data
	can be empty, a string, or a matrix of numbers:
	>>> C.data = None		# make it empty
	>>> C.data = 'foo'		# make it a string
	>>> C.data = [1, 2, 3]		# make it a 3x1 matrix of ints
	>>> C.data = [1.0, 2, 3]	# make it a 3x1 matrix of floats
	>>> C.data = 5			# make it a 1x1 matrix of integer

	>>> C.data.setup( qubtree.QTR_TYPE_INT, 3, 2 ) # make room for a 3x2 int matrix

	>>> C.data.rows
	3
	>>> C.data.cols
	2

	can be treated like a list
	>>> C.data[0] = 3
	>>> print C.data[0]
	3

	note that when you assign to .data, the right-hand side is copied into C:
	>>> arr = [1, 2]
	>>> C.data = arr
	>>> arr[0] = 5			# changing the original doesn't affect the node's data
	>>> print C.data[0]
	1

	>>> print str( C.data )		# converts it to a string if it wasn't already
	1	2

	>>> len( C.data ) == C.data.count # two ways of getting the number of elements/characters; both == (rows * cols)
	1
	
	fancy list features like append don't work with node data;
	you have to explicitly resize it:
	>>> C.data.resize( 4 )         # changes the number of rows


* comments
	>>> D.lineComment = ' something'
	>>> print D
	D #\ something
	>>> print D.lineComment
	 something

	comments are stored as specially-named children of the node they're attached to.


* trees on disk
	can be stored as text (for human readability) or binary (for reliability)

	>>> A.saveAsText('foo.qtt')
	-------------- foo.qtt on disk contains -------
	AA
	{
		BB
		Y
		C = 1	2	42389423
		D #\ something
	}
	-----------------------------------------------
	>>> A2 = qubtree.read_text('foo.qtt')
	>>> print A2
	AA
	{
		BB
		Y
		C = 1	2	42389423
		D #\ something
	}

	the text format is convenient for diagnostics, but is not always re-read accurately.
	you should save trees in binary if they will be re-read by computer:
	>>> A.saveAsBinary('foo.qtr')
	>>> A2 = qubtree.read_bin('foo.qtr')
	>>> print A2
	AA
	{
		BB
		Y
		C = 1	2	42389423
		D #\ something
	}

* text-format extras
	data can be a multi-line string.  '\' at the end of a line means it continues onto the next line.
	if one of your lines already ends in '\' the behavior is undefined.

	if data is an array that's too big to print on one line, it can be encased in parentheses:
	>>> A.data = range(20)
	>>> print A
	AA = (0	1	2	3	4	5	6	7	8	9
		10	11	12	13	14	15	16	17	18	19)

	adjacent leaf nodes with array data of the same length can be multiplexed:
	>>> BB.data = range(10)
	>>> BB.sibling.data = range(10,20)
	>>> print A
	AA = (0	1	2	3	4	5	6	7	8	9
		10	11	12	13	14	15	16	17	18	19)
	{
		(BB	Y
		 0	10
		 1	11
		 2	12
		 3	13
		 4	14
		 5	15
		 6	16
		 7	17
		 8	18
		 9	19)
		C = 1	2	42389423
		D #\ something
	}
	


* a note about printing trees
	QUB's Python Scripts window slows way down with a lot of text.
	the Report window doesn't have this problem.
	to print a tree to the report window:
	>>> QUB.Report(myTree)