// Part of QUB Online
/* typedefStruct.js -- access slices of a DataView as C-style structs.
Intended for reading and writing binary file formats.
by Christopher Nicolai, 2017
Suppose you have a header file describing some binary file format, e.g.
#pragma once
#pragma pack(1)
// This file is always saved in little-endian byte order
typedef struct {
char magic[4];
i16_t version;
i16_t flags;
f32_t value1;
f64_t value2;
} SimpleFile;
You also have an ArrayBuffer with the contents of such a file, e.g.
fileInput.addEventListener('change', inputEvent => {
const fileReader = new FileReader;
fileReader.addEventListener('load', readerEvent => {
const arraybuffer = readerEvent.target.result;
// arraybuffer has the contents of file loaded via fileInput (<input type="file">)
});
fileReader.readAsArrayBuffer(fileInput.files[0]);
});
Here's how to read it:
// define the struct class
const SimpleFile = typedefStruct([
['c[4]', 'magic'],
['i16', 'version'],
['i16', 'flags'],
['f32', 'value1'],
['f64', 'value2']
], true); // true: little-endian (by default, structs are big-endian (network byte order))
// make a DataView on the arraybuffer
const dataview = new DataView(arraybuffer);
// make a struct instance operating at offset 0 in the dataview
const myFile = new SimpleFile(dataview, 0);
// find out the size of the struct:
const byteLength = myFile.byteLength;
// read one field at a time
const value1 = myFile.value1;
// or all fields, copied into a plain JS object:
const myValues = myFile.copy();
// (so we can release the arraybuffer asap and save memory)
// also, edit fields:
myFile.value1 = 1.0;
myFile.magic = 'abcd';
We identify most types by the letter 'i' (integer), 'u' (unsigned) or 'f' (floating-point) followed by the bit width,
e.g. 'i8' or 'f64'. We also define special character types 'c', 'd', and 'e' for utf-8, -16, and -32 respectively, and
'p' for pascal strings. On their own, they are synonyms for the appropriate-sized unsigned int, but with array notation
they are treated specially as strings.
Array notation: follow a typename with dimension in brackets to define a (1D) array, e.g.
['i16[8]', 'eightShorts'],
['c[8]', 'eightBytesOfUTF8'],
['d[8]', 'eightUnitsOfUTF16'], // eight 16-bit units = 16 bytes
['e[8]', 'eightUnitsOfUTF32'], // eight 32-bit units = 32 bytes
['p[8]', 'oneByteOfStringLengthFollowedBySevenAsciiChars']
...
const [a,b,c,d,e,f,g,h] = myFile.eightShorts;
myFile.eightBytesOfUTF8 = 'testing';
Note that the entire array or string is read or written each time you access it, so batch accesses where possible.
Appending a '*' to the typename treats its value as a 32-bit unsigned pointer. While there is no sensible way to interpret
such pointers, we've included them because they are found in some file headers.
Tip: if someone was careless about (not) declaring struct packing, their compiler may have inserted gaps between fields.
Looking at a hex dump of the file can help locate gaps. They are usually inserted to make the next field start on a multiple
of 4, 8, or 16 bytes. If there are gaps, define extra fields in typedefStruct(), e.g.
['i16', 'myShort'],
['i16', 'packing1'], // inserted to mirror a gap
['i32', 'myInt']
Requires Mozilla StringView (https://developer.mozilla.org/en-US/Add-ons/Code_snippets/StringView) for UTF parsing
(types 'c[]', 'd[]', and 'e[]' only).
*/
// Module wrapper: keeps the lookup tables private and exposes only typedefStruct().
const typedefStruct = (() => {
  'use strict';

  // Every pointer type, whatever it points to, is read/written as an unsigned int of this width.
  const POINTER_BITS = 32;

  // Type name -> name of the DataView method that reads one value of that type.
  // 'c', 'd', 'e' and 'p' are the character types; as scalars they are plain unsigned ints.
  // 64-bit integers use the BigInt accessors (DataView has no getInt64/getUint64), so
  // 'i64' and 'u64' fields yield BigInt values.
  // 'v' (void) has no real accessor; it is listed so that the pointer type 'v*' exists.
  const getters = {
    c: 'getUint8', // utf8
    d: 'getUint16', // utf16
    e: 'getUint32', // utf32
    p: 'getUint8', // pascal string
    i8: 'getInt8',
    i16: 'getInt16',
    i32: 'getInt32',
    i64: 'getBigInt64',
    u8: 'getUint8',
    u16: 'getUint16',
    u32: 'getUint32',
    u64: 'getBigUint64',
    f32: 'getFloat32',
    f64: 'getFloat64',
    v: 'getVoid'
  };

  // Type name -> name of the DataView method that writes one value of that type.
  const setters = {
    c: 'setUint8',
    d: 'setUint16',
    e: 'setUint32',
    p: 'setUint8',
    i8: 'setInt8',
    i16: 'setInt16',
    i32: 'setInt32',
    i64: 'setBigInt64',
    u8: 'setUint8',
    u16: 'setUint16',
    u32: 'setUint32',
    u64: 'setBigUint64',
    f32: 'setFloat32',
    f64: 'setFloat64',
    v: 'setVoid'
  };

  // Types whose DataView setters only accept BigInt; values written to them are coerced.
  const wideIntTypes = new Set(['i64', 'u64']);
  const passThrough = v => v;

  // Registers a pointer variant (name + '*') for every type currently in `accessors`,
  // then repeats `depth` more times so that pointers-to-pointers exist as well
  // (depth 2 gives up to three '*'). The accessor is always '<g|s>etUint<bits>'.
  function addPointerTypes(accessors, depth, sizeof_p) {
    const bits = sizeof_p || POINTER_BITS;
    for (let level = depth | 0; level >= 0; --level) {
      // Object.keys() snapshots the names, so entries added in this pass are
      // only extended by the next pass.
      for (const field of Object.keys(accessors)) {
        accessors[field + '*'] = accessors[field][0] + 'etUint' + bits;
      }
    }
  }
  addPointerTypes(getters, 2);
  addPointerTypes(setters, 2);

  // Type name -> size in bytes of one element.
  const sizeof = {
    c: 1, d: 2, e: 4, p: 1
  };
  for (const field of Object.keys(getters)) {
    if (sizeof[field]) {
      continue; // already known
    }
    sizeof[field] = field.endsWith('*')
      ? POINTER_BITS >> 3
      : (field.slice(1) | 0) >> 3; // bit width follows the type letter, e.g. 'i16' -> 2
  }

  const arrayExpr = /([^[]+)\[(.*)\]/;
  const isKnownType = typ => Object.prototype.hasOwnProperty.call(sizeof, typ);

  // Splits a spec typename into its element type and array dimension
  // (dim is null for a scalar). Throws TypeError for an unrecognised element type.
  function parseType(typ) {
    const arrayMatch = typ.match(arrayExpr);
    const base = arrayMatch ? arrayMatch[1] : typ;
    if (!isKnownType(base)) {
      throw new TypeError("typedefStruct: unknown type '" + typ + "'");
    }
    // A non-numeric or negative dimension is treated as 0.
    const dim = arrayMatch ? Math.max(arrayMatch[2] | 0, 0) : null;
    return { base, dim };
  }

  // Accessors for a single scalar value at byte offset o.
  function scalarAccessors(dataview, o, base, le) {
    const encode = wideIntTypes.has(base) ? BigInt : passThrough;
    return {
      get: () => dataview[getters[base]](o, le),
      set: v => dataview[setters[base]](o, encode(v), le)
    };
  }

  // Accessors for a 'c[]', 'd[]' or 'e[]' field, backed by the external StringView class.
  function stringAccessors(dataview, o, base, dim) {
    const encoding = {c: 'UTF-8', d: 'UTF-16', e: 'UTF-32'}[base];
    // o is relative to the DataView, but StringView is handed the underlying buffer,
    // so the view's own byteOffset has to be added.
    // NOTE(review): assumes StringView's third argument is a byte offset into the
    // ArrayBuffer -- confirm against the StringView version in use.
    const sView = new StringView(dataview.buffer, encoding, dataview.byteOffset + o, dim);
    return {
      get: () => sView.toString(),
      set: s => {
        sView.rawData.fill(0);
        // NOTE(review): current TextEncoder implementations always produce UTF-8 and
        // ignore the constructor argument, so for 'd'/'e' fields this presumably only
        // round-trips ASCII text -- verify before relying on non-ASCII writes.
        sView.rawData.set(new TextEncoder(encoding).encode(s));
      }
    };
  }

  // Accessors for a 'p[dim]' field: one length byte followed by up to dim-1 characters,
  // one byte each.
  function pascalAccessors(dataview, o, dim) {
    const capacity = Math.max(dim - 1, 0);
    return {
      get: () => {
        if (dim < 1) {
          return '';
        }
        // Never trust the stored length beyond the field's own extent.
        const n = Math.min(dataview.getUint8(o), capacity);
        let s = '';
        for (let i = 1; i <= n; ++i) {
          s += String.fromCharCode(dataview.getUint8(o + i));
        }
        return s;
      },
      set: s => {
        if (dim < 1) {
          return;
        }
        const n = Math.min(capacity, s.length); // over-long strings are truncated
        dataview.setUint8(o, n);
        for (let i = 1; i <= n; ++i) {
          dataview.setUint8(o + i, s.charCodeAt(i - 1));
        }
      }
    };
  }

  // Accessors for a numeric array: reads give a fresh JS array of dim values; writes
  // store at most dim leading elements so a long input cannot spill into the next field.
  function arrayAccessors(dataview, o, base, dim, le) {
    const readName = getters[base];
    const writeName = setters[base];
    const sz = sizeof[base];
    const encode = wideIntTypes.has(base) ? BigInt : passThrough;
    return {
      get: () => Array.from({length: dim}, (_, i) => dataview[readName](o + i * sz, le)),
      set: arr => {
        const n = Math.min(arr.length, dim);
        for (let i = 0; i < n; ++i) {
          dataview[writeName](o + i * sz, encode(arr[i]), le);
        }
      }
    };
  }

  // Picks the accessor pair matching a parsed field.
  function fieldAccessors(dataview, o, base, dim, le) {
    if (dim === null) {
      return scalarAccessors(dataview, o, base, le);
    }
    if (base === 'c' || base === 'd' || base === 'e') {
      return stringAccessors(dataview, o, base, dim);
    }
    if (base === 'p') {
      return pascalAccessors(dataview, o, dim);
    }
    return arrayAccessors(dataview, o, base, dim, le);
  }

  /**
   * Returns a class which can be instantiated at an offset into a DataView and whose
   * fields are defined in spec. Each fieldname becomes an instance property with a
   * getter and setter that directly manipulate the DataView contents.
   *
   * @param spec          iterable of [typename, fieldname] pairs; it is read once, when
   *                      typedefStruct() is called.
   * @param littleEndian  truthy for little-endian fields; otherwise big-endian.
   * @returns class Struct with constructor(dataview, offset = 0), an instance property
   *          byteLength (total size of all fields) and a method copy().
   * @throws TypeError if a typename's element type is not recognised.
   */
  function typedefStruct(spec, littleEndian) {
    const le = !! littleEndian;

    // Lay the fields out once, back to back (no padding), relative to the struct start.
    const layout = [];
    let byteLength = 0;
    for (const [typ, nm] of spec) {
      const { base, dim } = parseType(typ);
      layout.push({ name: nm, base, dim, rel: byteLength });
      byteLength += (dim === null ? 1 : dim) * sizeof[base];
    }

    return class Struct {
      constructor(dataview, offset = 0) {
        for (const { name, base, dim, rel } of layout) {
          Object.defineProperty(this, name, fieldAccessors(dataview, offset + rel, base, dim, le));
        }
        this.byteLength = byteLength;
      }

      // Reads every field once and returns the values in a plain object keyed by
      // fieldname, which no longer references the DataView.
      copy() {
        const c = {};
        for (const { name } of layout) {
          c[name] = this[name];
        }
        return c;
      }
    };
  }

  return typedefStruct;
})();