author     Martin Braun <martin.braun@ettus.com>  2017-10-09 17:03:29 -0700
committer  Martin Braun <martin.braun@ettus.com>  2017-12-22 15:04:02 -0800
commit     221805d8b15bd70dbe4528a91e1f955b8e43f36e (patch)
tree       88948efdd7abcf88ba3e351407a32eae15415228 /mpm/python/usrp_mpm/bfrfs.py
parent     35dc86aaa5c880641b80f494f044f21963207dbc (diff)
mpm: Add BufferFS serialization library
BufferFS is a serialization format with CRC checking and optional
byte-alignment for records. It allows storing arbitrary blobs, together
with an 8-character identifier, in a contiguous buffer that supports
random access. This makes it suitable for storing blobs in EEPROM, but
it is not limited to that use case.

Signed-off-by: Martin Braun <martin.braun@ettus.com>
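
A minimal usage sketch (buffer contents, sizes, and the logger below are
hypothetical; the log object must provide trace()/debug()/warning(), as
the MPM loggers do):

    from usrp_mpm.bfrfs import BufferFS

    raw = eeprom_contents  # bytes previously read from an EEPROM
    bfs = BufferFS(raw, max_size=8192, alignment=1024, log=my_logger)
    bfs.set_blob('caldata', b'\x01\x02\x03\x04')
    if bfs.has_blob('caldata'):
        blob = bfs.get_blob('caldata')
    # bfs.buffer now holds the updated image, ready to write back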
Diffstat (limited to 'mpm/python/usrp_mpm/bfrfs.py')
-rw-r--r--  mpm/python/usrp_mpm/bfrfs.py  375
1 file changed, 375 insertions(+), 0 deletions(-)
diff --git a/mpm/python/usrp_mpm/bfrfs.py b/mpm/python/usrp_mpm/bfrfs.py
new file mode 100644
index 000000000..78930913d
--- /dev/null
+++ b/mpm/python/usrp_mpm/bfrfs.py
@@ -0,0 +1,375 @@
+#
+# Copyright 2017 Ettus Research (National Instruments)
+#
+# SPDX-License-Identifier: GPL-3.0
+#
+"""
+BufferFS. Serialization mini-library meant for use with EEPROMs.
+"""
+
+from __future__ import print_function, division
+import copy
+import struct
+import zlib
+from collections import OrderedDict
+from builtins import str
+from six import itervalues
+
+DEFAULT_ALIGNMENT = 1024 # bytes
+
+def align_addr(addr, align_to):
+ """
+ Align an address to an alignment boundary, rounding upwards.
+
+ >>> align_addr(5, 8)
+ 8
+ >>> align_addr(21, 8)
+ 24
+ >>> align_addr(16, 16)
+ 16
+ """
+ div, mod = divmod(addr, align_to)
+ return (div + (1 if mod else 0)) * align_to
+
+def _normalize_byte_str(identifier, length=8, pad=b'\0'):
+    " Guarantees that identifier is a byte string of length 'length' "
+    if not isinstance(identifier, bytes):
+        identifier = bytes(identifier, 'ascii')
+    if len(identifier) < length:
+        identifier = identifier + pad * (length - len(identifier))
+    return identifier[:length]
+
+class BufferFS(object):
+ """
+ Buffer-FS -- Map dictionaries of arbitrary byte strings into a buffer.
+
+ This can be useful for storing arbitrary blobs in EEPROMs, but the
+    implementation is not specific to EEPROMs.
+
+ Effectively, this is a serialization class with some CRC checking and byte-
+ alignment. Something like pickle will often also do the trick.
+
+ Arguments:
+ raw_data_buffer -- A copy of the original buffer
+ max_size -- Max length of the buffer in bytes. raw_data_buffer may be
+ smaller than this.
+ alignment -- This will align blobs to certain address boundaries.
+ log -- Logger object. If none is given, one will be created.
+ """
+ magic = b'TofC'
+ default_header = ("!4s I", ('magic', 'version'))
+ default_version = 0
+
+ # Version 0 TOC:
+ # --------------
+ # 4 bytes magic number
+    # 4 bytes TOC version (note: up to here, all versions of the TOC will
+    #   look the same)
+ # 4 bytes num entries
+ # <entries>
+ # 4 bytes CRC of entire TOC
+ #
+ # Version 0 Entry:
+ # --------------
+ # 4 bytes base address
+ # 4 bytes length in bytes
+ # 4 bytes CRC of entry
+    # 8 bytes identifier (string, null-padded if shorter than 8 bytes)
+ entry = {
+ 0: ("!I I I 8s", ('base', 'length', 'CRC', 'id')),
+ }
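+    # Worked example of the version-0 sizes, derived from the structs above:
+    #   header "!4s I"     ->  8 bytes
+    #   num entries "!I"   ->  4 bytes
+    #   entry "!I I I 8s"  -> 20 bytes each
+    #   TOC CRC "!I"       ->  4 bytes
+    # With the 128-byte TOC area reserved in __init__(), that leaves room
+    # for (128 - 16) // 20 == 5 entries (cf. max_entries below).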
+
+
+ def __init__(self, raw_data_buffer, max_size=None, alignment=None, log=None):
+ self.max_size = len(raw_data_buffer) if max_size is None else max_size
+ self.raw_data_buffer = raw_data_buffer[:self.max_size]
+ self.alignment = alignment or DEFAULT_ALIGNMENT
+ self.pad = b'\xFF'
+ if log is None:
+ from usrp_mpm import mpmlog
+ self.log = mpmlog.get_main_logger().getChild("EEPROMFS")
+ else:
+ self.log = log
+ header = self._parse_header(raw_data_buffer)
+ self.log.trace("EEPROM-FS header has {} valid entries.".format(
+ len(header.get('entries', []))))
+        # Build the OrderedDict from key/value pairs so that entry order is
+        # preserved even where dict literals are unordered:
+        self.entries = OrderedDict(
+            (str(x['id'], encoding='ascii'), x)
+            for x in header.get('entries', [])
+        )
+ self.buffer = self._trunc_buffer(raw_data_buffer, self.entries)
+ self.log.trace("Truncated buffer to length %d", len(self.buffer))
+ # Start storing entries at 128
+ self.entries_base = 128
+ # We can only store so many entries before running out of TOC space
+ self.max_entries = (128 - 16) // 20
+ # TODO -- these last two shouldn't be hard coded
+ self.log.trace(
+ "This BufferFS has {} max entries, starting at {}".format(
+ self.max_entries, self.entries_base))
+
+ def _parse_header(self, buf):
+ """
+ Read the buffer and return header info as a list of dictionaries.
+ """
+ default_hdr_struct = struct.Struct(self.default_header[0])
+ if len(buf) < default_hdr_struct.size + 8:
+ return {}
+ default_hdr_unpacked = default_hdr_struct.unpack_from(buf)
+ hdr = dict(list(zip(self.default_header[1], default_hdr_unpacked)))
+ # There might be no EEPROM-FS, that's legit:
+ if hdr['magic'] != self.magic:
+ self.log.trace(
+ "No Buffer-FS magic found (was: `{}'), " \
+ "returning empty dict.".format(hdr['magic']))
+ return {}
+ self.log.trace("Buffer has correct magic word...")
+ # The rest of this function assumes version is 0:
+ toc_version = int(hdr['version'])
+        self.log.trace("Found ToC version: {}".format(toc_version))
+ assert toc_version == 0
+ num_entries_struct = struct.Struct('!I')
+ num_entries = num_entries_struct.unpack_from(
+ buf, offset=default_hdr_struct.size
+ )[0]
+ self.log.trace("Header declares num entries: {}".format(num_entries))
+ toc_offset = default_hdr_struct.size + num_entries_struct.size
+ self.log.trace("TOC offset: {}".format(toc_offset))
+ entry_struct = struct.Struct(self.entry[toc_version][0])
+ entries = []
+ for entry_idx in range(num_entries):
+ entry_offset = toc_offset+entry_idx*entry_struct.size
+ entry_unpacked = \
+ entry_struct.unpack_from(
+ buf,
+ offset=entry_offset
+ )
+ entries.append(
+ dict(list(zip(self.entry[toc_version][1], entry_unpacked)))
+ )
+ entries[-1]['id'] = entries[-1]['id'].rstrip(b'\0')
+ self.log.trace("TOC has %d entries (CRC un-checked)", len(entries))
+ crc_offset = toc_offset + num_entries * entry_struct.size
+ self.log.trace("TOC CRC offset: %d", crc_offset)
+ crc_struct = struct.Struct('!I')
+ crc = crc_struct.unpack_from(buf, offset=crc_offset)[0]
+ self.log.trace("Calculating TOC CRC32 on %d bytes...", crc_offset)
+ expected_crc = zlib.crc32(buf[:crc_offset])
+ if crc != expected_crc:
+ self.log.warning(
+ "EEPROM-FS Header CRC failed! " \
+ "Read: {:08X} Expected: {:08X}".format(crc, expected_crc))
+ return hdr
+ self.log.trace("CRC32 matches.")
+ hdr['entries'] = entries
+ return hdr
+
+ def _trunc_buffer(self, buf, entries):
+ """
+ Return the shortest possible buf that contains all entries.
+ """
+ max_len = max([
+ x['base'] + x['length'] for x in itervalues(entries)
+ ] + [0])
+ return buf[:max_len]
+
+
+ def get_blob(self, identifier, entries=None, buf=None):
+ """
+ Return blob by ID.
+
+ By default, will use the internal entries table and buffer.
+ """
+ entries = entries or self.entries
+ buf = buf or self.buffer
+ if identifier not in entries:
+ raise RuntimeError("Requesting non-existent blob {}!".format(
+ identifier))
+ entry_info = entries[identifier]
+ entry_base = entry_info['base']
+ entry_len = entry_info['length']
+ entry_buf = buf[entry_base:entry_base+entry_len]
+ entry_crc = zlib.crc32(entry_buf)
+ self.log.trace("Calculating blob CRC32 on %d bytes...", len(entry_buf))
+ if entry_crc != entry_info['CRC']:
+ raise RuntimeError(
+ "Entry `{}' has CRC mismatch! " \
+ "Calculated {:08X}, expected {:08X}.".format(
+ identifier, entry_crc, entry_info['CRC']
+ )
+ )
+ return entry_buf
+
+ def has_blob(self, identifier):
+ """
+ Returns True if the blob 'identifier' exists.
+ """
+        return identifier in self.entries
+
+ def set_blob(self, identifier, blob):
+ """
+ Add a blob to the list.
+ """
+ self.log.trace("Attempting to add new blob `{}'...".format(identifier))
+ identifier = _normalize_byte_str(identifier, length=8)
+ identifier_str = str(identifier.rstrip(b'\0'), encoding='ascii')
+ if identifier_str not in self.entries and \
+ len(self.entries) >= self.max_entries:
+ self.log.error("Exceeded entry limit.")
+ raise RuntimeError("Exceeded entry limit.")
+ entry_info = {
+ 'CRC': zlib.crc32(blob),
+ 'length': len(blob),
+ 'id': identifier,
+ }
+ alignment = self.alignment
+ self.log.trace("Byte-alignment is {}".format(alignment))
+ new_entries = copy.copy(self.entries)
+ entry_base = self._find_base(entry_info, new_entries,
+ alignment=alignment)
+ self.log.trace("First attempt at finding a base yields: {}".format(
+ entry_base
+ ))
+        # The entries dict is keyed by the decoded identifier string:
+        new_entries.pop(identifier_str, None)
+ if entry_base is None:
+ self.log.trace("First attempt to find a spot failed.")
+ space_occupied = self._calc_space_occupied(
+ new_entries,
+ alignment=alignment
+ )
+ self.log.trace("Current blobs are occupying {} bytes.".format(
+ space_occupied
+ ))
+ if space_occupied + entry_info['length'] > self.max_size:
+ raise RuntimeError("Not enough space to store blob!")
+ new_entries, new_buffer = \
+ self._pack_entries(new_entries, self.buffer, alignment)
+ entry_base = self._find_base(
+ entry_info,
+ new_entries,
+ alignment=alignment
+ )
+ self.log.trace("2nd attempt at finding a base yields: {}".format(
+ entry_base
+ ))
+ if entry_base is None:
+ raise RuntimeError("Unexpected failure trying to park new blob!")
+ self.buffer, self.entries = new_buffer, new_entries
+ entry_info['base'] = entry_base
+ if len(self.buffer) < entry_base:
+ self.buffer += self.pad * (entry_base - len(self.buffer))
+ assert len(self.buffer) >= entry_base
+ self.entries[identifier_str] = entry_info
+ buf_base = \
+ self.buffer[:self.entries_base] + \
+ self.pad * (self.entries_base - len(self.buffer[:self.entries_base]))
+ assert len(buf_base) == self.entries_base
+ self.log.trace("Updating TOC...")
+ buf_base = self._update_toc(self.entries, buf_base)
+ self.log.trace("Splicing new blob into buffer...")
+ assert len(buf_base) == self.entries_base
+ self.buffer = self._trunc_buffer(
+ buf_base \
+ + self.buffer[len(buf_base):entry_base] \
+ + blob \
+ + self.buffer[entry_base+entry_info['length']:],
+ self.entries,
+ )
+
+
+ def _find_base(self, new_entry, entries, alignment):
+ """
+ Find a spot to park a new entry.
+
+ If it's actually the same ID as an existing entry, try and re-use that
+ space. If the previous entry was smaller, and there's another entry
+ following, move the entry towards the end.
+
+ If it can't overwrite an existing entry, or append (because of space
+ limitations), don't try and be smart. Just return None.
+ """
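+        # Example (hypothetical numbers): with existing entries at base 128
+        # (length 100) and base 1024 (length 10), and alignment 1024, a new
+        # blob that cannot reuse its old slot must go after
+        # max(128 + 100, 1024 + 10) == 1034, i.e. at
+        # align_addr(1034, 1024) == 2048.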
+ entry_id = str(new_entry['id'].rstrip(b'\0'), encoding='ascii')
+ entry_len = new_entry['length']
+ self.log.trace(
+ "Trying to find a spot for blob `%s' of length %d",
+ entry_id, entry_len
+ )
+ if entry_id in entries and \
+ (entry_len <= entries[entry_id]['length'] or \
+ entries[entry_id]['base'] == \
+ max((x['base'] for x in itervalues(entries)))
+ ):
+ self.log.trace(
+ "Blob was already in index, reusing address %d",
+ entries[entry_id]['base'],
+ )
+ return entries[entry_id]['base']
+ last_base = \
+ max([x['base'] + x['length'] for x in itervalues(entries)] \
+ + [self.entries_base])
+ self.log.trace("New entry needs to go after address %d", last_base)
+ new_base = align_addr(last_base, alignment)
+ self.log.trace("New address is: %d (Alignment is: %d)",
+ new_base, alignment)
+        if new_base + entry_len <= self.max_size:
+ return new_base
+ self.log.debug(
+ "New base address %d and length %d would exceed EEPROM size",
+ new_base, entry_len
+ )
+ return None
+
+ def _calc_space_occupied(self, entries, alignment):
+ """
+ Returns the number of bytes required to store TOC and entries, given
+ a certain alignment.
+ """
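+        # Example (hypothetical numbers): with alignment=1024 and
+        # entries_base=128, blobs of 100 and 2000 bytes occupy
+        # align_addr(128, 1024) + align_addr(100, 1024) + align_addr(2000, 1024)
+        # == 1024 + 1024 + 2048 == 4096 bytes.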
+ return sum(
+ [align_addr(x['length'], alignment) for x in itervalues(entries)],
+ align_addr(self.entries_base, alignment),
+ )
+
+ def _pack_entries(self, entries_, buf, alignment):
+ """
+ Reorder entries to minimize fragmentation, then return a new buf
+
+ Note: This is not going to try and be smart. In whatever order the
+ blobs are stored, they will stay in that order. Reordering could be
+ better given a certain alignment, but that's "room for improvement".
+ """
+        raise NotImplementedError("_pack_entries() not yet implemented") # FIXME
+ # Algorithm is fairly simple:
+ # - Copy all entries_ into a new dict entries
+ # entries = copy.copy(entries_)
+ # - Read all blobs from buf, make another dictionary id -> blob,
+ # storing all the blobs
+ # - Go through the entries in order, recalculate base addresses such
+ # that they are maximally packed.
+ # First address is self.entries_base, second base address is
+ # align_addr(first_entry_base + len(first_blob)), third address is
+ # align_addr(second_entry_base + len(second_blob)), and so on
+ # - Then, create a string that consists of a new TOC, and all the blobs
+ # with appropriate padding
+
+ def _update_toc(self, entries, toc_buf):
+ """
+ Returns a new TOC buffer based on entries.
+ """
+ toc_version = 0 # This method is hardcoded to version 0
+ # Not a great example of generic SW design
+ entries_sorted = sorted(entries.values(), key=lambda x: x['base'])
+        new_toc = \
+            struct.Struct(self.default_header[0]).pack(
+                self.magic, toc_version) + \
+            struct.Struct('!I').pack(len(entries))
+ entry_struct = struct.Struct(self.entry[toc_version][0])
+ for entry_info in entries_sorted:
+ new_toc += entry_struct.pack(
+ entry_info['base'],
+ entry_info['length'],
+ entry_info['CRC'],
+ entry_info['id'],
+ )
+ self.log.trace("Calculating new TOC CRC32 on %d bytes...", len(new_toc))
+ new_toc_crc = zlib.crc32(new_toc)
+ new_toc += struct.Struct('!I').pack(new_toc_crc)
+ assert len(new_toc) < self.entries_base
+ return new_toc + toc_buf[len(new_toc):]
+
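
For illustration, here is a minimal sketch of how the commented
_pack_entries() algorithm could be implemented. This is not part of the
commit: it assumes the version-0 layout above, keeps blobs in their
existing base-address order, and reuses get_blob() to read (and
CRC-check) each blob before re-basing it.

    def _pack_entries(self, entries_, buf, alignment):
        """
        Re-base all blobs to remove gaps; return (entries, buffer).
        """
        # Copy entries (and their inner dicts) so the caller's table is
        # untouched if anything below raises
        entries = OrderedDict(
            (key, dict(val)) for key, val in entries_.items()
        )
        # Read all blobs out of the old buffer first (verifies CRCs, too)
        blobs = {
            identifier: self.get_blob(identifier, entries, buf)
            for identifier in entries
        }
        # Keep the TOC area as-is; set_blob() rewrites it via _update_toc()
        new_buf = buf[:self.entries_base]
        for identifier in sorted(entries, key=lambda k: entries[k]['base']):
            new_base = align_addr(
                max(len(new_buf), self.entries_base), alignment)
            new_buf += self.pad * (new_base - len(new_buf))
            new_buf += blobs[identifier]
            entries[identifier]['base'] = new_base
        return entries, new_buf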