Chunker - Easy Chunk-Based File Structure Parsing

Chunker provides simple APIs to help parse chunk-based file structures such as ZIP, PNG, JPEG, and WAV.

You simply define the structure and Chunker handles the rest.

An example printing out file names in a zip file:

class InsideFileChunk(Chunk):
    """Chunk describing one file stored inside the zip package.

    The fields are declared in the order they are laid out in the file,
    starting with the 4-byte signature checked by ``safe_matches``.
    """

    Fields = (
        UnsignedLongField('signature'),
        UnsignedShortField('min_ver'),
        UnsignedShortField('general_purpose'),
        UnsignedShortField('compress_method'),
        UnsignedShortField('last_modified_time'),
        UnsignedShortField('last_modified_date'),
        UnsignedLongField('crc32'),
        UnsignedLongField('compressed_size'),
        UnsignedLongField('uncompressed_size'),
        UnsignedShortField('filename_length'),
        UnsignedShortField('extra_field_length'),
        # NOTE(review): the second argument presumably names the field that
        # holds the length of this variable-sized part -- confirm against
        # the field classes.
        StringField('filename', 'filename_length'),
        SkipBasedOnLengthField('extra_field', 'extra_field_length'),
        SkipBasedOnLengthField('data', 'compressed_size'),  # No need to read file content
    )

    @classmethod
    def safe_matches(cls, fp):
        """Return True if the next 4 bytes of ``fp`` are the file signature.

        A short read (fewer than 4 bytes left, e.g. a truncated archive)
        cannot be a file chunk, so it is reported as "no match" instead of
        letting ``struct.unpack`` raise ``struct.error``.
        """
        # Each file chunk starts with 0x04034b50
        buf = fp.read(4)
        if len(buf) < 4:
            return False
        sig = struct.unpack('<L', buf)[0]
        return sig == 0x04034b50

class SkipTheLeftChunk(ToTheEndChunk):
    """Catch-all chunk for whatever follows the last file chunk."""

    Fields = (UnsignedLongField('signature'),)

    @classmethod
    def safe_matches(cls, fp):
        """Return True if the next 4 bytes of ``fp`` are not 0x04034b50."""
        file_signature = 0x04034b50
        (found,) = struct.unpack('<L', fp.read(4))
        return found != file_signature

    def populate(self):
        """Move the file position to the end of the file."""
        # Skip the rest
        self.fp.seek(0, os.SEEK_END)

class ZipParser(FileParser):
    """File parser for zip packages, built from the two chunk classes listed."""

    # InsideFileChunk:  chunk indicating files in the zip package.
    # SkipTheLeftChunk: we have no interest in other chunk types at the end
    #                   of the zip file.
    ChunkClasses = (InsideFileChunk, SkipTheLeftChunk)

# Parse the archive, then print the name of every file chunk found.
p = ZipParser('test.zip')
p.parse()
for chunk in p.chunks:
    # Only InsideFileChunk declares a 'filename' field.
    if isinstance(chunk, InsideFileChunk):
        # Parenthesised form works on both Python 2 and Python 3.
        print(chunk.filename)

Next topic

Parsers

This Page