Chunker provides a simple API for parsing chunk-based file formats such as ZIP, PNG, JPEG and WAV.
You simply define the structure and Chunker handles the rest.
The following example prints the names of the files stored in a zip archive:
class InsideFileChunk(Chunk):
    """Chunk describing one file stored inside the zip package.

    ``Fields`` lists the members of the zip local file header in their
    on-disk order, followed by the variable-length file name, the extra
    field and the (compressed) file data.
    """

    Fields = (
        UnsignedLongField('signature'),
        UnsignedShortField('min_ver'),
        UnsignedShortField('general_purpose'),
        UnsignedShortField('compress_method'),
        UnsignedShortField('last_modified_time'),
        UnsignedShortField('last_modified_date'),
        UnsignedLongField('crc32'),
        UnsignedLongField('compressed_size'),
        UnsignedLongField('uncompressed_size'),
        UnsignedShortField('filename_length'),
        UnsignedShortField('extra_field_length'),
        # The second argument names the earlier field that holds the length.
        StringField('filename', 'filename_length'),
        SkipBasedOnLengthField('extra_field', 'extra_field_length'),
        # No need to read file content
        # NOTE(review): archives written with a trailing data descriptor
        # store 0 in 'compressed_size' here, so the data would not be
        # skipped for them -- confirm whether such archives must be supported.
        SkipBasedOnLengthField('data', 'compressed_size'),
    )

    @classmethod
    def safe_matches(cls, fp):
        """Tell whether the next four bytes of ``fp`` start a file chunk.

        Reads four bytes from ``fp`` and does not rewind; presumably the
        caller restores the stream position -- TODO confirm.

        :param fp: binary file object positioned at a chunk boundary.
        :returns: ``True`` if the bytes read equal the little-endian
            signature ``0x04034b50``; ``False`` otherwise, including when
            fewer than four bytes are left in the stream.
        """
        # Each file chunk starts with 0x04034b50
        buf = fp.read(4)
        if len(buf) < 4:
            # A short read (end of file or truncated archive) cannot be a
            # signature; struct.unpack would raise struct.error on it.
            return False
        sig = struct.unpack('<L', buf)[0]
        return sig == 0x04034b50
class SkipTheLeftChunk(ToTheEndChunk):
    """Catch-all chunk for whatever follows the last file chunk.

    It matches any position that does not start with the file-chunk
    signature and, when populated, moves the stream to its end.
    """

    Fields = (
        UnsignedLongField('signature'),
    )

    @classmethod
    def safe_matches(cls, fp):
        """Return True when the next four bytes are NOT the file-chunk signature.

        :param fp: binary file object positioned at a chunk boundary.
        """
        # NOTE(review): a read shorter than four bytes makes struct.unpack
        # raise struct.error here -- confirm the parser never calls this at
        # (or within three bytes of) the end of the file.
        (signature,) = struct.unpack('<L', fp.read(4))
        starts_file_chunk = signature == 0x04034b50
        return not starts_file_chunk

    def populate(self):
        """Skip everything that is left by seeking to the end of the file."""
        stream = self.fp
        stream.seek(0, os.SEEK_END)
class ZipParser(FileParser):
    """Parser for zip packages that only cares about the stored files."""

    ChunkClasses = (
        # Chunk indicating files in the zip package.
        InsideFileChunk,
        # We have no interest in other chunk types at the end of the zip file.
        SkipTheLeftChunk,
    )
# Parse the archive, then print the name of every file chunk that was found.
p = ZipParser('test.zip')
p.parse()
for chunk in p.chunks:
    # Chunks of other types (e.g. SkipTheLeftChunk) carry no file name.
    if isinstance(chunk, InsideFileChunk):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(chunk.filename)