import os.path import io import struct import re from binascii import b2a_hex from hexdump import hexdump, asasc, tohex, unhex, strescape from koddecoder import kodecode from readers import ByteReader import zlib from collections import defaultdict """ python3 crodump.py crodump chechnya_proverki_ul_2012 python3 crodump.py kodump -s 6 -o 0x4cc9 -e 0x5d95 chechnya_proverki_ul_2012/CroStru.dat """ def toout(args, data): """ return either ascdump or hexdump """ if args.ascdump: return asasc(data) else: return tohex(data) def enumunreferenced(ranges, filesize): """ from a list of used ranges and the filesize, enumerate the list of unused ranges """ o = 0 for start, end, desc in sorted(ranges): if start > o: yield o, start-o o = end if o Bank # 0400 -> Index or Sys # 0200 -> Stru or Sys # encoding # 0000 # 0001 --> 'KOD encoded' # 0002 # 0003 --> encrypted def readtad(self): self.tad.seek(0) hdrdata = self.tad.read(2*4) self.nrdeleted, self.firstdeleted = struct.unpack("<2L", hdrdata) indexdata = self.tad.read() if self.use64bit: # 01.03 has 64 bit file offsets self.tadidx = [ struct.unpack_from(">24 ln &= 0xFFFFFFF dat = self.readdata(ofs, ln) if not dat: # empty record encdat = dat elif not flags: extofs, extlen = struct.unpack(">24 ln &= 0xFFFFFFF dat = self.readdata(ofs, ln) ranges.append((ofs, ofs+ln, "item #%d" % i)) decflags = [' ', ' '] infostr = "" tail = b'' if not dat: # empty record encdat = dat elif not flags: if self.use64bit: extofs, extlen = struct.unpack("HH", data, o) if flag!=0x800 and flag!=0x008: return o += size + 2 return True def decompress(self, data): result = b"" o = 0 while o < len(data)-3: size, flag, crc = struct.unpack_from(">HHL", data, o) C = zlib.decompressobj(-15) result += C.decompress(data[o+8:o+8+size]) o += size + 2 return result def dump_db_definition(args, dbdict): """ decode the 'bank' / database definition """ for k, v in dbdict.items(): if re.search(b'[^\x0d\x0a\x09\x20-\x7e\xc0-\xff]', v): print("%-20s - %s" % (k, toout(args, v))) else: print("%-20s - \"%s\"" % (k, strescape(v))) class FieldDefinition: def __init__(self, data): self.decode(data) def decode(self, data): self.defdata = data rd = ByteReader(data) self.typ = rd.readword() self.idx1 = rd.readdword() self.name = rd.readname() self.flags = rd.readdword() self.minval = rd.readbyte() # Always 1 if self.typ: self.idx2 = rd.readdword() self.maxval = rd.readdword() # max value or length self.unk4 = rd.readdword() # Always 0x00000009 or 0x0001000d else: self.idx2 = self.maxval = self.unk4 = None self.remaining = rd.readbytes() def __str__(self): if self.typ: return "Type: %2d (%2d/%2d) %04x,(%d-%4d),%04x - %-40s -- %s" % (self.typ, self.idx1, self.idx2, self.flags, self.minval, self.maxval, self.unk4, "'%s'" % self.name, tohex(self.remaining)) else: return "Type: %2d %2d %d,%d - '%s'" % (self.typ, self.idx1, self.flags, self.minval, self.name) class TableDefinition: def __init__(self, data): self.decode(data) def decode(self, data): """ decode the 'base' / table definition """ rd = ByteReader(data) self.unk1 = rd.readword() self.version = rd.readbyte() if self.version > 1: _ = rd.readbyte() # always 0 anyway self.unk2 = rd.readbyte() # if this is not 5 (but 9), there's another 4 bytes inserted, this could be a length-byte. self.unk3 = rd.readbyte() if self.unk2 > 5: # seen only 5 and 9 for now with 9 implying an extra dword _ = rd.readdword() self.unk4 = rd.readdword() self.tableid = rd.readdword() self.tablename = rd.readname() self.abbrev = rd.readname() self.unk7 = rd.readdword() nrfields = rd.readdword() self.headerdata = data[:rd.o] self.fields = [] for _ in range(nrfields): l = rd.readword() fielddef = rd.readbytes(l) self.fields.append(FieldDefinition(fielddef)) self.remainingdata = rd.readbytes() def __str__(self): return "%d,%d<%d,%d,%d>%d %d,%d '%s' '%s'" % ( self.unk1, self.version, self.unk2, self.unk3, self.unk4, self.tableid, self.unk7, len(self.fields), self.tablename, self.abbrev) def dump(self, args): if args.verbose: print("table: %s" % tohex(self.headerdata)) print(str(self)) for field in self.fields: if args.verbose: print("field: @%04x: %04x - %s" % (field.byteoffset, len(field.defdata), tohex(field.defdata))) print(str(field)) if args.verbose: print("remaining: %s" % tohex(self.remainingdata)) def destruct_sys3_def(rd): pass def destruct_sys4_def(rd): n = rd.readdword() for _ in range(n): marker = rd.readdword() description = rd.readlongstring() path = rd.readlongstring() marker2 = rd.readdword() print("%08x;%08x: %-50s : %s" % (marker, marker2, path, description)) def destruct_sys_definition(args, data): """ decode the 'sys' / dbindex definition """ rd = ByteReader(data) systype = rd.readbyte() if systype == 3: return destruct_sys3_def(rd) elif systype == 4: return destruct_sys4_def(rd) else: raise Exception("unsupported sys record") class Database: """ represent the entire database, consisting of Stru, Index and Bank files """ def __init__(self, dbdir): self.dbdir = dbdir self.stru = self.getfile("Stru") self.index = self.getfile("Index") self.bank = self.getfile("Bank") self.sys = self.getfile("Sys") # BankTemp, Int def nrofrecords(self): return len(self.bank.tadidx) def getfile(self, name): try: datname = self.getname(name, "dat") tadname = self.getname(name, "tad") if datname and tadname: return Datafile(name, open(datname, "rb"), open(tadname, "rb")) except IOError: return def getname(self, name, ext): """ get a case-insensitive filename match for 'name.ext'. Returns None when no matching file was not found. """ basename = "Cro%s.%s" % (name, ext) for fn in os.scandir(self.dbdir): if basename.lower() == fn.name.lower(): return os.path.join(self.dbdir, fn.name) def dump(self, args): if self.stru: self.stru.dump(args) if self.index: self.index.dump(args) if self.bank: self.bank.dump(args) if self.sys: self.sys.dump(args) def strudump(self, args): if not self.stru: print("missing CroStru file") return self.dump_db_table_defs(args) def decode_db_definition(self, data): """ decode the 'bank' / database definition """ rd = ByteReader(data) d = dict() while not rd.eof(): keyname = rd.readname() if keyname in d: print("WARN: duplicate key: %s" % keyname) index_or_length = rd.readdword() if index_or_length >> 31: d[keyname] = rd.readbytes(index_or_length & 0x7FFFFFFF) else: refdata = self.stru.readrec(index_or_length) if refdata[:1] != b"\x04": print("WARN: expected refdata to start with 0x04") d[keyname] = refdata[1:] return d def dump_db_table_defs(self, args): """ decode the table defs from recid #1, which always has table-id #3 Note that I don't know if it is better to refer to this by recid, or by table-id. other table-id's found in CroStru: #4 -> large values referenced from tableid#3 """ dbinfo = self.stru.readrec(1) if dbinfo[:1] != b"\x03": print("WARN: expected dbinfo to start with 0x03") dbdef = self.decode_db_definition(dbinfo[1:]) dump_db_definition(args, dbdef) for k, v in dbdef.items(): if k.startswith("Base") and k[4:].isnumeric(): print("== %s ==" % k) tbdef = TableDefinition(v) tbdef.dump(args) def enumerate_tables(self): dbinfo = self.stru.readrec(1) if dbinfo[:1] != b"\x03": print("WARN: expected dbinfo to start with 0x03") dbdef = self.decode_db_definition(dbinfo[1:]) for k, v in dbdef.items(): if k.startswith("Base") and k[4:].isnumeric(): yield TableDefinition(v) def enumerate_records(self, table): """ usage: for tab in db.enumerate_tables(): for rec in db.enumerate_records(tab): print(sqlformatter(tab, rec)) """ for i in range(self.nrofrecords()): data = self.bank.readrec(i+1) if data and data[0] == table.tableid: yield i+1, data[1:].split(b"\x1e") def recdump(self, args): if args.index: dbfile = self.index elif args.sys: dbfile = self.sys elif args.stru: dbfile = self.stru else: dbfile = self.bank if not dbfile: print(".dat not found") return if args.skipencrypted and dbfile.encoding==3: print("Skipping encrypted CroBank") return nerr = 0 nr_recnone = 0 nr_recempty = 0 tabidxref = [0] * 256 bytexref = [0] * 256 for i in range(1, args.maxrecs+1): try: data = dbfile.readrec(i) if args.find1d: if data and (data.find(b"\x1d")>0 or data.find(b"\x1b")>0): print("%d -> %s" % (i, b2a_hex(data))) break elif not args.stats: if data is None: print("%5d: " % i) else: print("%5d: %s" % (i, toout(args, data))) else: if data is None: nr_recnone += 1 elif not len(data): nr_recempty += 1 else: tabidxref[data[0]] += 1 for b in data[1:]: bytexref[b] += 1 nerr = 0 except IndexError: break except Exception as e: print("%5d: <%s>" % (i, e)) if args.debug: raise nerr += 1 if nerr > 5: break if args.stats: print("-- table-id stats --, %d * none, %d * empty" % (nr_recnone, nr_recempty)) for k, v in enumerate(tabidxref): if v: print("%5d * %02x" % (v, k)) print("-- byte stats --") for k, v in enumerate(bytexref): if v: print("%5d * %02x" % (v, k)) def incdata(data, s): """ add 's' to each byte. This is useful for finding the correct shift from an incorrectly shifted chunk. """ return b"".join(struct.pack(" read from stdin. import sys data = sys.stdin.buffer.read() if args.unhex: data = unhex(data) decode_kod(args, data) def cro_dump(args): """ handle 'crodump' subcommand """ db = Database(args.dbdir) db.dump(args) def stru_dump(args): """ handle 'strudump' subcommand """ db = Database(args.dbdir) db.strudump(args) def sys_dump(args): """ hexdump all CroSys records """ db = Database(args.dbdir) if db.sys: db.sys.dump(args) def rec_dump(args): """ hexdump all records of the specified CroXXX.dat file. """ if args.maxrecs: args.maxrecs = int(args.maxrecs, 0) else: # an arbitrarily large number. args.maxrecs = 0xFFFFFFFF db = Database(args.dbdir) db.recdump(args) def destruct(args): """ decode the index#1 structure information record Takes hex input from stdin. """ import sys data = sys.stdin.buffer.read() data = unhex(data) if args.type==1: destruct_db_definition(args, data) elif args.type==2: tbdef = TableDefinition(data) tbdef.dump(args) elif args.type==3: destruct_sys_definition(args, data) def main(): import argparse parser = argparse.ArgumentParser(description='CRO hexdumper') subparsers = parser.add_subparsers() parser.set_defaults(handler=None) parser.add_argument('--debug', action='store_true', help='break on exceptions') ko = subparsers.add_parser('kodump', help='KOD/hex dumper') ko.add_argument('--offset', '-o', type=str, default="0") ko.add_argument('--length', '-l', type=str) ko.add_argument('--width', '-w', type=str) ko.add_argument('--endofs', '-e', type=str) ko.add_argument('--unhex', '-x', action='store_true', help="assume the input contains hex data") ko.add_argument('--shift', '-s', type=str, help="KOD decode with the specified shift") ko.add_argument('--increment', '-i', action='store_true', help="assume data is already KOD decoded, but with wrong shift -> dump alternatives.") ko.add_argument('--ascdump', '-a', action='store_true', help="CP1251 asc dump of the data") ko.add_argument('--nokod', '-n', action='store_true', help="don't KOD decode") ko.add_argument('filename', type=str, nargs='?', help="dump either stdin, or the specified file") ko.set_defaults(handler=kod_hexdump) p = subparsers.add_parser('crodump', help='CROdumper') p.add_argument('--verbose', '-v', action='store_true') p.add_argument('--kodecode', '-k', action='store_true') p.add_argument('--ascdump', '-a', action='store_true') p.add_argument('--nokod', '-n', action='store_true') p.add_argument('--nodecompress', action='store_false', dest='decompress', default='true') p.add_argument('dbdir', type=str) p.set_defaults(handler=cro_dump) p = subparsers.add_parser('sysdump', help='SYSdumper') p.add_argument('--verbose', '-v', action='store_true') p.add_argument('--ascdump', '-a', action='store_true') p.add_argument('--nodecompress', action='store_false', dest='decompress', default='true') p.add_argument('dbdir', type=str) p.set_defaults(handler=sys_dump) p = subparsers.add_parser('recdump', help='record dumper') p.add_argument('--verbose', '-v', action='store_true') p.add_argument('--ascdump', '-a', action='store_true') p.add_argument('--maxrecs', '-n', type=str, help="max nr or recots to output") p.add_argument('--find1d', action='store_true') p.add_argument('--inclencrypted', action='store_false', dest='skipencrypted', default='true', help='include encrypted records in the output') p.add_argument('--stats', action='store_true', help='calc table stats from the first byte of each record') p.add_argument('--index', action='store_true', help='dump CroIndex') p.add_argument('--stru', action='store_true', help='dump CroIndex') p.add_argument('--bank', action='store_true', help='dump CroBank') p.add_argument('--sys', action='store_true', help='dump CroSys') p.add_argument('dbdir', type=str) p.set_defaults(handler=rec_dump) p = subparsers.add_parser('strudump', help='STRUdumper') p.add_argument('--verbose', '-v', action='store_true') p.add_argument('--ascdump', '-a', action='store_true') p.add_argument('dbdir', type=str) p.set_defaults(handler=stru_dump) p = subparsers.add_parser('destruct', help='Stru dumper') p.add_argument('--verbose', '-v', action='store_true') p.add_argument('--ascdump', '-a', action='store_true') p.add_argument('--type', '-t', type=int, help='what type of record to destruct') p.set_defaults(handler=destruct) args = parser.parse_args() if args.handler: args.handler(args) if __name__=='__main__': main()