2 files changed, 55 insertions, 15 deletions
diff --git a/crodump.py b/crodump.py
index d668915..d0cd67c 100644
--- a/crodump.py
+++ b/crodump.py
@@ -87,6 +87,9 @@ class Datafile:
        return self.dat.read(size)
    def readrec(self, idx):
+        """
+        extract and decode a single record.
+        """
        ofs, ln, chk = self.tadidx[idx-1]
        if ln==0xFFFFFFFF:
            # deleted record
@@ -120,6 +123,9 @@ class Datafile:
    def dump(self, args):
+        """
+        dump decodes all referenced data, and optionally prints out all unused bytes in the .dat file.
+        """
        print("hdr: %-6s dat: %04x %s enc:%04x bs:%04x, tad: %08x %08x" % (self.name, self.hdrunk, self.version, self.encoding, self.blocksize, self.nrdeleted, self.firstdeleted))
        ranges = []  # keep track of used bytes in the .dat file.
        for i, (ofs, ln, chk) in enumerate(self.tadidx):
@@ -183,20 +189,31 @@ class Datafile:
                print("%08x-%08x: %s" % (o, o+l, toout(args, dat)))
    def iscompressed(self, data):
+        """
+        Note that the compression header uses big-endian numbers.
+        """
        if len(data)<11:
            return
-        size, flag = struct.unpack_from(">HH", data, 0)
-        if size+5 != len(data):
-            return
-        if flag!=0x800:
-            return
        if data[-3:] != b"\x00\x00\x02":
            return
+        o = 0
+        while o < len(data)-3:
+            size, flag = struct.unpack_from(">HH", data, o)
+            if flag!=0x800 and flag!=0x008:
+                return
+            o += size + 2
        return True
    def decompress(self, data):
-        C = zlib.decompressobj(-15)
+        result = b""
-        return C.decompress(data[8:-3])
+        o = 0
+        while o < len(data)-3:
+            size, flag, crc = struct.unpack_from(">HHL", data, o)
+            C = zlib.decompressobj(-15)
+            result += C.decompress(data[o+8:o+8+size])
+            o += size + 2
+        return result
 def dump_bank_definition(args, bankdict):
    """
@@ -222,9 +239,9 @@ def decode_field(data):
        unk4 = rd.readdword()  # Always 0x00000009 or 0x0001000d
        remain = rd.readbytes()
-        print("Type: %d (%02d/%02d) %04x,(%d-%d),%04x - '%s' -- %s" % (typ, idx1, idx2, unk1, unk2, unk3, unk4, name, tohex(remain)))
+        print("Type: %2d (%2d/%2d) %04x,(%d-%4d),%04x - '%s' -- %s" % (typ, idx1, idx2, unk1, unk2, unk3, unk4, name, tohex(remain)))
    else:
-        print("Type: %d %2d    %d,%d       - '%s'" % (typ, idx1, unk1, unk2, name))
+        print("Type: %2d %2d    %d,%d       - '%s'" % (typ, idx1, unk1, unk2, name))
 """
   2 Base000              - 000001  050001        000000000000000546696c657302464c01000000010000001b000000000000000fd1e8f1f2e5ecedfbe920edeeece5f0010000000000000000010000000000000000
@@ -272,7 +289,11 @@ def destruct_base_definition(args, data):
    abbrev = rd.readname()
    unk7 = rd.readdword()
    nrfields = rd.readdword()
+    if args.verbose:
+        print("table: %s" % tohex(data[:rd.o]))
    print("%d,%d,%d,%d,%d,%d  %d,%d '%s'  '%s'" % (unk1, version, unk2, unk3, unk4, unk5, unk7, nrfields, tablename, abbrev))
    fields = []
    for _ in range(nrfields):
        l = rd.readword()
@@ -396,12 +417,20 @@ class Database:
        if not self.bank:
            print("No CroBank.dat found")
            return
+        if args.skipencrypted and self.bank.encoding==3:
+            print("Skipping encrypted CroBank")
+            return
        nerr = 0
        xref = defaultdict(int)
        for i in range(args.maxrecs):
            try:
                data = self.bank.readrec(i)
-                if not args.stats:
+                if args.find1d:
+                    if data and (data.find(b"\x1d")>0 or data.find(b"\x1b")>0):
+                        print("%d -> %s" % (i, b2a_hex(data)))
+                        break
+                elif not args.stats:
                    if data is None:
                        print("%5d: <deleted>" % i)
                    else:
@@ -426,6 +455,11 @@ class Database:
            for k, v in xref.items():
                print("%5d * %s" % (v, k))
+    def readrec(self, sysnum):
+        data = self.bank.readrec(sysnum)
+        tabnum, = struct.unpack_from("<B", data, 0)
+        fields = data[1:].split(b"\x1e")
 def incdata(data, s):
    """
    add 's' to each byte.
@@ -576,6 +610,8 @@ def main():
    p.add_argument('--verbose', '-v', action='store_true')
    p.add_argument('--ascdump', '-a', action='store_true')
    p.add_argument('--maxrecs', '-n', type=str, help="max nr or recots to output")
+    p.add_argument('--find1d', action='store_true')
+    p.add_argument('--inclencrypted', action='store_false', dest='skipencrypted', default='true', help='include encrypted records in the output')
    p.add_argument('--stats', action='store_true', help='calc table stats from the first byte of each record')
    p.add_argument('dbdir', type=str)
    p.set_defaults(handler=bank_dump)
diff --git a/docs/cronos-research.md b/docs/cronos-research.md
index 1ee75c4..5d0a508 100644
--- a/docs/cronos-research.md
+++ b/docs/cronos-research.md
@@ -157,7 +157,9 @@ The toplevel table-id for CroStru and CroSys is #3, while referenced records hav
 CroBank.dat contains the actual database entries for multiple tables as described in the CroStru file. After each chunk is re-assembled (and potentially decoded with the per block offset being the record number in the .tad file).
-Its first byte defines, which table it belongs to. It is encoded in cp1251 (or possibly IBM866) with actual column data separated by 0x1e. There is an extra concept of sub fields in those columns, indicated by a 0x1d byte.
+Its first byte defines which table it belongs to. It is encoded in cp1251 (or possibly IBM866) with actual column data separated by 0x1e.
+There is an extra concept of sub fields in those columns, indicated by a 0x1d byte.
+Also, files seem to have special fields, starting with a 0x1b byte.
 ## structure definitions
@@ -246,10 +248,12 @@ Other unassigned values in the table entry definition are
 some records are compressed, the format is like this:
-    uint16 size
+    multiple-chunks {
-    uint8   head[2] = { 8, 0 }
+        uint16 size;     // stored in big-endian format.
-    uint32 crc32
+        uint8   head[2] = { 8, 0 }
-    uint8   compdata[size-4]
+        uint32 crc32
+        uint8   compdata[size-6]
+    }
    uint8   tail[3] = { 0, 0, 2 }
 ## encrypted records