/* Extract likely gzip streams from a BIOS executable */

#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <assert.h>

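/*
 * See RFC 1952; we're looking for the start of a gzip member header
 * in the file:
 *   0x1f 0x8b   magic bytes (ID1, ID2)
 *   0x08        compression method (deflate)
 *   0x00 x 6    FLG, 4-byte MTIME and XFL, all zero
 * The OS byte that follows is not checked.
 */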

/* gzip header byte values we match against (see RFC 1952) */
#define ID_1 0x1f
#define ID_2 0x8b
#define CM   0x08

/*
 * Bit flags recording which of the leading header bytes have been
 * matched so far; main() ORs them into its `seen` state variable.
 */
#define SEEN_ID_1 (1)         // magic byte
#define SEEN_ID_2 (1 << 1)    // magic byte
#define SEEN_CM   (1 << 2)    // compression method (deflate)

/* Path of the file that main() opens and scans */
#define FILENAME "./ITEM_20121024_754_WIN_P08AAH.exe"

/*
 * Scan FILENAME for byte sequences that look like the start of a gzip
 * member -- ID_1, ID_2, CM, then six zero bytes -- and print the
 * 0-based file offset of each candidate on stdout.  The offsets can
 * then be handed to zcat or similar.
 *
 * Returns 0 when the whole file was scanned, 1 if the file could not
 * be opened or a read failed (a message is printed via perror).
 */
int main(void)
{
        /* number of zero bytes required after CM (FLG, MTIME x4, XFL) */
        enum { HEADER_ZEROS = 6 };

        int f = open(FILENAME, O_RDONLY);
        if (f == -1) {
                perror(FILENAME);
                return 1;
        }

        unsigned char buf[65536];   /* read in chunks, not one syscall per byte */
        long long offset = 0;       /* offset of the byte being examined */
        long long start = 0;        /* offset of the current candidate's ID_1 */
        int seen = 0;               /* SEEN_* flags matched so far */
        int zeros = 0;              /* zero bytes matched after CM */
        ssize_t got;

        while ((got = read(f, buf, sizeof buf)) > 0) {
                for (ssize_t i = 0; i < got; i++, offset++) {
                        unsigned char c = buf[i];

                        /*
                         * Each branch below consumes the byte (continue)
                         * only when it extends the current candidate;
                         * otherwise control falls out to the reset code.
                         */
                        if (seen == SEEN_ID_1) {
                                // so now we're looking for id2
                                if (c == ID_2) {
                                        seen |= SEEN_ID_2;
                                        continue;
                                }
                        } else if (seen == (SEEN_ID_1 | SEEN_ID_2)) {
                                // now looking for CM
                                if (c == CM) {
                                        seen |= SEEN_CM;
                                        continue;
                                }
                        } else if (seen == (SEEN_ID_1 | SEEN_ID_2 | SEEN_CM)) {
                                // now looking for 6 bytes of zero ...
                                // actually followed by a 0xb for NTFS, but
                                // lets ignore that for now
                                if (c == 0) {
                                        if (++zeros == HEADER_ZEROS) {
                                                printf("Found potential header at %lld\n", start);
                                                seen = 0;
                                                zeros = 0;
                                        }
                                        continue;
                                }
                        }

                        /*
                         * Either we were idle or the candidate just got
                         * rejected.  Drop the partial match, then test
                         * this same byte as a possible ID_1 so that a
                         * header directly after a false start (for
                         * example 1f 1f 8b 08 ...) is not missed.
                         */
                        seen = 0;
                        zeros = 0;
                        if (c == ID_1) {
                                seen = SEEN_ID_1;
                                start = offset;
                        }
                }
        }

        if (got < 0) {
                perror("read");
                close(f);
                return 1;
        }

        close(f);
        return 0;
}
