/**
 * The following code provides a minimal implementation of the following three
 * algorithms and file formats:
 *
 * 1) RFC 1952 - GZIP File Format Specification
 *    https://www.ietf.org/rfc/rfc1952.txt
 *
 * 2) RFC 1951 - DEFLATE Compressed Data Format Specification version 1.3
 *    https://www.ietf.org/rfc/rfc1951.txt
 *
 * 3) CRC-32-IEEE, as defined in RFC 1952
 *    https://www.ietf.org/rfc/rfc1952.txt
 *
 * The code only creates new GZIP files, and does not read existing ones. In
 * addition, the code only implements uncompressed DEFLATE blocks (those with a
 * BTYPE value of 00). As such, the code is not actually useful for compressing
 * data, but instead only aims to be a minimally-compliant implementation.
 */

// Set stdin and stdout to binary mode on windows
#ifdef _WIN32
#include <fcntl.h>
#include <io.h>
#endif // #ifdef _WIN32

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

// Non-zero once crc_table has been filled in by make_crc_table().
static int crc_table_computed;
// crc_table[b] holds the CRC-32 remainder contributed by the single byte b.
static uint32_t crc_table[256];

// Initialize a lookup table for CRC-32-IEEE calculations and mark it as ready.
//
// Every entry is derived bit by bit from its index using the reflected
// polynomial 0xedb88320. Calling this function again simply recomputes the same
// values.
//
// Code taken from pages 10-11 of RFC 1952.
void make_crc_table(void) {
  for (uint32_t i = 0; i < 256; i++) {
    uint32_t crc = i;
    for (int j = 0; j < 8; j++) {
      if (crc & 1) {
        crc = UINT32_C(0xedb88320) ^ (crc >> 1);
      } else {
        crc = crc >> 1;
      }
    }
    crc_table[i] = crc;
  }
  crc_table_computed = 1;
}

// Update a CRC to check additional data. Pass a crc value of 0 if this is the
// first block of data being checked.
//
// Parameters:
//   crc    - CRC returned for the data checked so far, or 0 for the first block
//   buffer - bytes to fold into the CRC; not dereferenced when len is 0
//   len    - number of bytes available in buffer
//
// Returns the CRC of all data checked so far, ready to be passed back in for
// the next block or stored in the GZIP trailer.
//
// Code taken from pages 10-11 of RFC 1952.
uint32_t update_crc(uint32_t crc, const uint8_t *buffer, size_t len) {
  // Build the table on first use, so that a caller that never called
  // make_crc_table() does not silently get a CRC computed from zeroes.
  if (!crc_table_computed) {
    make_crc_table();
  }

  // The running value is kept inverted between calls (pre- and
  // post-conditioning), which is why a fresh CRC starts from 0.
  crc = ~crc;
  for (size_t i = 0; i < len; i++) {
    crc = crc_table[((uint8_t)crc) ^ buffer[i]] ^ (crc >> 8);
  }
  return ~crc;
}

// Writes a GZIP header, as defined by pages 4-7 of RFC 1952.
//
// The header written here is the fixed 10-byte form: no optional fields follow
// it because FLG is zero. The putc() results are not checked; a failure leaves
// the error indicator set on the stream, where the caller can test it with
// ferror().
void gzip_header(FILE *out) {
  putc(0x1f, out); // ID1
  putc(0x8b, out); // ID2
  putc(0x08, out); // CM = deflate
  putc(0x00, out); // FLG = no flags set
  putc(0x00, out); // MTIME (0) = not set
  putc(0x00, out); // MTIME (1) = not set
  putc(0x00, out); // MTIME (2) = not set
  putc(0x00, out); // MTIME (3) = not set
  putc(0x00, out); // XFL = no extra flags set
  putc(0xff, out); // OS = unknown
}

// Writes a 32-bit value to the stream, least-significant byte first, which is
// the byte order GZIP uses for its multi-byte fields.
static void put_u32_le(FILE *out, uint32_t value) {
  for (int shift = 0; shift < 32; shift += 8) {
    putc((uint8_t)(value >> shift), out);
  }
}

// Writes a GZIP trailer, as defined by pages 4-7 of RFC 1952.
//
// Parameters:
//   out   - stream that receives the 8 trailer bytes
//   isize - number of uncompressed input bytes; only the low 32 bits are stored
//   crc32 - CRC of the uncompressed input
//
// As with the header, write errors are left on the stream's error indicator.
void gzip_trailer(FILE *out, size_t isize, uint32_t crc32) {
  // CRC32 = 32 bit cyclic redundancy check defined by CRC-32-IEEE
  put_u32_le(out, crc32);

  // ISIZE = size of the original input, modulo 2^32 (the conversion to
  // uint32_t performs the reduction)
  put_u32_le(out, (uint32_t)isize);
}

// Reads input, converts the input to DEFLATE format as defined by pages 8-10 of
// RFC 1951, and writes the output. Only implements uncompressed blocks (those
// with a BTYPE value of 00).
//
// Returns ISIZE and CRC32 for the corresponding GZIP fields.
void deflate(FILE *in, FILE *out, size_t *isize, uint32_t *crc32) { uint8_t buffer[UINT16_MAX]; // An uncompressed block can be at most 2^16 - 1 *isize = 0; *crc32 = 0; while (1) { size_t len = fread(buffer, 1, sizeof(buffer), in); if (ferror(in)) { fprintf(stderr, "Error reading from stream: %d\n", ferror(in)); exit(EXIT_FAILURE); } else if (feof(in)) { putc(0x01, out); // BFINAL = true, BTYPE = no compression } else { putc(0x00, out); // BFINAL = false, BTYPE = no compression } // LEN = number of bytes read putc((uint8_t)(len >> 0), out); putc((uint8_t)(len >> 8), out); // NLEN = one's complement of LEN putc((uint8_t)(~len >> 0), out); putc((uint8_t)(~len >> 8), out); fwrite(buffer, 1, len, out); if (ferror(out)) { fprintf(stderr, "Error writing to stream: %d\n", ferror(out)); exit(EXIT_FAILURE); } *isize += len; *crc32 = update_crc(*crc32, buffer, len); if (feof(in)) { break; } } } // Reads input, converts the input to GZIP format as defined by pages 4-7 of RFC // 1952, and writes the output. void gzip(FILE *in, FILE *out) { size_t isize; uint32_t crc32; if (!crc_table_computed) { make_crc_table(); crc_table_computed = 1; } gzip_header(out); deflate(in, out, &isize, &crc32); gzip_trailer(out, isize, crc32); } #ifndef NO_MAIN int main(void) { // Set stdin and stdout to binary mode on windows #ifdef _WIN32 if (_setmode(_fileno(stdin), _O_BINARY) == -1) { perror("Cannot set stdin to binary mode"); return EXIT_FAILURE; } else if (_setmode(_fileno(stdout), _O_BINARY) == -1) { perror("Cannot set stdout to binary mode"); return EXIT_FAILURE; } #endif // #ifdef _WIN32 gzip(stdin, stdout); return EXIT_SUCCESS; } #endif // #ifndef NO_MAIN