diff --git a/.gitignore b/.gitignore index 97310df..6d53c27 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,6 @@ *.vcxproj.filters *.vcxproj.user x64 -.vs \ No newline at end of file +.vs +build +.vscode \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..cfd71b8 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,20 @@ +cmake_minimum_required(VERSION 3.28) + +project(CompressionAlgorithms VERSION 1.0.0 LANGUAGES C CXX) + +set(CMAKE_C_STANDARD 17) +set(CMAKE_C_STANDARD_REQUIRED ON) +set(CMAKE_C_EXTENSIONS OFF) + + +set(CLIBRARY Clibrary) +set(HUFFMAN_LIBRARY Huffman_library) +set(COMPRESSOR Compressor) +set(EXTRACTOR Extractor) + +option(COMPILE_COMPRESSOR "Whether to compile the compressor" OFF) +option(COMPILE_EXTRACTOR "Whether to compile the extractor" OFF) + +add_subdirectory(clib) +add_subdirectory("Huffman CODING") +add_subdirectory(app) diff --git a/Huffman Coding/CMakeLists.txt b/Huffman Coding/CMakeLists.txt new file mode 100644 index 0000000..5b3e15b --- /dev/null +++ b/Huffman Coding/CMakeLists.txt @@ -0,0 +1,3 @@ +add_library(${HUFFMAN_LIBRARY} STATIC "huff.c") + +target_include_directories(${HUFFMAN_LIBRARY} PUBLIC "../include") \ No newline at end of file diff --git a/Huffman Coding/huff.c b/Huffman Coding/huff.c new file mode 100644 index 0000000..bb928ce --- /dev/null +++ b/Huffman Coding/huff.c @@ -0,0 +1,339 @@ +#ifdef _MSC_VER +#define _CRT_SECURE_NO_WARNINGS +#endif + +#include +#include +#include +#include +#include "bitio.h" +#include "errhand.h" +#include "main.h" + +typedef struct tree_node { + unsigned int count; + unsigned int saved_count; + int child_0; + int child_1; +} NODE; + +typedef struct code { + unsigned int code; + int code_bits; +}CODE; + + +/* +The special EOS symbol is 256, the first available symbol after all +of the possible bytes. When decoding, reading this symbol +indicating that all of the data has been read in. +*/ + +#define END_OF_STREAM 256 + + +void count_bytes(FILE* input, unsigned long* long_counts); +void scale_counts(unsigned long* long_counts, NODE* nodes); +int build_tree(NODE* nodes); +void convert_tree_to_code(NODE* nodes, CODE* codes, unsigned int code_so_far, int bits, int node); +void output_counts(BIT_FILE* output, NODE* nodes); +void input_counts(BIT_FILE* input, NODE* nodes); +void print_model(NODE* nodes, CODE* codes); +void compress_data(FILE* input, BIT_FILE* output, CODE* codes); +void expand_data(BIT_FILE* input, FILE* output, NODE* nodes, int root_node); +void print_char(int c); + + +char* CompressionName = "static order 0 model with Huffman coding"; +char* Usage = "infinte outfile [-d]\n\n Specifying -d will dump the modeling data\n"; + +void CompressFile(FILE* input, BIT_FILE* output, int argc, char* argv[]) { + + unsigned long* counts; + NODE* nodes; + CODE* codes; + int root_node; + + counts = (unsigned long*)calloc(256, sizeof(unsigned long)); + if (counts == NULL) + fatal_error("Error allocating counts\n"); + if ((nodes = (NODE*)calloc(514, sizeof(NODE))) == NULL) + fatal_error("Error allocating nodes array\n"); + if ((codes = (CODE*)calloc(257, sizeof(CODE))) == NULL) + fatal_error("Error allocating codes array\n"); + + count_bytes(input, counts); + scale_counts(counts, nodes); + output_counts(output, nodes); + root_node = build_tree(nodes); + convert_tree_to_code(nodes, codes, 0, 0, root_node); + if (argc > 0 && strcmp(argv[0], "-d") == 0) + print_model(nodes, codes); + + compress_data(input, output, codes); + free((char*)codes); + +} + +void ExpandFile(BIT_FILE* input, FILE* output, int argc, char* argv[]) { + + NODE* nodes; + int root_node; + if ((nodes = (NODE*)calloc(514, sizeof(NODE))) == NULL) + fatal_error("Error allocating nodes array\n"); + + input_counts(input, nodes); + root_node = build_tree(nodes); + + if (argc > 0 && strcmp(argv[0], "-d") == 0) + print_model(nodes, 0); + + expand_data(input, output, nodes, root_node); + free((char*)nodes); + +} + + +void output_counts(BIT_FILE* output, NODE* nodes) { + int first; + int last; + int next; + int i; + + first = 0; + while (first < 255 && nodes[first].count == 0) + first++; + + for (; first < 256; first = next) { + last = first + 1; + + for (; ; ) { + for (; last < 256; last++) + if (nodes[last].count == 0) + break; + + last--; + for (next = last + 1; next < 256; next++) + if (nodes[next].count != 0) + break; + + if (next > 255) + break; + + if ((next - last) > 3) + break; + + last = next; + } + + if (putc(first, output->file) != first) + fatal_error("Error writing byte counts\n"); + if (putc(last, output->file) != last) + fatal_error("Error writing byte counts\n"); + for (i = first; i <= last; i++) { + if (putc(nodes[i].count, output->file) != (int)nodes[i].count) + fatal_error("Error writing byte counts\n"); + } + } + + if (putc(0, output->file) != 0) + fatal_error("Error writing byte counts\n"); +} + +void input_counts(BIT_FILE* input, NODE* nodes) { + + int first; + int last; + int i; + int c; + + for (i = 0; i < 256; i++) + nodes[i].count = 0; + + if ((first = getc(input->file)) == EOF) + fatal_error("Error reading byte counts\n"); + if ((last = getc(input->file)) == EOF) + fatal_error("Error reading byte counts\n"); + + for (; ;) { + for (i = first; i <= last; i++) { + if ((c = getc(input->file)) == EOF) + fatal_error("Error reading byte counts\n"); + else + nodes[i].count = (unsigned int)c; + } + + if ((first = getc(input->file)) == EOF) + fatal_error("Error reading byte counts\n"); + if (first == 0) + break; + if ((last = getc(input->file)) == EOF) + fatal_error("Error reading byte counts\n"); + } + nodes[END_OF_STREAM].count = 1; +} + + +#ifndef SEEK_SET +#define SEEK_SET 0 +#endif + +void count_bytes(FILE* input, unsigned long* counts) { + + long input_marker; + int c; + + input_marker = ftell(input); + while ((c = getc(input)) != EOF) + counts[c]++; + + fseek(input, input_marker, SEEK_SET); +} + +void scale_counts(unsigned long* counts, NODE* nodes) { + + unsigned long max_count; + int i; + + max_count = 0; + for (i = 0; i < 256; i++) + if (counts[i] > max_count) + max_count = counts[i]; + + + if (max_count == 0) { + counts[0] = 1; + max_count = 1; + } + + max_count = max_count / 255; + max_count = max_count + 1; + + for (i = 0; i < 256; i++) { + nodes[i].count = (unsigned int)(counts[i] / max_count); + if (nodes[i].count = 0 && counts[i] != 0) + nodes[i].count = 1; + } + + nodes[END_OF_STREAM].count = 1; +} + +int build_tree(NODE* nodes) { + + int next_free; + int i; + int min_1; + int min_2; + nodes[513].count = 0xffff; + + for (next_free = END_OF_STREAM + 1; ; next_free++) { + min_1 = 513; + min_2 = 513; + + /* min_1 < min_2 */ + for (i = 0; i < next_free; i++) { + if (nodes[i].count != 0) { + if (nodes[i].count < nodes[min_1].count) { + min_2 = min_1; + min_1 = i; + } + else if (nodes[i].count < nodes[min_2].count) + min_2 = i; + } + } + + if (min_2 == 513) + break; + + nodes[next_free].count = nodes[min_1].count + nodes[min_2].count; + nodes[min_1].saved_count = nodes[min_1].count; + nodes[min_1].count = 0; + nodes[min_2].saved_count = nodes[min_2].count; + nodes[min_2].count = 0; + nodes[next_free].child_0 = min_1; + nodes[next_free].child_1 = min_2; + } + + next_free--; + nodes[next_free].saved_count = nodes[next_free].count; + return next_free; +} + + +void convert_tree_to_code(NODE* nodes, CODE* codes, unsigned int code_so_far, int bits, int node) { + + if (node <= END_OF_STREAM) { + codes[node].code = code_so_far; + codes[node].code_bits = bits; + return; + } + + code_so_far <<= 1; + bits++; + convert_tree_to_code(nodes, codes, code_so_far, bits, nodes[node].child_0); + convert_tree_to_code(nodes, codes, code_so_far | 1, bits, nodes[node].child_1); + +} + +/* for debugging purpose */ +void print_model(NODE* nodes, CODE* codes) { + + int i; + + for (i = 0; i < 513; i++) { + if (nodes[i].saved_count != 0) { + printf("node="); + print_char(i); + printf(" count=%3d", nodes[i].saved_count); + printf(" child_0="); + print_char(nodes[i].child_0); + printf(" child_1="); + print_char(nodes[i].child_1); + + if (codes && i <= END_OF_STREAM) { + printf(" Huffman code="); + FilePrintBinary(stdout, codes[i].code, codes[i].code_bits); + } + printf("\n"); + } + } +} + +void print_char(int c) { + if (c >= 0x20 && c < 127) + printf("'%c'", c); + else + printf("%3d", c); +} + + +void compress_data(FILE* input, BIT_FILE* output, CODE* codes) { + + int c; + while ((c = getc(input)) != EOF) + OutputBits(output, (unsigned long)codes[c].code, codes[c].code_bits); + OutputBits(output, (unsigned long)codes[END_OF_STREAM].code, codes[END_OF_STREAM].code_bits); + +} + +void expand_data(BIT_FILE* input, FILE* output, NODE* nodes, int root_node) { + + int node; + for (; ;) { + node = root_node; + do { + if (InputBit(input)) { + node = nodes[node].child_1; + } + else + { + node = nodes[node].child_0; + } + } while (node > END_OF_STREAM); + + if (node == END_OF_STREAM) + break; + if ((putc(node, output)) != node) + fatal_error("Error trying to write byte to output"); + } +} \ No newline at end of file diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt new file mode 100644 index 0000000..02fc1e0 --- /dev/null +++ b/app/CMakeLists.txt @@ -0,0 +1,11 @@ +if(COMPILE_COMPRESSOR) + add_executable(${COMPRESSOR} "main-c.c") + target_link_libraries(${COMPRESSOR} PUBLIC ${CLIBRARY} ${HUFFMAN_LIBRARY}) + target_include_directories(${COMPRESSOR} PUBLIC "../include") +endif() + +if(COMPILE_EXTRACTOR) + add_executable(${EXTRACTOR} "main-e.c") + target_link_libraries(${EXTRACTOR} PUBLIC ${CLIBRARY} ${HUFFMAN_LIBRARY}) + target_include_directories(${EXTRACTOR} PUBLIC "../include") +endif() \ No newline at end of file diff --git a/clib/main-c.c b/app/main-c.c similarity index 100% rename from clib/main-c.c rename to app/main-c.c diff --git a/clib/main-e.c b/app/main-e.c similarity index 100% rename from clib/main-e.c rename to app/main-e.c diff --git a/clib/CMakeLists.txt b/clib/CMakeLists.txt new file mode 100644 index 0000000..c280010 --- /dev/null +++ b/clib/CMakeLists.txt @@ -0,0 +1,5 @@ +add_library(${CLIBRARY} STATIC + "bitio.c" + "errhand.c") + +target_include_directories(${CLIBRARY} PUBLIC "../include") \ No newline at end of file diff --git a/clib/bitio.h b/include/bitio.h similarity index 100% rename from clib/bitio.h rename to include/bitio.h diff --git a/clib/errhand.h b/include/errhand.h similarity index 100% rename from clib/errhand.h rename to include/errhand.h diff --git a/clib/main.h b/include/main.h similarity index 100% rename from clib/main.h rename to include/main.h