EDIT: Got it working, see post below instead.
Okay, trying to understand arithmetic coding, so that I can start learning QM-coding and all that. Having almost no luck.
I was able to write an encoder, but it seems to crash whenever I try scaling the probability table up.
For instance, the below code allows me to compress the Chrono Trigger ROM from 4096kb to 3978kb, and then decompress it successfully.
However, when I set a range > 192, it starts failing. The higher the value, the faster it fails. At 16384, compression ratio is great, getting me down to 3500kb (well, figuratively great for pure entropy coding on a mostly already compressed ROM). But I can only decompress the first four bytes correctly before errors occur.
Can anyone shed any light on what I'm doing wrong? Many thanks in advance ...
Okay, trying to understand arithmetic coding, so that I can start learning QM-coding and all that. Having almost no luck.
I was able to write an encoder, but it seems to crash whenever I try scaling the probability table up.
For instance, the below code allows me to compress the Chrono Trigger ROM from 4096kb to 3978kb, and then decompress it successfully.
However, when I set a range > 192, it starts failing. The higher the value, the faster it fails. At 16384, compression ratio is great, getting me down to 3500kb (well, figuratively great for pure entropy coding on a mostly already compressed ROM). But I can only decompress the first four bytes correctly before errors occur.
Can anyone shed any light on what I'm doing wrong? Many thanks in advance ...
Code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <conio.h>
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned long uint32_t;
typedef unsigned long long uint64_t;
FILE *fp;
uint8_t *data;
void aricode(const char *infn, const char *outfn) {
fp = fopen(infn, "rb");
if(!fp) return;
fseek(fp, 0, SEEK_END);
unsigned size = ftell(fp);
rewind(fp);
data = new uint8_t[size];
fread(data, 1, size, fp);
fclose(fp);
fp = fopen(outfn, "wb");
//========================================
//generate probability tables
//========================================
unsigned otable[256];
memset(&otable, 0, sizeof otable);
for(unsigned i = 0; i < size; i++) otable[data[i]]++;
uint16_t ptable_lo[256], ptable_hi[256];
memset(&ptable_lo, 0, sizeof ptable_lo);
memset(&ptable_hi, 0, sizeof ptable_hi);
unsigned scale = 0;
for(unsigned i = 0; i < 256; i++) {
//the higher the scalar, the better the compression.
//optimal value would be 16384, yet this crashes with > 196 ...
unsigned probability = double(otable[i]) / size * 196.0;
if(probability == 0) probability = 1;
unsigned prev = !i ? 0 : i - 1;
ptable_lo[i] = ptable_hi[prev];
ptable_hi[i] = ptable_lo[i] + probability;
scale += probability;
}
for(unsigned i = 0; i < 256; i++) {
ptable_hi[i]--;
//printf("%3d range = %3d-%3d\n", i, ptable_lo[i], ptable_hi[i]);
}
printf("scale = %d\n", scale);
fputc(size, fp);
fputc(size >> 8, fp);
fputc(size >> 16, fp);
fputc(size >> 24, fp);
fwrite(ptable_lo, 1, sizeof ptable_lo, fp);
fwrite(ptable_hi, 1, sizeof ptable_hi, fp);
//========================================
//encode
//========================================
uint16_t lo = 0x0000;
uint16_t hi = 0xffff;
unsigned bitpos = 0, bitval = 0;
for(unsigned i = 0; i < size; i++) {
unsigned range = (hi - lo) + 1;
unsigned symbol = data[i];
unsigned problo = ptable_lo[symbol];
unsigned probhi = ptable_hi[symbol];
//printf("range = %0.5x, lo = %0.4x, hi = %0.4x, sym = %0.2x, pl = %3d, ph = %3d\n", range, lo, hi, symbol, problo, probhi);
hi = lo + range * (double(probhi) / scale);
lo = lo + range * (double(problo) / scale);
//printf("adjust: range = %0.5x, lo = %0.4x, hi = %0.4x\n", (hi - lo) + 1, lo, hi);
while(((hi - lo) + 1) < scale) {
bitval = (bitval << 1) | (lo >> 15);
if(++bitpos == 8) { fputc(bitval, fp); bitval = bitpos = 0; }
lo = (lo << 1);
hi = (hi << 1) + 1;
//printf("renormalize: range = %0.5x, lo = %0.4x, hi = %0.4x\n", (hi - lo) + 1, lo, hi);
}
}
if(bitpos > 0) fputc(bitval, fp);
fputc(lo >> 8, fp);
fputc(lo, fp);
//========================================
//finish
//========================================
delete[] data;
fclose(fp);
}
void aridecode(const char *infn, const char *outfn) {
fp = fopen(infn, "rb");
if(!fp) return;
fseek(fp, 0, SEEK_END);
unsigned size = ftell(fp);
rewind(fp);
unsigned decompsize = fgetc(fp);
decompsize |= fgetc(fp) << 8;
decompsize |= fgetc(fp) << 16;
decompsize |= fgetc(fp) << 24;
//========================================
//read probability tables
//========================================
uint16_t ptable_lo[256], ptable_hi[256];
memset(&ptable_lo, 0, sizeof ptable_lo);
memset(&ptable_hi, 0, sizeof ptable_hi);
fread(ptable_lo, 1, sizeof ptable_lo, fp);
fread(ptable_hi, 1, sizeof ptable_hi, fp);
unsigned scale = 0;
for(unsigned i = 0; i < 256; i++) {
scale += ptable_hi[i] - ptable_lo[i] + 1;
}
printf("scale = %3d\n", scale);
size -= 4;
size -= sizeof ptable_lo;
size -= sizeof ptable_hi;
data = new uint8_t[size + 64];
memset(data, 0, size + 64);
fread(data, 1, size, fp);
fclose(fp);
unsigned p = 0;
fp = fopen(outfn, "wb");
//========================================
//decode
//========================================
uint16_t lo = 0x0000;
uint16_t hi = 0xffff;
uint16_t code = 0;
code = data[p++] << 8;
code |= data[p++];
unsigned bitval = data[p++], bitpos = 0;
unsigned outsize = 0;
while(outsize < decompsize) {
unsigned range = (hi - lo) + 1;
uint16_t pos = (code - lo) + 1;
pos = pos * scale / range;
unsigned symbol;
for(unsigned i = 0; i < 256; i++) {
if(pos >= ptable_lo[i] && pos <= ptable_hi[i]) {
//found symbol
symbol = i;
fputc(symbol, fp);
outsize++;
break;
}
}
unsigned problo = ptable_lo[symbol];
unsigned probhi = ptable_hi[symbol];
hi = lo + range * double(probhi) / scale;
lo = lo + range * double(problo) / scale;
while(((hi - lo) + 1) < scale) {
code = (code << 1) + ((bitval >> 7) & 1);
bitval <<= 1;
if(++bitpos >= 8) { bitval = data[p++]; bitpos = 0; }
lo = (lo << 1);
hi = (hi << 1) + 1;
}
}
//========================================
//finish
//========================================
delete[] data;
fclose(fp);
}
int main() {
aricode("test.bin", "test.ari");
aridecode("test.ari", "output.bin");
printf("done\n");
getch();
return 0;
}
#include <stdlib.h>
#include <string.h>
#include <conio.h>
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned long uint32_t;
typedef unsigned long long uint64_t;
FILE *fp;
uint8_t *data;
void aricode(const char *infn, const char *outfn) {
fp = fopen(infn, "rb");
if(!fp) return;
fseek(fp, 0, SEEK_END);
unsigned size = ftell(fp);
rewind(fp);
data = new uint8_t[size];
fread(data, 1, size, fp);
fclose(fp);
fp = fopen(outfn, "wb");
//========================================
//generate probability tables
//========================================
unsigned otable[256];
memset(&otable, 0, sizeof otable);
for(unsigned i = 0; i < size; i++) otable[data[i]]++;
uint16_t ptable_lo[256], ptable_hi[256];
memset(&ptable_lo, 0, sizeof ptable_lo);
memset(&ptable_hi, 0, sizeof ptable_hi);
unsigned scale = 0;
for(unsigned i = 0; i < 256; i++) {
//the higher the scalar, the better the compression.
//optimal value would be 16384, yet this crashes with > 196 ...
unsigned probability = double(otable[i]) / size * 196.0;
if(probability == 0) probability = 1;
unsigned prev = !i ? 0 : i - 1;
ptable_lo[i] = ptable_hi[prev];
ptable_hi[i] = ptable_lo[i] + probability;
scale += probability;
}
for(unsigned i = 0; i < 256; i++) {
ptable_hi[i]--;
//printf("%3d range = %3d-%3d\n", i, ptable_lo[i], ptable_hi[i]);
}
printf("scale = %d\n", scale);
fputc(size, fp);
fputc(size >> 8, fp);
fputc(size >> 16, fp);
fputc(size >> 24, fp);
fwrite(ptable_lo, 1, sizeof ptable_lo, fp);
fwrite(ptable_hi, 1, sizeof ptable_hi, fp);
//========================================
//encode
//========================================
uint16_t lo = 0x0000;
uint16_t hi = 0xffff;
unsigned bitpos = 0, bitval = 0;
for(unsigned i = 0; i < size; i++) {
unsigned range = (hi - lo) + 1;
unsigned symbol = data[i];
unsigned problo = ptable_lo[symbol];
unsigned probhi = ptable_hi[symbol];
//printf("range = %0.5x, lo = %0.4x, hi = %0.4x, sym = %0.2x, pl = %3d, ph = %3d\n", range, lo, hi, symbol, problo, probhi);
hi = lo + range * (double(probhi) / scale);
lo = lo + range * (double(problo) / scale);
//printf("adjust: range = %0.5x, lo = %0.4x, hi = %0.4x\n", (hi - lo) + 1, lo, hi);
while(((hi - lo) + 1) < scale) {
bitval = (bitval << 1) | (lo >> 15);
if(++bitpos == 8) { fputc(bitval, fp); bitval = bitpos = 0; }
lo = (lo << 1);
hi = (hi << 1) + 1;
//printf("renormalize: range = %0.5x, lo = %0.4x, hi = %0.4x\n", (hi - lo) + 1, lo, hi);
}
}
if(bitpos > 0) fputc(bitval, fp);
fputc(lo >> 8, fp);
fputc(lo, fp);
//========================================
//finish
//========================================
delete[] data;
fclose(fp);
}
void aridecode(const char *infn, const char *outfn) {
fp = fopen(infn, "rb");
if(!fp) return;
fseek(fp, 0, SEEK_END);
unsigned size = ftell(fp);
rewind(fp);
unsigned decompsize = fgetc(fp);
decompsize |= fgetc(fp) << 8;
decompsize |= fgetc(fp) << 16;
decompsize |= fgetc(fp) << 24;
//========================================
//read probability tables
//========================================
uint16_t ptable_lo[256], ptable_hi[256];
memset(&ptable_lo, 0, sizeof ptable_lo);
memset(&ptable_hi, 0, sizeof ptable_hi);
fread(ptable_lo, 1, sizeof ptable_lo, fp);
fread(ptable_hi, 1, sizeof ptable_hi, fp);
unsigned scale = 0;
for(unsigned i = 0; i < 256; i++) {
scale += ptable_hi[i] - ptable_lo[i] + 1;
}
printf("scale = %3d\n", scale);
size -= 4;
size -= sizeof ptable_lo;
size -= sizeof ptable_hi;
data = new uint8_t[size + 64];
memset(data, 0, size + 64);
fread(data, 1, size, fp);
fclose(fp);
unsigned p = 0;
fp = fopen(outfn, "wb");
//========================================
//decode
//========================================
uint16_t lo = 0x0000;
uint16_t hi = 0xffff;
uint16_t code = 0;
code = data[p++] << 8;
code |= data[p++];
unsigned bitval = data[p++], bitpos = 0;
unsigned outsize = 0;
while(outsize < decompsize) {
unsigned range = (hi - lo) + 1;
uint16_t pos = (code - lo) + 1;
pos = pos * scale / range;
unsigned symbol;
for(unsigned i = 0; i < 256; i++) {
if(pos >= ptable_lo[i] && pos <= ptable_hi[i]) {
//found symbol
symbol = i;
fputc(symbol, fp);
outsize++;
break;
}
}
unsigned problo = ptable_lo[symbol];
unsigned probhi = ptable_hi[symbol];
hi = lo + range * double(probhi) / scale;
lo = lo + range * double(problo) / scale;
while(((hi - lo) + 1) < scale) {
code = (code << 1) + ((bitval >> 7) & 1);
bitval <<= 1;
if(++bitpos >= 8) { bitval = data[p++]; bitpos = 0; }
lo = (lo << 1);
hi = (hi << 1) + 1;
}
}
//========================================
//finish
//========================================
delete[] data;
fclose(fp);
}
int main() {
aricode("test.bin", "test.ari");
aridecode("test.ari", "output.bin");
printf("done\n");
getch();
return 0;
}