Library: LZW Compression Engine

// These variables are used to build communications. Commands are sent as 
// combined bits in the integer argument of a link-message, and are 
// recovered using masks, you may wish to read about bit-masks before 
// editing these values. These are used so the string argument is 
// kept free for data only.
//
// Commands take the following form (in hex):
//      0xFFMMIOvv
// Where the letters are:
//      F   Filter, used to quickly determine if a message is for us.
//      C   Command; encrypt/decrypt etc.
//      I   Type of data provided (hex, base64, etc.).
//      O   Desired type of data to be returned (hex, base64, etc.), 
//          this is unused in replies as the reply's value for I will 
//          be the request's value for O.
//      v   Variable, depends on mode.

// This mask allows the filter byte to be retrieved quickly
integer LSLLZW_FILTER_MASK      = 0xFF000000;
// This mask allows the mask byte to be retrieved quickly
integer LSLLZW_COMMAND_MASK     = 0x00FF0000;
// This mask allows the input type to be retrieved quickly
integer LSLLZW_INPUT_TYPE_MASK  = 0x0000F000;
// This mask allows the output type to be retireved quickly
integer LSLLZW_OUTPUT_TYPE_MASK = 0x00000F00;
// This mask allows the variable to retrieved quickly
integer LSLLZW_VARIABLE_MASK    = 0x000000FF;
// How many bits right variable must be shifted
integer LSLLZW_VARIABLE_SHIFT   = 0;

// A request
integer LSLLZW_FILTER_REQUEST   = 0x83000000;
// A reply
integer LSLLZW_FILTER_REPLY     = 0x84000000;

// An error occurred
integer LSLLZW_COMMAND_ERROR    = 0x00000000;
// Prime engine with key
integer LSLLZW_COMMAND_COMPRESS = 0x00010000;
// Encrypt message using expanded key
integer LSLLZW_COMMAND_DECOMPRESS = 0x00020000;

// Input type is hex
integer LSLLZW_INPUT_HEX        = 0x00000000;
// Input type is base64
integer LSLLZW_INPUT_BASE64     = 0x00001000;

// Output type is hex
integer LSLLZW_OUTPUT_HEX       = 0x00000000;
// Output type is base64
integer LSLLZW_OUTPUT_BASE64    = 0x00000100;

// Refuse any data longer than this many characters
integer LSLLZW_MAX_SIZE         = 5120;

integer LSLLZW_CODE_WIDTH       = 12;  // Maximum code-width, restricts table
integer LSLLZW_MIN_DATA_SIZE    = 32;  // Data must be at least this many 
                                       // bits long or compression is 
                                       // pointless.

integer LSLLZW_BUFFER           = 32;

string  LSLLZW_HEX_CHARS        = "0123456789abcdef";
string  LSLLZW_BASE64_CHARS     = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

string  lslLZWInputAlphabet     = LSLLZW_BASE64_CHARS;
integer lslLZWInputCharWidth    = 6;
string  lslLZWOutputAlphabet    = LSLLZW_BASE64_CHARS;
integer lslLZWOutputCharWidth   = 6;

string  lslLZWError             = "";

string  lslLZWInput             = "";
integer lslLZWInputLength       = 0;
integer lslLZWInputIndex        = 0;
integer lslLZWInputBuffer       = 0;
integer lslLZWInputBufferPos    = 0;
string  lslLZWOutput            = "";
integer lslLZWOutputBuffer      = 0;
integer lslLZWOutputBufferPos   = 0;
string  lslLZWOutputPart        = "";
integer lslLZWOutputPartLength  = 0;

// Load value as integers from a base64 string
lslLZWPrepareBase64Input() {
    integer x = llSubStringIndex(lslLZWInput, "=");
    if (x > 0) 
        lslLZWInput = llDeleteSubString(
            (lslLZWInput = "") + lslLZWInput, 
            x,
            -1
        );
    lslLZWInputAlphabet  = LSLLZW_BASE64_CHARS;
    lslLZWInputCharWidth = 6;
    lslLZWPrepareInput();
}

// Load value as integers from a hex string
lslLZWPrepareHexInput() {
    if (llGetSubString(lslLZWInput, 0, 1) == "0x") 
        lslLZWInput = llDeleteSubString(
            (lslLZWInput = "") + lslLZWInput, 
            0, 
            1
        );
    lslLZWInputAlphabet  = LSLLZW_HEX_CHARS;
    lslLZWInputCharWidth = 4;
    lslLZWPrepareInput();
}

// Prepares for input
lslLZWPrepareInput() {
    lslLZWInputLength    = llStringLength(lslLZWInput);
    lslLZWInputIndex     = 0;
    lslLZWInputBuffer    = 0;
    lslLZWInputBufferPos = 0;
}

float inputTime = 0.0;

// Retrieves width bits of data from input. Maximum value for width is 31 as 
// -1 is reserved for end-of-input
integer lslLZWGetInput(integer width) {
    float time = llGetTime();

    integer val    = 0;
    integer result = 0;
    integer shift  = 0;
    integer added  = 0;
    integer bits   = width;
    integer t      = 0;

    do {
        if (lslLZWInputBufferPos <= 0) { // Need more data
            if (lslLZWInputIndex >= lslLZWInputLength) {
                lslLZWInputIndex = lslLZWInputLength = lslLZWInputBuffer = 0;
                lslLZWInput = "";
                jump break;
            }// else if (lslLZWInputIndex >= LSLLZW_BUFFER) {
            //    // Reduce input-size to save memory and keep 
            //    // manipulations fast.
            //    lslLZWInput = llDeleteSubString(
            //        (lslLZWInput = "") + lslLZWInput,
            //        0,
            //        lslLZWInputIndex - 1
            //    );
            //    lslLZWInputLength -= lslLZWInputIndex;
            //    lslLZWInputIndex   = 0;
            //}

            // Pack as many input-characters into 32-bits as possible
            lslLZWInputBufferPos = val = 0;
            do {
                lslLZWInputBufferPos += lslLZWInputCharWidth;

                t = llSubStringIndex(
                    lslLZWInputAlphabet, 
                    llGetSubString(
                        lslLZWInput, 
                        lslLZWInputIndex, 
                        lslLZWInputIndex
                    )
                );
                if (t < 0) {
                    lslLZWError = "Invalid character in input";
                    return 0;
                } else val = val | (t << (32 - lslLZWInputBufferPos));
            } while (
                (lslLZWInputBufferPos <= (32 - lslLZWInputCharWidth)) && 
                ((++lslLZWInputIndex) < lslLZWInputLength)
            );
            lslLZWInputBuffer = (val >> (32 - lslLZWInputBufferPos));
        }

        shift = bits - lslLZWInputBufferPos;

        added = lslLZWInputBufferPos;
        if (shift >= 0) 
             result = result | (lslLZWInputBuffer <<  shift);
        else {
            added += shift;
            result = result | ((lslLZWInputBuffer >> -shift) & 
                ~(0xFFFFFFFF << added));
        }

        bits                 -= added;
        lslLZWInputBufferPos -= added;
    } while (bits > 0);
    @break;

    if ((!result) && (lslLZWInputIndex >= lslLZWInputLength) &&
        (lslLZWInputBufferPos <= 0)) return -1; // end of input

    inputTime += llGetTime() - time;
    return result & ~(0xFFFFFFFF << width);
}

// Returns a hex string representing integers b, preceeded with "0x".
lslLZWPrepareHexOutput() {
    lslLZWOutputAlphabet  = LSLLZW_HEX_CHARS;
    lslLZWOutputCharWidth = 4;
    lslLZWPrepareOutput();
}

// Returns a base64 string representing integers b, with padding equals signs
lslLZWPrepareBase64Output() {
    lslLZWOutputAlphabet  = LSLLZW_BASE64_CHARS;
    lslLZWOutputCharWidth = 6;
    lslLZWPrepareOutput();
}

// Used to add padding to base64 string
lslLZWCompleteBase64Output() {
    integer l = llStringLength(lslLZWOutput) % 4;
    if (l) {
        if (l == 2) 
             lslLZWOutput = (lslLZWOutput = "") + lslLZWOutput + "==";
        else lslLZWOutput = (lslLZWOutput = "") + lslLZWOutput + "=";
    }
}

// Prepare for output
lslLZWPrepareOutput() {
    lslLZWOutput          = lslLZWOutputPart = "";
    lslLZWOutputBuffer    = lslLZWOutputPartLength = 0;
    lslLZWOutputBufferPos = 32;
}

float outputTime = 0.0;

// Outputs width bits of data to the output string
lslLZWPutOutput(integer data, integer width, integer final) {
    float time = llGetTime();

    string s = "";
    if (!width) jump flush;

    integer shift = 0;
    integer added = 0;

    // Place output into buffer
    do {
        shift = lslLZWOutputBufferPos - width;

        added = width;
        if (shift >= 0) 
            lslLZWOutputBuffer = lslLZWOutputBuffer | (data << shift);
        else {
            added += shift;
            lslLZWOutputBuffer = lslLZWOutputBuffer | ((data >> -shift) &
                ~(0xFFFFFFFF << added));
        }

        lslLZWOutputBufferPos -= added;
        width                 -= added;

        if (lslLZWOutputBufferPos < width) 
            s = (s = "") + s + lslLZWOutputChars();
    } while (width > 0);

    @flush;
    if (final && (lslLZWOutputBufferPos < 32)) 
        s = (s = "") + s + lslLZWOutputChars();

    lslLZWOutputPartLength += llStringLength(s);
    lslLZWOutputPart = (s = lslLZWOutputPart = "") + lslLZWOutputPart + s;

    if (final || (lslLZWOutputPartLength >= LSLLZW_BUFFER)) {
        lslLZWOutput = (lslLZWOutputPart = lslLZWOutput = "") + 
            lslLZWOutput + lslLZWOutputPart;
        lslLZWOutputPartLength = 0;
    }

    outputTime += llGetTime() - time;
}

string lslLZWOutputChars() {
    integer mask = ~(0xFFFFFFFF << lslLZWOutputCharWidth);
    string  s    = "";

    // Output characters
    integer v = 0;
    integer c = lslLZWOutputBufferPos;
    integer l = (lslLZWOutputBufferPos + lslLZWOutputCharWidth);
    lslLZWOutputBufferPos = 32;

    do {
        lslLZWOutputBufferPos -= lslLZWOutputCharWidth;
        v = (lslLZWOutputBuffer >> lslLZWOutputBufferPos) & mask;
        s = (s = "") + s + llGetSubString(lslLZWOutputAlphabet, v, v);
    } while (lslLZWOutputBufferPos >= l);

    l = lslLZWOutputBufferPos - c;
    if (lslLZWOutputBufferPos > 0) {
        lslLZWOutputBufferPos = 32 - l;
        lslLZWOutputBuffer = (lslLZWOutputBuffer << lslLZWOutputBufferPos);
    } else {
        lslLZWOutputBufferPos = 32;
        lslLZWOutputBuffer = 0;
    }

    return (s = "") + s;
}

// Compresses the binary data using LZW compression
lslLZWCompress() {
    if (lslLZWInputLength <= LSLLZW_MIN_DATA_SIZE) {
        lslLZWError = "Data too short! Must be at least " + 
            (string)LSLLZW_MIN_DATA_SIZE + " characters long to compress";
        return;
    }

    // Set-up dictionary
    list    dictionary  = [];
    integer nextCode    = 256; // 0-255 are the basic codes
    integer maxCode     = (1 << LSLLZW_CODE_WIDTH) - 1;

    // Process characters
    integer code   = lslLZWGetInput(8);
    if ((code == -1) || (lslLZWError != "")) return;
    integer char   = 0;
    integer hash   = 0;
    integer x      = 0;

    while (~(char = lslLZWGetInput(8))) {
        if (lslLZWError != "") return;

        // Create a hash and look-it up
        hash = (code << 8) | char;

        if ((x = llListFindList(dictionary, [hash])) >= 0) 
            code = 256 + x; // Entry exists, update code
        else { // Need to create a new entry
            if (nextCode < maxCode) {
                dictionary = (dictionary = []) + dictionary + [hash];
                ++nextCode;
            }

            // Output the currently held code
            lslLZWPutOutput(code, LSLLZW_CODE_WIDTH, FALSE);
            code = char;
        }
    }

    // Output final code(s)
    lslLZWPutOutput(code,    LSLLZW_CODE_WIDTH, FALSE);
    lslLZWPutOutput(maxCode, LSLLZW_CODE_WIDTH, TRUE);
}

lslLZWDecompress() {    
    // Set-up dictionary
    list    dictionary  = [];
    list    prefixes    = [];
    integer nextCode    = 256; // 0-255 are the basic codes
    integer maxCode     = (1 << LSLLZW_CODE_WIDTH) - 1;

    // Process characters
    integer oldCode = lslLZWGetInput(LSLLZW_CODE_WIDTH);
    if ((oldCode == -1) || (lslLZWError != "")) return;

    lslLZWPutOutput(oldCode, 8, FALSE);
    integer code    = 0;
    integer char    = oldCode;
    list    stack   = [];
    integer x       = 0;

    while (~(code = lslLZWGetInput(LSLLZW_CODE_WIDTH))) {        
        if (lslLZWError != "") return;
        else if (code == maxCode) {
            lslLZWPutOutput(0, 0, TRUE); // Flush output
            return;
        }

        // Once we have the code we need to look-up it's contents
        if (code >= nextCode) {
            stack = CODE
;
            x = oldCode;
        } else x = code;

        // Produce a stack of codes and sub-codes
        while (x > 255) {
            x -= 256;
            stack = (stack = []) + stack + 
                llList2List(dictionary, x, x);
            x  = llList2Integer(prefixes, x);
        }
        stack = (stack = []) + stack + [x];

        // Now we output the contents of the stack as the original 
        // characters
        x = stack != [];
        char = llList2Integer(stack, -1);
        do  
            lslLZWPutOutput(
                llList2Integer(stack, --x), 
                8, 
                FALSE
            );
        while (x > 0);
        stack = [];

        if (nextCode < maxCode) {
            prefixes   = (prefixes   = []) + prefixes   + [oldCode];
            dictionary = (dictionary = []) + dictionary + [char];
            ++nextCode;
        }

        oldCode = code;
    }
}

error(integer link, string str, key id) {
    llMessageLinked(
        link,
        LSLLZW_FILTER_REPLY | LSLLZW_COMMAND_ERROR,
        str,
        id
    );
}

default {    
    link_message(integer x, integer y, string msg, key id) {        
        // Is the message for us?
        if ((y & LSLLZW_FILTER_MASK) == LSLLZW_FILTER_REQUEST) {
            llResetTime();
            inputTime = outputTime = 0.0;

            lslLZWError = lslLZWInput = "";

            // Refuse overly large messages
            if (llStringLength(msg) > LSLLZW_MAX_SIZE) {
                error(
                    x, 
                    "Maxmimum message length is " + 
                    (string)LSLLZW_MAX_SIZE + 
                    " characters", 
                    id
                );
                return;
            }

            // What type of data do we have?
            integer type = y & LSLLZW_INPUT_TYPE_MASK;
            list data = [];
            if (type == LSLLZW_INPUT_HEX) {
                lslLZWInput = (msg = "") + msg;
                lslLZWPrepareHexInput();
            } else if (type == LSLLZW_INPUT_BASE64) {
                lslLZWInput = (msg = "") + msg;
                lslLZWPrepareBase64Input();
            } else lslLZWError = (msg = "") + "Unsupported input-type";

            // Any errors?
            if (lslLZWError != "") {
                error(x, (lslLZWError = "") + lslLZWError, id);
                return;
            }

            // Set-up requested output type
            integer output = y & LSLLZW_OUTPUT_TYPE_MASK;
            if (output == LSLLZW_OUTPUT_HEX) {
                lslLZWPrepareHexOutput();
                output = LSLLZW_INPUT_HEX;
            } else if (output == LSLLZW_OUTPUT_BASE64) {
                lslLZWPrepareBase64Output();
                output = LSLLZW_INPUT_BASE64;
            } else {
                error(x, "Invalid output type", id);
                return;
            }

            // Now determine mode of operation
            type = y & LSLLZW_COMMAND_MASK;
            if (type == LSLLZW_COMMAND_COMPRESS) 
                lslLZWCompress();
            else if (type == LSLLZW_COMMAND_DECOMPRESS) 
                lslLZWDecompress();
            else lslLZWError = "Unsupported mode";

            // Was mode executed successfully?
            if (lslLZWError != "") {
                error(x, (lslLZWError = "") + lslLZWError, id);
                return;
            }

            if (output == LSLLZW_INPUT_BASE64) 
                lslLZWCompleteBase64Output();

            // Construct reply
            llMessageLinked(
                x,
                LSLLZW_FILTER_REPLY | type | output,
                (lslLZWOutput = "") + lslLZWOutput,
                id
            );
            llOwnerSay("Input time = " + (string)inputTime);
            llOwnerSay("Output time = " + (string)outputTime);
        }
    }
}

Welcome to the Second Life Forums Archive

Library: LZW Compression Engine