Hmm, I've been trying to update this to use a streaming method of reading data, that is; instead of three loops (convert string to bytes -> compress bytes -> convert bytes to string), I would move to use a single loop in which the compression reads characters of the string, and converts them to bytes as it goes. However, this seems to end up significantly slower, around 10 times slower!
If anyone's interested in looking at optimisations then the code follows (quote this post to get it including formatting). I suspect though that there is an inordinate cost to the numerous function calls required, as the difference in speed is huge compared to what I would expect!
The functions I'm concerned about are lslLZWGetInput(), lslLZWPutOutput() and lslLZWOutputCharacters(). They perform buffering and so-on but for some reason cause huge trouble and I'm not sure why, which is annoying after so much time invested to get them to work properly, as it should (in theory) be faster due to a complexity of O(n) opposed to O(3n).
// These variables are used to build communications. Commands are sent as
// combined bits in the integer argument of a link-message, and are
// recovered using masks, you may wish to read about bit-masks before
// editing these values. These are used so the string argument is
// kept free for data only.
//
// Commands take the following form (in hex):
// 0xFFMMIOvv
// Where the letters are:
// F Filter, used to quickly determine if a message is for us.
// C Command; encrypt/decrypt etc.
// I Type of data provided (hex, base64, etc.).
// O Desired type of data to be returned (hex, base64, etc.),
// this is unused in replies as the reply's value for I will
// be the request's value for O.
// v Variable, depends on mode.
// This mask allows the filter byte to be retrieved quickly
integer LSLLZW_FILTER_MASK = 0xFF000000;
// This mask allows the mask byte to be retrieved quickly
integer LSLLZW_COMMAND_MASK = 0x00FF0000;
// This mask allows the input type to be retrieved quickly
integer LSLLZW_INPUT_TYPE_MASK = 0x0000F000;
// This mask allows the output type to be retireved quickly
integer LSLLZW_OUTPUT_TYPE_MASK = 0x00000F00;
// This mask allows the variable to retrieved quickly
integer LSLLZW_VARIABLE_MASK = 0x000000FF;
// How many bits right variable must be shifted
integer LSLLZW_VARIABLE_SHIFT = 0;
// A request
integer LSLLZW_FILTER_REQUEST = 0x83000000;
// A reply
integer LSLLZW_FILTER_REPLY = 0x84000000;
// An error occurred
integer LSLLZW_COMMAND_ERROR = 0x00000000;
// Prime engine with key
integer LSLLZW_COMMAND_COMPRESS = 0x00010000;
// Encrypt message using expanded key
integer LSLLZW_COMMAND_DECOMPRESS = 0x00020000;
// Input type is hex
integer LSLLZW_INPUT_HEX = 0x00000000;
// Input type is base64
integer LSLLZW_INPUT_BASE64 = 0x00001000;
// Output type is hex
integer LSLLZW_OUTPUT_HEX = 0x00000000;
// Output type is base64
integer LSLLZW_OUTPUT_BASE64 = 0x00000100;
// Refuse any data longer than this many characters
integer LSLLZW_MAX_SIZE = 5120;
integer LSLLZW_CODE_WIDTH = 12; // Maximum code-width, restricts table
integer LSLLZW_MIN_DATA_SIZE = 32; // Data must be at least this many
// bits long or compression is
// pointless.
integer LSLLZW_BUFFER = 32;
string LSLLZW_HEX_CHARS = "0123456789abcdef";
string LSLLZW_BASE64_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
string lslLZWInputAlphabet = LSLLZW_BASE64_CHARS;
integer lslLZWInputCharWidth = 6;
string lslLZWOutputAlphabet = LSLLZW_BASE64_CHARS;
integer lslLZWOutputCharWidth = 6;
string lslLZWError = "";
string lslLZWInput = "";
integer lslLZWInputLength = 0;
integer lslLZWInputIndex = 0;
integer lslLZWInputBuffer = 0;
integer lslLZWInputBufferPos = 0;
string lslLZWOutput = "";
integer lslLZWOutputBuffer = 0;
integer lslLZWOutputBufferPos = 0;
string lslLZWOutputPart = "";
integer lslLZWOutputPartLength = 0;
// Load value as integers from a base64 string
lslLZWPrepareBase64Input() {
integer x = llSubStringIndex(lslLZWInput, "=");
if (x > 0)
lslLZWInput = llDeleteSubString(
(lslLZWInput = "") + lslLZWInput,
x,
-1
);
lslLZWInputAlphabet = LSLLZW_BASE64_CHARS;
lslLZWInputCharWidth = 6;
lslLZWPrepareInput();
}
// Load value as integers from a hex string
lslLZWPrepareHexInput() {
if (llGetSubString(lslLZWInput, 0, 1) == "0x")
lslLZWInput = llDeleteSubString(
(lslLZWInput = "") + lslLZWInput,
0,
1
);
lslLZWInputAlphabet = LSLLZW_HEX_CHARS;
lslLZWInputCharWidth = 4;
lslLZWPrepareInput();
}
// Prepares for input
lslLZWPrepareInput() {
lslLZWInputLength = llStringLength(lslLZWInput);
lslLZWInputIndex = 0;
lslLZWInputBuffer = 0;
lslLZWInputBufferPos = 0;
}
float inputTime = 0.0;
// Retrieves width bits of data from input. Maximum value for width is 31 as
// -1 is reserved for end-of-input
integer lslLZWGetInput(integer width) {
float time = llGetTime();
integer val = 0;
integer result = 0;
integer shift = 0;
integer added = 0;
integer bits = width;
integer t = 0;
do {
if (lslLZWInputBufferPos <= 0) { // Need more data
if (lslLZWInputIndex >= lslLZWInputLength) {
lslLZWInputIndex = lslLZWInputLength = lslLZWInputBuffer = 0;
lslLZWInput = "";
jump break;
}// else if (lslLZWInputIndex >= LSLLZW_BUFFER) {
// // Reduce input-size to save memory and keep
// // manipulations fast.
// lslLZWInput = llDeleteSubString(
// (lslLZWInput = "") + lslLZWInput,
// 0,
// lslLZWInputIndex - 1
// );
// lslLZWInputLength -= lslLZWInputIndex;
// lslLZWInputIndex = 0;
//}
// Pack as many input-characters into 32-bits as possible
lslLZWInputBufferPos = val = 0;
do {
lslLZWInputBufferPos += lslLZWInputCharWidth;
t = llSubStringIndex(
lslLZWInputAlphabet,
llGetSubString(
lslLZWInput,
lslLZWInputIndex,
lslLZWInputIndex
)
);
if (t < 0) {
lslLZWError = "Invalid character in input";
return 0;
} else val = val | (t << (32 - lslLZWInputBufferPos));
} while (
(lslLZWInputBufferPos <= (32 - lslLZWInputCharWidth)) &&
((++lslLZWInputIndex) < lslLZWInputLength)
);
lslLZWInputBuffer = (val >> (32 - lslLZWInputBufferPos));
}
shift = bits - lslLZWInputBufferPos;
added = lslLZWInputBufferPos;
if (shift >= 0)
result = result | (lslLZWInputBuffer << shift);
else {
added += shift;
result = result | ((lslLZWInputBuffer >> -shift) &
~(0xFFFFFFFF << added));
}
bits -= added;
lslLZWInputBufferPos -= added;
} while (bits > 0);
@break;
if ((!result) && (lslLZWInputIndex >= lslLZWInputLength) &&
(lslLZWInputBufferPos <= 0)) return -1; // end of input
inputTime += llGetTime() - time;
return result & ~(0xFFFFFFFF << width);
}
// Returns a hex string representing integers b, preceeded with "0x".
lslLZWPrepareHexOutput() {
lslLZWOutputAlphabet = LSLLZW_HEX_CHARS;
lslLZWOutputCharWidth = 4;
lslLZWPrepareOutput();
}
// Returns a base64 string representing integers b, with padding equals signs
lslLZWPrepareBase64Output() {
lslLZWOutputAlphabet = LSLLZW_BASE64_CHARS;
lslLZWOutputCharWidth = 6;
lslLZWPrepareOutput();
}
// Used to add padding to base64 string
lslLZWCompleteBase64Output() {
integer l = llStringLength(lslLZWOutput) % 4;
if (l) {
if (l == 2)
lslLZWOutput = (lslLZWOutput = "") + lslLZWOutput + "==";
else lslLZWOutput = (lslLZWOutput = "") + lslLZWOutput + "=";
}
}
// Prepare for output
lslLZWPrepareOutput() {
lslLZWOutput = lslLZWOutputPart = "";
lslLZWOutputBuffer = lslLZWOutputPartLength = 0;
lslLZWOutputBufferPos = 32;
}
float outputTime = 0.0;
// Outputs width bits of data to the output string
lslLZWPutOutput(integer data, integer width, integer final) {
float time = llGetTime();
string s = "";
if (!width) jump flush;
integer shift = 0;
integer added = 0;
// Place output into buffer
do {
shift = lslLZWOutputBufferPos - width;
added = width;
if (shift >= 0)
lslLZWOutputBuffer = lslLZWOutputBuffer | (data << shift);
else {
added += shift;
lslLZWOutputBuffer = lslLZWOutputBuffer | ((data >> -shift) &
~(0xFFFFFFFF << added));
}
lslLZWOutputBufferPos -= added;
width -= added;
if (lslLZWOutputBufferPos < width)
s = (s = "") + s + lslLZWOutputChars();
} while (width > 0);
@flush;
if (final && (lslLZWOutputBufferPos < 32))
s = (s = "") + s + lslLZWOutputChars();
lslLZWOutputPartLength += llStringLength(s);
lslLZWOutputPart = (s = lslLZWOutputPart = "") + lslLZWOutputPart + s;
if (final || (lslLZWOutputPartLength >= LSLLZW_BUFFER)) {
lslLZWOutput = (lslLZWOutputPart = lslLZWOutput = "") +
lslLZWOutput + lslLZWOutputPart;
lslLZWOutputPartLength = 0;
}
outputTime += llGetTime() - time;
}
string lslLZWOutputChars() {
integer mask = ~(0xFFFFFFFF << lslLZWOutputCharWidth);
string s = "";
// Output characters
integer v = 0;
integer c = lslLZWOutputBufferPos;
integer l = (lslLZWOutputBufferPos + lslLZWOutputCharWidth);
lslLZWOutputBufferPos = 32;
do {
lslLZWOutputBufferPos -= lslLZWOutputCharWidth;
v = (lslLZWOutputBuffer >> lslLZWOutputBufferPos) & mask;
s = (s = "") + s + llGetSubString(lslLZWOutputAlphabet, v, v);
} while (lslLZWOutputBufferPos >= l);
l = lslLZWOutputBufferPos - c;
if (lslLZWOutputBufferPos > 0) {
lslLZWOutputBufferPos = 32 - l;
lslLZWOutputBuffer = (lslLZWOutputBuffer << lslLZWOutputBufferPos);
} else {
lslLZWOutputBufferPos = 32;
lslLZWOutputBuffer = 0;
}
return (s = "") + s;
}
// Compresses the binary data using LZW compression
lslLZWCompress() {
if (lslLZWInputLength <= LSLLZW_MIN_DATA_SIZE) {
lslLZWError = "Data too short! Must be at least " +
(string)LSLLZW_MIN_DATA_SIZE + " characters long to compress";
return;
}
// Set-up dictionary
list dictionary = [];
integer nextCode = 256; // 0-255 are the basic codes
integer maxCode = (1 << LSLLZW_CODE_WIDTH) - 1;
// Process characters
integer code = lslLZWGetInput(8);
if ((code == -1) || (lslLZWError != "")) return;
integer char = 0;
integer hash = 0;
integer x = 0;
while (~(char = lslLZWGetInput(8))) {
if (lslLZWError != "") return;
// Create a hash and look-it up
hash = (code << 8) | char;
if ((x = llListFindList(dictionary, [hash])) >= 0)
code = 256 + x; // Entry exists, update code
else { // Need to create a new entry
if (nextCode < maxCode) {
dictionary = (dictionary = []) + dictionary + [hash];
++nextCode;
}
// Output the currently held code
lslLZWPutOutput(code, LSLLZW_CODE_WIDTH, FALSE);
code = char;
}
}
// Output final code(s)
lslLZWPutOutput(code, LSLLZW_CODE_WIDTH, FALSE);
lslLZWPutOutput(maxCode, LSLLZW_CODE_WIDTH, TRUE);
}
lslLZWDecompress() {
// Set-up dictionary
list dictionary = [];
list prefixes = [];
integer nextCode = 256; // 0-255 are the basic codes
integer maxCode = (1 << LSLLZW_CODE_WIDTH) - 1;
// Process characters
integer oldCode = lslLZWGetInput(LSLLZW_CODE_WIDTH);
if ((oldCode == -1) || (lslLZWError != "")) return;
lslLZWPutOutput(oldCode, 8, FALSE);
integer code = 0;
integer char = oldCode;
list stack = [];
integer x = 0;
while (~(code = lslLZWGetInput(LSLLZW_CODE_WIDTH))) {
if (lslLZWError != "") return;
else if (code == maxCode) {
lslLZWPutOutput(0, 0, TRUE); // Flush output
return;
}
// Once we have the code we need to look-up it's contents
if (code >= nextCode) {
stack = ;
x = oldCode;
} else x = code;
// Produce a stack of codes and sub-codes
while (x > 255) {
x -= 256;
stack = (stack = []) + stack +
llList2List(dictionary, x, x);
x = llList2Integer(prefixes, x);
}
stack = (stack = []) + stack + [x];
// Now we output the contents of the stack as the original
// characters
x = stack != [];
char = llList2Integer(stack, -1);
do
lslLZWPutOutput(
llList2Integer(stack, --x),
8,
FALSE
);
while (x > 0);
stack = [];
if (nextCode < maxCode) {
prefixes = (prefixes = []) + prefixes + [oldCode];
dictionary = (dictionary = []) + dictionary + [char];
++nextCode;
}
oldCode = code;
}
}
error(integer link, string str, key id) {
llMessageLinked(
link,
LSLLZW_FILTER_REPLY | LSLLZW_COMMAND_ERROR,
str,
id
);
}
default {
link_message(integer x, integer y, string msg, key id) {
// Is the message for us?
if ((y & LSLLZW_FILTER_MASK) == LSLLZW_FILTER_REQUEST) {
llResetTime();
inputTime = outputTime = 0.0;
lslLZWError = lslLZWInput = "";
// Refuse overly large messages
if (llStringLength(msg) > LSLLZW_MAX_SIZE) {
error(
x,
"Maxmimum message length is " +
(string)LSLLZW_MAX_SIZE +
" characters",
id
);
return;
}
// What type of data do we have?
integer type = y & LSLLZW_INPUT_TYPE_MASK;
list data = [];
if (type == LSLLZW_INPUT_HEX) {
lslLZWInput = (msg = "") + msg;
lslLZWPrepareHexInput();
} else if (type == LSLLZW_INPUT_BASE64) {
lslLZWInput = (msg = "") + msg;
lslLZWPrepareBase64Input();
} else lslLZWError = (msg = "") + "Unsupported input-type";
// Any errors?
if (lslLZWError != "") {
error(x, (lslLZWError = "") + lslLZWError, id);
return;
}
// Set-up requested output type
integer output = y & LSLLZW_OUTPUT_TYPE_MASK;
if (output == LSLLZW_OUTPUT_HEX) {
lslLZWPrepareHexOutput();
output = LSLLZW_INPUT_HEX;
} else if (output == LSLLZW_OUTPUT_BASE64) {
lslLZWPrepareBase64Output();
output = LSLLZW_INPUT_BASE64;
} else {
error(x, "Invalid output type", id);
return;
}
// Now determine mode of operation
type = y & LSLLZW_COMMAND_MASK;
if (type == LSLLZW_COMMAND_COMPRESS)
lslLZWCompress();
else if (type == LSLLZW_COMMAND_DECOMPRESS)
lslLZWDecompress();
else lslLZWError = "Unsupported mode";
// Was mode executed successfully?
if (lslLZWError != "") {
error(x, (lslLZWError = "") + lslLZWError, id);
return;
}
if (output == LSLLZW_INPUT_BASE64)
lslLZWCompleteBase64Output();
// Construct reply
llMessageLinked(
x,
LSLLZW_FILTER_REPLY | type | output,
(lslLZWOutput = "") + lslLZWOutput,
id
);
llOwnerSay("Input time = " + (string)inputTime);
llOwnerSay("Output time = " + (string)outputTime);
}
}
}