Welcome to the Second Life Forums Archive

These forums are CLOSED. Please visit the new forums HERE

Pretty fast case-insensitive word filter

Huns Valen
Don't PM me here.
Join date: 3 May 2003
Posts: 2,749
06-23-2005 01:17
Update 8/8/05: Masakazu Kojima optimized some code in filterWords(). It now runs a bit faster.

I decided to write a case-insensitive word filter. The input text can have filtered words in any case (lower, UPPER, mIxEd), and the replacement text will be whatever case you specify. Whitespace doesn't matter; the word to be filtered can be inside another word or by itself. If you want to benchmark it, call benchmark(), as the example default state below does. Long sentences that don't contain any filtered words will run through it pretty quickly. Sentences that contain a few filtered words will run more slowly, and with lots of filtered words it gets slower still.

If anyone has ideas for speeding this up without breaking it (i.e. it can't be whitespace dependent or lose case insensitivity) I'd love to see your code.

The following code includes a default state with some example code, so you can just drop it into a script and watch it work.

Input: Yesterday, I verbed a adjective noun into my closet.
Output: Yesterday, I hoisted a really large crate into my closet.

CODE
// This code is released under the terms of the GNU LGPL by Huns Valen.
// You can get a copy here: http://www.gnu.org/licenses/lgpl.txt

// Update 8/8/05: Masakazu Kojima's faster code is used in filterWords().
// It is included with permission.


// Filter configuration, filled in by setFilterWords() and read by filterWords().
// wordFilters and substitutions are parallel lists: entry i of wordFilters is
// replaced with entry i of substitutions.
list wordFilters; // Words will be changed FROM these (case insensitive)... Keep the entries themselves lower case: they are searched for in a lower-cased copy of the input.
list substitutions; // into these. The replacement text is inserted exactly as written here.
integer nWordFilters; // This is how many word filters we have (cached length of wordFilters).

// Call this once before filtering. It fills in the two parallel tables
// (entry N of wordFilters is replaced by entry N of substitutions) and
// caches how many filters there are.
setFilterWords() {
    // Replacement text, inserted exactly as written.
    substitutions = ["hoist", "really large", "crate"];

    // Words to look for, in the same order as their replacements above.
    wordFilters = ["verb", "adjective", "noun"];

    // Cache the filter count in the global so it is computed only here.
    nWordFilters = llGetListLength(wordFilters);
}

// Use this function to do the actual filtering.
//
// Returns a copy of str in which every case-insensitive occurrence of each
// entry of wordFilters has been replaced by the entry at the same index in
// substitutions. Matches may be standalone words or parts of larger words.
// Filters are applied in list order, so text inserted by an earlier filter
// can still be matched by a later one.
//
// Derived from Huns Valen's original loop and Masakazu Kojima's optimized
// insert/delete variant. Differences from that loop:
//  * After a replacement the scan resumes just past the inserted text instead
//    of restarting at the beginning of the string. The old restart never
//    terminated when a substitution contained its own filter word
//    (e.g. "cat" -> "category", or "verb" -> "Verb").
//  * Empty filter entries are skipped; llSubStringIndex reports an empty
//    pattern at index 0, which also made the old loop spin forever.
//  * Filter entries are lower-cased before searching, so an entry written
//    with capitals still matches.
//  * The text is lower-cased once per filter word rather than once per
//    replacement.
// Note: a match that only comes into existence because substituted text ends
// up next to its neighbours is no longer re-filtered by the same filter word.
string filterWords(string str) {
    integer i;
    integer filterWordLen;
    integer filterWordPos;
    integer matchEnd;
    string curFilterWord;
    string substitution;
    string done;      // Finished output: text before the scan point, substitutions applied.
    string rest;      // Original-case text that has not been scanned yet.
    string lCaseRest; // Lower-cased copy of rest, trimmed in lockstep with it.

    for (i = 0; i < nWordFilters; i++) {
        curFilterWord = llToLower(llList2String(wordFilters, i));
        filterWordLen = llStringLength(curFilterWord);

        // An empty pattern would match at index 0 forever; skip it.
        if (filterWordLen > 0) {
            substitution = llList2String(substitutions, i);
            done = "";
            rest = str;
            // Like the original code, this assumes lower-casing does not
            // change character positions.
            lCaseRest = llToLower(rest);

            filterWordPos = llSubStringIndex(lCaseRest, curFilterWord);
            while (filterWordPos != -1) {
                // Keep the text in front of the match. Guarded because
                // llGetSubString(rest, 0, -1) would return the whole string.
                if (filterWordPos > 0) {
                    done += llGetSubString(rest, 0, filterWordPos - 1);
                }
                done += substitution;

                // Drop everything up to and including the match from both
                // copies so the next search starts after it.
                matchEnd = filterWordPos + filterWordLen - 1;
                rest = llDeleteSubString(rest, 0, matchEnd);
                lCaseRest = llDeleteSubString(lCaseRest, 0, matchEnd);

                filterWordPos = llSubStringIndex(lCaseRest, curFilterWord);
            }

            // Whatever is left contains no further matches for this word.
            str = done + rest;
        }
    }
    return str;
}

// Demonstration only; leave this out of production scripts.
// Times 100 filterWords() calls on a sentence that contains filtered words
// and 100 calls on an equally long sentence that contains none, reporting
// progress every 10 iterations and the total and average time for each run
// to the owner.
benchmark() {
    // Two sentences of equal length: one with filtered words, one without.
    string loadedText = "Yesterday, I verbed a adjective noun into my closet.";
    string unloadedText = "This sentence does not contain any filtered words...";

    integer pass;
    string scratch;  // Receives each result; the value itself is not used.
    float elapsed;

    // Loaded run. The preview call happens before the timer is reset, so it
    // is not part of the measurement.
    string preview = filterWords(loadedText);
    llOwnerSay("Beginning loaded sentence benchmark: " + loadedText + " => " + preview);
    llResetTime();
    pass = 0;
    while (pass < 100) {
        if (!(pass % 10)) {
            llOwnerSay((string)pass + "/100...");
        }
        scratch = filterWords(loadedText);
        ++pass;
    }
    elapsed = llGetTime();
    llOwnerSay("Loaded test: 100 iterations in " + (string)elapsed + " seconds, avg=" + (string)(elapsed / 100));

    // Unloaded run, measured the same way.
    preview = filterWords(unloadedText);
    llOwnerSay("Beginning unloaded sentence benchmark: " + unloadedText + " => " + preview);
    llResetTime();
    pass = 0;
    while (pass < 100) {
        if (!(pass % 10)) {
            llOwnerSay((string)pass + "/100...");
        }
        scratch = filterWords(unloadedText);
        ++pass;
    }
    elapsed = llGetTime();
    llOwnerSay("Unloaded test: 100 iterations in " + (string)elapsed + " seconds, avg=" + (string)(elapsed / 100));
}

// Example state showing the word filter in use: it loads the filter tables,
// shows one sentence before and after filtering, and then runs the benchmark.
default {
    state_entry() {
        // The filter tables must be populated before filterWords() is called.
        setFilterWords();

        // Show a sample sentence before and after filtering.
        string sample = "Yesterday, I verbed a adjective noun into my closet.";
        llOwnerSay("filterWords can change this: " + sample);
        string filtered = filterWords(sample);
        llOwnerSay("Into this: " + filtered);

        // Optional: time the filter.
        benchmark();
    }
}
Nada Epoch
The Librarian
Join date: 4 Nov 2002
Posts: 1,423
Discussion Thread
06-23-2005 14:16
/54/c9/51273/1.html
_____________________
i've got nothing. ;)