Christopher Omega
Oxymoron
Join date: 28 Mar 2003
Posts: 1,828
|
03-12-2005 09:49
Konnichiwa minnasama!  Some have expressed interest in my current project, an XML parser. It's still not finished, and I have some major bugs to hammer out of it, but here's what I have so far: EDIT: GAH! The forum won't let me post messages over 20k characters. The script is about 20,738 characters. I'll split it in two, and post it in the next two replies. Note: It still has debug code in it. Lemme know what you think  ==Chris
|
Christopher Omega
Oxymoron
Join date: 28 Mar 2003
Posts: 1,828
|
Part 1
03-12-2005 10:06
// XML Parsing Module // Written by Christopher Omega
// When triggered by a linked message, // this script parses the file or string specified // and triggers various xml_* link messages // depending upon what text it encounters. // Currently, it can parse only one thing at a time, // so it queues requests.
// ====== The Interface ======
// ========== For method invocation ==========

// Delimiter placed between the parameters of a call when they are
// flattened into the string field of a link message.
string PARAMETER_SEPERATOR = "|_|";

// When TRUE, every outgoing call is also echoed to public chat (channel 0).
// Set to FALSE to silence the trace.
integer DEBUG = TRUE;

// Invokes a method on the scripts in this prim by sending a link message:
//   num = callId
//   str = parameters joined with PARAMETER_SEPERATOR
//   id  = methodName
// The dispatch used to be commented out, leaving only the chat trace, so
// none of the xml_* events were ever delivered. The trace is now optional
// and the link message is always sent.
callMethod(integer callId, string methodName, list parameters) {
    if (DEBUG) {
        llSay(0, "/me - " + llGetScriptName() + ": " + methodName + "(" + llList2CSV(parameters) + ")");
    }
    llMessageLinked(llGetLinkNumber(), callId,
        llDumpList2String(parameters, PARAMETER_SEPERATOR), methodName);
}
// Sends the result of a method call as a link message to this prim:
//   num = methodIdentifyer
//   str = value joined with PARAMETER_SEPERATOR
//   id  = methodName with "_ret" appended
returnValue(string methodName, integer methodIdentifyer, list value) {
    string replyName = methodName + "_ret";
    string payload = llDumpList2String(value, PARAMETER_SEPERATOR);
    llMessageLinked(llGetLinkNumber(), methodIdentifyer, payload, replyName);
}
// Delimiter used when a list is flattened into a single string parameter.
string LIST_SEPERATOR = "|~|";

// Flattens src into one string, with LIST_SEPERATOR between elements.
string encodeList(list src) {
    string encoded = llDumpList2String(src, LIST_SEPERATOR);
    return encoded;
}
// Splits a string produced by encodeList back into a list of strings.
// Empty elements between separators are kept.
// An empty string decodes to the empty list: llParseStringKeepNulls("")
// would return [""], so an encoded empty list (for example the attribute
// lists of a tag without attributes) would otherwise come back with one
// bogus element.
list decodeList(string src) {
    if (src == "") {
        return [];
    }
    return llParseStringKeepNulls(src, [LIST_SEPERATOR], []);
}
// =============================================
// Triggered when the parser finishes reading the file.
// Issues the "xml_endDocument" call with the request id as its only parameter.
trigger_endDocument(integer requestId) {
    list args = [requestId];
    callMethod(0, "xml_endDocument", args);
}
// Triggered when the parser first starts reading the file.
// Issues the "xml_startDocument" call with the request id as its only parameter.
trigger_startDocument(integer requestId) {
    list args = [requestId];
    callMethod(0, "xml_startDocument", args);
}
// Triggered when the parser encounters an XML element (tag).
// Issues the "xml_startElement" call with: request id, element name,
// attribute names and attribute values. The two lists are each flattened
// with encodeList so they travel as single parameters.
trigger_startElement(integer requestId, string name, list attribNames, list attribVals) {
    string packedNames = encodeList(attribNames);
    string packedVals = encodeList(attribVals);
    callMethod(0, "xml_startElement", [requestId, name, packedNames, packedVals]);
}
// Triggered when the parser encounters an end XML element (tag).
// Issues the "xml_endElement" call with the request id and the element name.
trigger_endElement(integer requestId, string name) {
    list args = [requestId, name];
    callMethod(0, "xml_endElement", args);
}
// Triggered when the parser encounters character data (data not inside a tag).
// Issues the "xml_characters" call with the request id and the text.
trigger_characters(integer requestId, string characters) {
    list args = [requestId, characters];
    callMethod(0, "xml_characters", args);
}
// Triggered when the parser encounters an error that requires it to
// stop parsing.
// Issues the "xml_fatalError" call with: request id, line number,
// error constant and a human-readable detail string.
trigger_fatalError(integer requestId, integer lineNumber, integer errorConst, string details) {
    list args = [requestId, lineNumber, errorConst, details];
    callMethod(0, "xml_fatalError", args);
}
// Triggered when the parser encounters an error that probably
// invalidates the rest of the data in the document.
// Issues the "xml_error" call with: request id, line number,
// error constant and a human-readable detail string.
trigger_error(integer requestId, integer lineNumber, integer errorConst, string details) {
    list args = [requestId, lineNumber, errorConst, details];
    callMethod(0, "xml_error", args);
}
// Triggered when the parser encounters a recoverable flaw
// in the document, which most likely doesn't invalidate the data
// in the document.
// Issues the "xml_warning" call with: request id, line number,
// error constant and a human-readable detail string.
trigger_warning(integer requestId, integer lineNumber, integer errorConst, string details) {
    list args = [requestId, lineNumber, errorConst, details];
    callMethod(0, "xml_warning", args);
}
// ====== Library/Utility Functions: ======

// Finds pattern in src, ignoring the first `start` characters.
// Returns the index of the match measured from the beginning of src,
// or -1 when there is no match. A start of 0 or less searches the whole
// string (the value of start is still added to a successful result).
integer subStringFirstIndex(string src, string pattern, integer start) {
    string tail = src;
    if (start > 0) {
        tail = llDeleteSubString(src, 0, start - 1);
    }
    integer found = llSubStringIndex(tail, pattern);
    if (found == -1) {
        return -1;
    }
    return found + start;
}
// Returns the larger of the two integers (b when they are equal).
integer max(integer a, integer b) {
    integer larger = b;
    if (a > b) {
        larger = a;
    }
    return larger;
}
// Kudos to Jeffery Gomez for these two parseString* functions :-D

// llParseStringKeepNulls without the built-in cap on the number of
// separators and spacers: the lists are applied in chunks of at most
// 8 entries each, re-joining the intermediate result between passes.
//
// src        - text to split
// separators - strings that split the text and are discarded
// spacers    - strings that split the text and are kept as elements
// Returns the list of pieces, empty pieces included.
//
// The intermediate results are joined with the first separator, or, when
// no separators are given, with a random number string that does not
// occur in src.
//
// Fixes over the earlier revision:
//  * chunks were i..i+8 (nine entries) instead of eight;
//  * once one list was exhausted the end index was clamped to len - 1,
//    giving start > end, which llList2List treats as a wrap-around range.
//    The shorter list was therefore re-applied on later passes, and
//    re-applied spacers produced spurious empty elements.
list parseStringKeepNulls(string src, list separators, list spacers) {
    integer sep_num = llGetListLength(separators);
    integer spa_num = llGetListLength(spacers);
    string separator;
    if (sep_num > 0) {
        separator = llList2String(separators, 0);
    } else {
        // Generate a random string that is not part of the text:
        do {
            separator = (string) llFrand(1822901);
        } while (llSubStringIndex(src, separator) != -1);
    }
    integer top = max(sep_num, spa_num);
    integer i;
    for (i = 0; i < top; i += 8) {
        // A list that has run out contributes nothing to this pass.
        list sepChunk = [];
        list spaChunk = [];
        if (i < sep_num) sepChunk = llList2List(separators, i, i + 7);
        if (i < spa_num) spaChunk = llList2List(spacers, i, i + 7);
        list pieces = llParseStringKeepNulls(src, sepChunk, spaChunk);
        src = llDumpList2String(pieces, separator);
    }
    return llParseStringKeepNulls(src, [separator], []);
}
// llParseString2List without the built-in cap on the number of
// separators and spacers: the lists are applied in chunks of at most
// 8 entries each, re-joining the intermediate result between passes.
//
// src        - text to split
// separators - strings that split the text and are discarded
// spacers    - strings that split the text and are kept as elements
// Returns the list of pieces, empty pieces dropped.
//
// The intermediate results are joined with the first separator, or, when
// no separators are given, with a random number string that does not
// occur in src.
//
// Fixes over the earlier revision:
//  * chunks were i..i+8 (nine entries) instead of eight;
//  * once one list was exhausted the end index was clamped to len - 1,
//    giving start > end, which llList2List treats as a wrap-around range,
//    so the shorter list was re-applied on later passes.
list parseString2List(string src, list separators, list spacers) {
    integer sep_num = llGetListLength(separators);
    integer spa_num = llGetListLength(spacers);
    string separator;
    if (sep_num > 0) {
        separator = llList2String(separators, 0);
    } else {
        // Generate a random string that is not part of the text:
        do {
            separator = (string) llFrand(1822901);
        } while (llSubStringIndex(src, separator) != -1);
    }
    integer top = max(sep_num, spa_num);
    integer i;
    for (i = 0; i < top; i += 8) {
        // A list that has run out contributes nothing to this pass.
        list sepChunk = [];
        list spaChunk = [];
        if (i < sep_num) sepChunk = llList2List(separators, i, i + 7);
        if (i < spa_num) spaChunk = llList2List(spacers, i, i + 7);
        list pieces = llParseString2List(src, sepChunk, spaChunk);
        src = llDumpList2String(pieces, separator);
    }
    return llParseString2List(src, [separator], []);
}
// Set by parseQuotesIntact: TRUE when the string it parsed last ended
// while still inside a quoted section.
integer unclosedQuote = FALSE;

// Returns src parsed using spacers and seperators, leaving text that is
// surrounded by the quote character intact as a single element.
//
// src        - text to split
// seperators - delimiters that are dropped from the result
// spacers    - delimiters that are kept as their own elements
// quoteChar  - the quote character
// keepNulls  - TRUE to tokenise with parseStringKeepNulls (empty pieces
//              are kept), FALSE to tokenise with parseString2List
//
// A quote only opens when the quote character directly follows a
// seperator or spacer; any other quote character outside a quote is
// returned as an ordinary element. Inside a quote everything, delimiters
// included, is concatenated into one element until the next quote
// character. Text of a quote that is never closed is not returned;
// unclosedQuote reports that case.
//
// The earlier revision tokenised with combined spacers such as `="` and
// `" `. Those never reached the scanning loop: there were more than 8
// spacers, so the chunked tokenizer split them again on a later pass
// with the bare `=` and `"` spacers. Had they survived, the delimiter
// half of the token would have been swallowed. Quotes are now tracked
// from single tokens instead.
list parseQuotesIntact(string src, list seperators, list spacers, string quoteChar, integer keepNulls) {
    list delimeters = seperators + spacers;
    // Tokenise on every delimiter plus the quote character, keeping all
    // of them as elements so the scan below can see them.
    list tokenSpacers = delimeters + [quoteChar];
    llSay(0, "delimeters = \"" + llDumpList2String(tokenSpacers, "|") + "\"");

    list parsedData;
    if (keepNulls) {
        parsedData = parseStringKeepNulls(src, [], tokenSpacers);
    } else {
        parsedData = parseString2List(src, [], tokenSpacers);
    }
    llSay(0, "parsedData = " + llDumpList2String(parsedData, "|"));

    list ret;
    string betweenQuotes;
    integer withinQuote = FALSE;
    // TRUE while the most recent non-empty token outside a quote was a
    // seperator or spacer.
    integer afterDelimiter = FALSE;
    integer len = llGetListLength(parsedData);
    integer i;
    for (i = 0; i < len; ++i) {
        string element = llList2String(parsedData, i);
        if (element == quoteChar && withinQuote) {
            // Closing quote: emit the collected text as one element.
            ret += [betweenQuotes];
            betweenQuotes = "";
            withinQuote = FALSE;
            afterDelimiter = FALSE;
        } else if (element == quoteChar && afterDelimiter) {
            // Opening quote.
            withinQuote = TRUE;
        } else if (withinQuote) {
            betweenQuotes += element;
        } else {
            if (element != "") {
                afterDelimiter = (llListFindList(delimeters, [element]) != -1);
            }
            // Dont add it if its a seperator.
            if (llListFindList(seperators, [element]) == -1) {
                ret += [element];
            }
        }
    }
    // If the string ended still inside a quote, then
    // the quote is unclosed.
    unclosedQuote = withinQuote;
    return ret;
}
// Returns TRUE if src consists only of the given value.
// Only the first entry of `element` is used. The check sorts src in
// both directions and requires the value to be found at index 0 each
// time, i.e. to be both the smallest and the largest entry.
// An empty src yields FALSE.
integer isListFullOf(list src, list element) {
    list probe = llList2List(element, 0, 0);
    list ascending = llListSort(src, 1, TRUE);
    if (llListFindList(ascending, probe) != 0) {
        return FALSE;
    }
    list descending = llListSort(ascending, 1, FALSE);
    return llListFindList(descending, probe) == 0;
}
// Filler used to pad a list that is shorter than the requested index.
string NULL = "";

// Replaces the element(s) of dest starting at index start with the
// elements of src and returns the resulting list.
// If dest is shorter than start it is first padded with NULL entries so
// that src lands at the requested index.
// An empty src returns dest unchanged: previously it led to
// llDeleteSubList(dest, start, start - 1), and a start greater than the
// end index is a wrap-around range that removed the whole list.
list replaceListSlice(list dest, list src, integer start) {
    integer srcLen = llGetListLength(src);
    if (srcLen == 0) {
        return dest;
    }
    integer len;
    for (len = llGetListLength(dest); len < start; len++) {
        dest += NULL;
    }
    return llListInsertList(llDeleteSubList(dest, start, start + srcLen - 1), src, start);
}
// A list2ListStrided function that obeys its start parameter:
// drops the first `start` entries of src, then returns every
// stride-th entry of what remains, beginning with its first one.
list list2ListStrided(list src, integer start, integer stride) {
    list tail = src;
    if (start > 0) {
        tail = llDeleteSubList(src, 0, start - 1);
    }
    return llList2ListStrided(tail, 0, -1, stride);
}
// Returns TRUE if str starts with prefix, mostly for readability.
integer strStartsWith(string str, string prefix) {
    integer where = llSubStringIndex(str, prefix);
    return where == 0;
}
// Replaces all instances of toReplace in str with replaceWith.
// Works by splitting str on toReplace (keeping empty pieces) and
// joining the pieces again with replaceWith in between.
string replaceAll(string str, string toReplace, string replaceWith) {
    list pieces = llParseStringKeepNulls(str, [toReplace], []);
    return llDumpList2String(pieces, replaceWith);
}
// ====== Functions specific to this purpose: ======

// Converts all entities in unconverted to literals.
// Each entry of the ENTITIES list is replaced, in list order, by the
// entry at the same index of the LITERALS list.
string convertEntities(string unconverted) {
    string result = unconverted;
    integer total = llGetListLength(ENTITIES);
    integer n = 0;
    while (n < total) {
        string entity = llList2String(ENTITIES, n);
        string literal = llList2String(LITERALS, n);
        result = replaceAll(result, entity, literal);
        ++n;
    }
    return result;
}
// Parses the data contained within the tagData global, triggers the
// matching element events and clears tagData.
// returns TRUE if parsing successful (no fatalErrors triggered)
//
// tagData holds the text between "<" and ">". It is tokenised with
// parseQuotesIntact: spaces and newlines are dropped, "=" and "/" are
// kept as their own tokens, and double-quoted text stays in one piece.
//   "/" name           -> end element
//   name attributes    -> start element
//   name attributes "/"-> start element followed by end element
//
// Fixes over the earlier revision:
//  * attributes arrive as name, "=", value triples, so the three lists
//    must be taken with a stride of 3. With the old stride of 2 every
//    non-empty attribute list was reported as malformed.
//  * "\n" is a seperator as well, so a tag that spans several lines no
//    longer ends up with newlines glued to its name or attribute names.
integer dumpTagData() {
    llSay(0, "dumpTagData called.");
    list parsedTag = parseQuotesIntact(tagData, [" ", "\n"], ["=", "/"], "\"", FALSE);
    llSay(0, "parsedTag = \"" + llDumpList2String(parsedTag, "|") + "\"");
    string tagName = llList2String(parsedTag, 0);
    if (tagName == "/") {
        // This is an end tag.
        tagName = llList2String(parsedTag, 1);
        trigger_endElement(curId, tagName);
    } else {
        // Self-contained if it has a / at the end.
        integer selfContained = llList2String(parsedTag, -1) == "/";
        list attribData = llDeleteSubList(parsedTag, 0, 0);
        if (selfContained)
            attribData = llDeleteSubList(attribData, -1, -1); // Remove the / at the end.

        // attribData is a run of (name, "=", value) triples.
        list attribNames = list2ListStrided(attribData, 0, 3);  // Every third element, starting at the first.
        list attribEquals = list2ListStrided(attribData, 1, 3); // Every third element, starting at the second.
        list attribValues = list2ListStrided(attribData, 2, 3); // Every third element, starting at the third.
        integer numVals = llGetListLength(attribValues);

        // Check each list for consistancy; all should be the same length
        // and attribEquals should be filled with = only.
        if (numVals != llGetListLength(attribNames) ||
            numVals != llGetListLength(attribEquals) ||
            !(isListFullOf(attribEquals, ["="]) || numVals == 0)) {
            trigger_error(curId, curLine, PARSING, "Attributes for " + tagName + " are defined incorrectly.");
        }
        llSay(0, "numVals = " + (string) numVals);
        llSay(0, "attribNames len = " + (string) llGetListLength(attribNames));
        llSay(0, "attribEquals len = " + (string) llGetListLength(attribEquals));
        llSay(0, "attribNames = \"" + llDumpList2String(attribNames, "|") + "\"");

        // Convert the entities in each value:
        list convertedValues;
        integer i;
        for (i = 0; i < numVals; i++) {
            convertedValues += [convertEntities(llList2String(attribValues, i))];
        }

        trigger_startElement(curId, tagName, attribNames, convertedValues);
        if (selfContained) {
            trigger_endElement(curId, tagName);
        }
    }
    tagData = "";
    return TRUE;
}
// Parses the data contained within the charData global:
// if any text is pending, its entities are converted, it is reported
// through trigger_characters and charData is emptied.
// returns TRUE if parsing successful (no fatalErrors triggered)
integer dumpCharData() {
    if (charData != "") {
        string pending = charData;
        trigger_characters(curId, convertEntities(pending));
        charData = "";
    }
    return TRUE;
}
|
Christopher Omega
Oxymoron
Join date: 28 Mar 2003
Posts: 1,828
|
Part 2
03-12-2005 10:07
// Parses arbitrary XML text data.
// returns TRUE if parsing successful (no fatalErrors triggered).
//
// The text is consumed from the front, one piece per loop pass. The
// parser state lives in globals, so a tag or a run of character data
// may continue across calls:
//   withinTag - TRUE between "<" and the matching ">"
//   tagData   - text collected for the current tag
//   charData  - text collected outside of tags
// A completed tag is handed to dumpTagData; character data is handed to
// dumpCharData when the next "<" is reached.
//
// Changes over the earlier revision:
//  * the text is appended exactly as given. The old code added "\n" to
//    every remainder, including the empty remainder left after a tag,
//    so each line gained a stray newline, and a doubled one for
//    notecard lines, which already arrive with "\n" appended.
//  * the one-recursion-per-piece structure is now a loop, so a long
//    line no longer deepens the call stack with every tag.
integer parseLine(string lineData) {
    while (lineData != "") {
        llSay(0, "parseLine called, lineData == \"" + lineData + "\"");
        llSay(0, "tagData = \"" + tagData + "\"");
        llSay(0, "charData = \"" + charData + "\"");
        if (!withinTag) {
            if (strStartsWith(lineData, "<")) {
                withinTag = TRUE;
                lineData = llDeleteSubString(lineData, 0, 0);
            } else if (strStartsWith(lineData, ">")) {
                // Yikes, this shouldn't happen.
                trigger_error(curId, curLine, PARSING, "Element ended without beginning.");
                // Try skipping over it
                lineData = llDeleteSubString(lineData, 0, 0);
            } else {
                // Handle character data:
                integer tagPosition = llSubStringIndex(lineData, "<");
                if (tagPosition == -1) {
                    // No tag on the rest of the line: it is all text.
                    charData += lineData;
                    lineData = "";
                } else {
                    charData += llGetSubString(lineData, 0, tagPosition - 1);
                    if (!dumpCharData())
                        return FALSE; // fatalError occured in dumpCharData.
                    lineData = llDeleteSubString(lineData, 0, tagPosition - 1);
                }
            }
        } else {
            if (strStartsWith(lineData, "<")) {
                // Yikes, this shouldn't happen.
                trigger_error(curId, curLine, PARSING, "Element cannot be defined within another element.");
                // Try skipping over it.
                lineData = llDeleteSubString(lineData, 0, 0);
            } else if (strStartsWith(lineData, ">")) {
                llSay(0, "Tag ended!");
                if (!dumpTagData())
                    return FALSE; // fatalError occured in dumpTagData.
                withinTag = FALSE;
                lineData = llDeleteSubString(lineData, 0, 0);
            } else {
                // Handle additional tag data:
                integer endPosition = llSubStringIndex(lineData, ">");
                if (endPosition == -1) {
                    // The tag continues past the end of this text.
                    tagData += lineData;
                    lineData = "";
                } else {
                    tagData += llGetSubString(lineData, 0, endPosition - 1);
                    lineData = llDeleteSubString(lineData, 0, endPosition - 1);
                }
            }
        }
    }
    return TRUE;
}
// Initializes the first queued parse request, prepares it for
// processing and removes it from the queue.
//
// Returns TRUE when a notecard request has been started (its first line
// has been requested from the dataserver), FALSE when the queue is empty.
// String requests are parsed on the spot, so several of them may be
// worked through in a single call before a result is returned.
//
// Changes over the earlier revision:
//  * xml_startDocument is now triggered for every request and
//    xml_endDocument for every string request; neither event was ever
//    sent before.
//  * character data left over at the end of a string is flushed instead
//    of being discarded when the next request resets the buffers.
//  * a string that ends inside a tag is reported through xml_error.
//  * consecutive string requests are handled in a loop rather than by
//    recursion.
integer popRequest() {
    while (llGetListLength(requestSourceQueue) > 0) {
        // Clear document variables:
        tagData = "";
        charData = "";
        withinTag = FALSE;

        // Get next request data.
        curSource = llList2String(requestSourceQueue, 0);
        curId = llList2Integer(requestIdQueue, 0);
        curType = llList2Integer(requestTypeQueue, 0);

        // Remove next request from queues.
        requestSourceQueue = llDeleteSubList(requestSourceQueue, 0, 0);
        requestIdQueue = llDeleteSubList(requestIdQueue, 0, 0);
        requestTypeQueue = llDeleteSubList(requestTypeQueue, 0, 0);

        trigger_startDocument(curId);

        if (curType == REQUEST_FILE_READ) {
            // Data is a filename; the lines arrive through the
            // dataserver event.
            curLine = 0;
            curQuery = llGetNotecardLine(curSource, curLine);
            return TRUE;
        }

        // Data is actual XML text.
        curLine = -1;
        if (parseLine(curSource)) {
            // Deliver text that follows the last tag.
            dumpCharData();
            if (withinTag) {
                trigger_error(curId, curLine, PARSING, "Document ended inside an unclosed tag.");
            }
            trigger_endDocument(curId);
        }
    }
    return FALSE;
}
// Pushes a new parse request onto the queue.
// The request is stored as one entry in each of the three parallel
// queue lists (source, type, id).
pushRequest(string data, integer type, integer id) {
    requestSourceQueue = requestSourceQueue + [data];
    requestTypeQueue = requestTypeQueue + [type];
    requestIdQueue = requestIdQueue + [id];
}
// Global constants:

// XML Entities and Literals:
// ENTITIES[i] is replaced by LITERALS[i]. The entity strings had been
// turned into their literal characters by HTML rendering (which also
// left an unterminated string in the list); they are restored here.
// "&amp;" comes last so that text such as "&amp;lt;" decodes to "&lt;"
// and is not decoded a second time.
list ENTITIES = ["&lt;", "&gt;", "&apos;", "&quot;", "&amp;"];
list LITERALS = ["<", ">", "'", "\"", "&"];

// Error constants, passed to fatalError, error and warning:
integer PARSING = 1;        // Parsing error occured.
integer FILE_NOT_FOUND = 2; // The notecard isnt in objects inv.

// Request types
integer REQUEST_FILE_READ = 1;
integer REQUEST_STRING_READ = 2;

// Global variables:

// Queues, used to queue calls to parseXMLFile, as the script
// can only be handling one at a time.
list requestTypeQueue;
list requestSourceQueue;
list requestIdQueue;

// Info on request currently being processed in the parsing state.
string curSource;
integer curId;
integer curLine;
integer curType;
key curQuery; // Dataserver request key.

// Contains intermittant tag and character data;
// data on the tag/characters currently being parsed.
// Used by parseLine/dumpTagData/dumpCharData functions.
string tagData;
string charData;
integer withinTag = FALSE;

// Idle state: waits for a parse request, queues it and switches to the
// parsing state.
default {
    // Requests arrive as link messages whose key field carries the
    // method name and whose string field carries the parameters joined
    // with PARAMETER_SEPERATOR.
    link_message(integer sender, integer num, string parameters, key methodName) {
        if (methodName == "parseXMLFile") {
            list paramList = llParseStringKeepNulls(parameters, [PARAMETER_SEPERATOR], []);
            // Method signature:
            // parseXMLFile(integer requestId, string fileName)
            integer requestId = (integer) llList2String(paramList, 0);
            string fileName = llList2String(paramList, 1);
            if (llGetInventoryKey(fileName) != NULL_KEY) {
                pushRequest(fileName, REQUEST_FILE_READ, requestId);
                state parsing;
            } else {
                trigger_fatalError(requestId, -1, FILE_NOT_FOUND, fileName + " is not accessible");
            }
        } else if (methodName == "parseXMLString") {
            list paramList = llParseStringKeepNulls(parameters, [PARAMETER_SEPERATOR], []);
            // Method signature:
            // parseXMLString(integer requestId, string str)
            integer requestId = (integer) llList2String(paramList, 0);
            string str = llList2String(paramList, 1);
            pushRequest(str, REQUEST_STRING_READ, requestId);
            state parsing;
        }
    }
}
// Working state: processes queued requests one at a time and returns to
// the default state once the queue is empty.
state parsing {
    state_entry() {
        // If there aren't any more queued requests:
        if (!popRequest())
            state default;
    }

    // Receives the notecard lines requested with llGetNotecardLine.
    dataserver(key query, string data) {
        if (query == curQuery) {
            curQuery = "";
            if (data != EOF) {
                // If parsing successful:
                if (parseLine(data + "\n")) {
                    // Request the next line:
                    curQuery = llGetNotecardLine(curSource, ++curLine);
                } else {
                    // Something triggered a fatal error,
                    // jump to the next request.
                    if (!popRequest())
                        state default;
                }
            } else {
                // End of the notecard. This branch used to be empty, which
                // left the script in this state with no outstanding query:
                // queued requests were never started and the script never
                // went back to the default state.
                dumpCharData(); // Deliver text that follows the last tag.
                if (withinTag) {
                    trigger_error(curId, curLine, PARSING, "Document ended inside an unclosed tag.");
                }
                trigger_endDocument(curId);
                // TODO: Report elements that were never closed and
                // run-on quotes.
                if (!popRequest())
                    state default;
            }
        }
    }

    // Requests that arrive while a document is being parsed are only
    // queued; they are started when the current document is finished.
    link_message(integer sender, integer num, string parameters, key methodName) {
        if (methodName == "parseXMLFile") {
            list paramList = llParseStringKeepNulls(parameters, [PARAMETER_SEPERATOR], []);
            // Method signature:
            // parseXMLFile(integer requestId, string fileName)
            integer requestId = (integer) llList2String(paramList, 0);
            string fileName = llList2String(paramList, 1);
            if (llGetInventoryKey(fileName) != NULL_KEY) {
                pushRequest(fileName, REQUEST_FILE_READ, requestId);
            } else {
                trigger_fatalError(requestId, -1, FILE_NOT_FOUND, fileName + " is not accessible");
            }
        } else if (methodName == "parseXMLString") {
            list paramList = llParseStringKeepNulls(parameters, [PARAMETER_SEPERATOR], []);
            // Method signature:
            // parseXMLString(integer requestId, string str)
            integer requestId = (integer) llList2String(paramList, 0);
            string str = llList2String(paramList, 1);
            pushRequest(str, REQUEST_STRING_READ, requestId);
        }
    }
}
|