12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763 |
- define(["index", "options", "stemmer", "util"], function(index, options, stemmer, util) {
-
-
-
- var useCJKTokenizing = false; // set by searchSingleWord() to true when the indexer language is "zh" or "ko"
-
- var w = {}; // NOTE(review): no use of this module-level `w` is visible in this part of the file - confirm before removing
-
- var excluded = []; // query words dropped by preprocessSearchQuery() (stop words; operators and parentheses in phrase searches)
-
- var realSearchQuery; // the query actually searched for; rewritten by preprocessSearchQuery() and calculateRPN()
-
- var singleWordExactMatch = false; // when true, searchSingleWord() looks up the word itself instead of prefix/substring matches
-
- var searchInsideFilePath = false; // when true, index words are matched by substring (wordsContains) rather than by prefix
-
- var booleanSearch = false; // set by normalizeQuery() once the query contains a known operator
-
- var defaultOperator = "or"; // operator normalizeQuery() inserts between two adjacent terms
-
- var knownOperators = ["and", "or", "not"]; // words treated as boolean operators
-
- var stemQueryMap = []; // used as a map by tokenize(): stem (or word) -> word as typed
-
- var resultCategoriesMap = {}; // category number -> list of results; filled by addSearchResultCategory()
-
- var resultCategoriesCount = 0; // number of categories stored in resultCategoriesMap so far
-
- var resultCategoriesMapFiles = []; // file numbers already placed in some category (used to skip duplicates)
- var localNote =
- '<div class="alert alert-warning alert-dismissible fade show" role="alert">'
- + '<strong>WARNING!</strong> Due to security reasons, the Japanese Morphological Analyzer (Kuromoji) '
- + 'is disabled while browsing WebHelp output locally. <a href="#" style="font-size: 0.9em"> [ Read more ]</a>'
- + '</div>'; // HTML notice inserted by performSearchDriver() before #searchResults
-
/**
 * Record describing one topic: its title, its relative path and its short description.
 *
 * @param {string} title topic title
 * @param {string} relativePath path of the topic file
 * @param {string} shortDescription short description text of the topic
 * @constructor
 */
function TopicInfo(title, relativePath, shortDescription) {
    var topic = this;

    topic.title = title;
    topic.relativePath = relativePath;
    topic.shortDescription = shortDescription;
}
-
/**
 * Outcome of one search.
 *
 * @param {string} searchExpression the expression that was actually searched
 * @param {Array} excluded words that were left out of the search
 * @param {string} originalSearchExpression the expression as typed (after filtering)
 * @param {Array} documents the matching documents
 * @param {string} errorMsg error message, if the query could not be processed
 * @constructor
 */
function SearchResult(searchExpression, excluded, originalSearchExpression, documents, errorMsg) {
    var result = this;

    result.searchExpression = searchExpression;
    result.excluded = excluded;
    result.documents = documents;
    result.originalSearchExpression = originalSearchExpression;
    result.error = errorMsg;
}
-
/**
 * One document (topic) of a search result.
 *
 * @param topicID identifier of the topic in the index
 * @param {string} relativePath path of the topic file
 * @param {string} title topic title
 * @param {string} shortDescription short description of the topic
 * @param {Array} words the words matched in this document
 * @param {number} scoring score of the document
 * @param {Array} breadcrumb topic infos of the ancestors of this topic
 * @constructor
 */
function DocumentInfo(topicID, relativePath, title, shortDescription, words, scoring, breadcrumb) {
    var doc = this;

    doc.topicID = topicID;
    doc.relativePath = relativePath;
    doc.title = title;
    doc.shortDescription = shortDescription;
    doc.words = words;
    doc.scoring = scoring;
    doc.breadcrumb = breadcrumb;
}
- function performSearchDriver(searchQuery, _callback) {
- var indexerLanguage = options.getIndexerLanguage();
- var useKuromoji = indexerLanguage.indexOf("ja") != -1 && options.getBoolean('webhelp.enable.search.kuromoji.js')
- && !util.isLocal();
- if (indexerLanguage.indexOf("ja") != -1 && util.isLocal() && options.getBoolean('webhelp.enable.search.kuromoji.js')) {
- var note = $('<div/>').addClass('col-xs-12 col-sm-12 col-md-12 col-lg-12')
- .html(localNote);
- $('#searchResults').before(note);
- }
- if (useKuromoji) {
- require(["kuromoji"], function (kuromoji) {
- kuromoji.builder({ dicPath: "oxygen-webhelp/lib/kuromoji/dict" }).build(function (err, tokenizer) {
-
- var tokens = tokenizer.tokenize(searchQuery);
- var finalWordsList = [];
- for (var w in tokens) {
- var word = tokens[w].surface_form;
- if (word!=" ") {
- finalWordsList.push(word);
- }
- }
- if (finalWordsList.length) {
- var finalWordsString = finalWordsList.join(" ");
- _callback(performSearchInternal(finalWordsString));
- } else {
- util.debug("Empty set");
- }
- });
- })
- } else {
- _callback(performSearchInternal(searchQuery));
- }
- }
-
/**
 * Executes a search and builds the SearchResult.
 *
 * Steps: reset the module state (init), detect a quoted phrase search, pre-process
 * and normalize the query, convert it to RPN and evaluate it, distribute the hits
 * into result categories, then sort them and build one DocumentInfo per hit.
 *
 * When the pre-processed query is empty (for example only stop words) or invalid,
 * the result carries an empty document list rather than `undefined`.
 *
 * @param {string} searchQuery the query to search for
 * @returns {SearchResult} the result; `error` holds the pre-processing error message, if any
 */
function performSearchInternal(searchQuery) {
    util.debug("searchQuery", searchQuery);
    init();
    var initialSearchExpression = searchQuery;
    var phraseSearch = false;
    // Initialised up-front so that `documents` is always an array.
    var docInfos = [];

    searchQuery = searchQuery.trim();
    if (searchQuery.length > 2 && !useCJKTokenizing) {
        // A query wrapped in quotes is a phrase search.
        var firstChar = searchQuery.charAt(0);
        var lastChar = searchQuery.charAt(searchQuery.length - 1);
        phraseSearch =
            (firstChar == "'" || firstChar == '"') &&
            (lastChar == "'" || lastChar == '"');
    }

    searchQuery = searchQuery.replace(/"/g, " ").replace(/'/g, " ");
    var errorMsg;
    try {
        realSearchQuery = preprocessSearchQuery(searchQuery, phraseSearch);
    } catch (e) {
        errorMsg = e.message;
        util.debug(e);
    }
    util.debug("Search query after pre-process: ", realSearchQuery);

    if (realSearchQuery.trim().length != 0) {
        searchQuery = normalizeQuery(realSearchQuery);
        var searchWordCount = 1;
        if (!useCJKTokenizing) {
            var sw = searchQuery.split(" ");
            searchWordCount = sw.length;
            singleWordExactMatch = phraseSearch && searchWordCount == 1;
            if (!singleWordExactMatch && !phraseSearch) {
                searchInsideFilePath = isURLorFilePath(realSearchQuery);
            }
        }

        var rpnExpression = convertToRPNExpression(searchQuery);
        var res = calculateRPN(rpnExpression);
        var sRes = res.value;

        if (searchWordCount == 1) {
            var doStem = options.getBoolean('use.stemming');
            if (!singleWordExactMatch && !doStem && !useCJKTokenizing) {
                // Evaluate again in exact-match mode; exact hits form the first
                // category, the hits computed above form the second one.
                singleWordExactMatch = true;
                var exactMatchRes = calculateRPN(rpnExpression);
                addSearchResultCategory(exactMatchRes.value);
                addSearchResultCategory(sRes);
            } else {
                addSearchResultCategory(sRes);
            }
        } else if (phraseSearch) {
            sRes = filterResultsForPhraseSearch(res.value, realSearchQuery);
            addSearchResultCategory(sRes);
        } else if (booleanSearch) {
            groupResultsByWordCount(sRes);
        } else {
            // Documents containing the words as a phrase come first.
            var phraseSearchResult =
                filterResultsForPhraseSearch(res.value, realSearchQuery);
            addSearchResultCategory(phraseSearchResult);
            groupResultsByWordCount(sRes);
        }

        sRes = sortSearchResults();
        for (var i = 0; i < sRes.length; i++) {
            var cDoc = sRes[i];
            var topicInfo = computeTopicInfo(index.fil[cDoc.filenb]);
            if (topicInfo == null) {
                warn("There is no definition for topic with ID ", cDoc.filenb);
                continue;
            }
            var wordsStrArray = [];
            for (var k = 0; k < cDoc.wordsList.length; k++) {
                wordsStrArray.push(cDoc.wordsList[k].word);
            }
            docInfos.push(
                new DocumentInfo(
                    cDoc.filenb,
                    topicInfo.relativePath,
                    topicInfo.title,
                    topicInfo.shortDescription,
                    wordsStrArray,
                    cDoc.scoring,
                    computeBreadcrumbTopicInfos(cDoc.filenb)));
        }
    }

    initialSearchExpression = filterOriginalSearchExpression(initialSearchExpression);
    return new SearchResult(realSearchQuery, excluded, initialSearchExpression, docInfos, errorMsg);
}
-
/**
 * Parses an index entry of the form "relativePath@@@title@@@shortDescription".
 * The first "@@@" ends the path and the last "@@@" starts the short description.
 * Angle brackets in the title are escaped as HTML entities.
 *
 * @param {string|undefined} topicInfoString the raw index entry
 * @returns {TopicInfo|null} the parsed topic info, or null when the entry is undefined
 */
function computeTopicInfo(topicInfoString) {
    if (topicInfoString === undefined) {
        return null;
    }
    var firstSeparator = topicInfoString.indexOf("@@@");
    var lastSeparator = topicInfoString.lastIndexOf("@@@");

    var relPath = topicInfoString.substring(0, firstSeparator);
    // Escape "<" and ">" so that the title cannot be interpreted as markup.
    var topicTitle = topicInfoString.substring(firstSeparator + 3, lastSeparator)
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;");
    var topicShortDesc = topicInfoString.substring(lastSeparator + 3, topicInfoString.length);

    return new TopicInfo(topicTitle, relPath, topicShortDesc);
}
-
/**
 * Collects the ancestors of a topic by following index.link2parent until an
 * undefined parent or -1 is reached.
 *
 * @param topicID the topic to start from
 * @returns {Array} the ancestor IDs, outermost ancestor first
 */
function computePath2Root(topicID) {
    var ancestors = [];
    for (var current = index.link2parent[topicID];
         current !== undefined && current !== -1;
         current = index.link2parent[current]) {
        ancestors.push(current);
    }
    // Collected nearest-first; callers expect the outermost ancestor first.
    return ancestors.reverse();
}
-
/**
 * Builds the breadcrumb of a topic: the topic infos of all its ancestors, in the
 * order returned by computePath2Root(). Ancestors without an index entry are skipped.
 *
 * @param topicIndex the topic whose breadcrumb is computed
 * @returns {Array} the topic infos of the ancestors
 */
function computeBreadcrumbTopicInfos(topicIndex) {
    var crumbs = [];
    var ancestors = computePath2Root(topicIndex);
    for (var pos = 0; pos < ancestors.length; pos++) {
        var info = computeTopicInfo(index.fil[ancestors[pos]]);
        if (info === null) {
            continue;
        }
        crumbs.push(info);
    }
    return crumbs;
}
-
/**
 * Resets the per-search module state before a new search is executed.
 */
function init() {
    // Query state.
    realSearchQuery = "";
    excluded = [];

    // Search mode flags.
    booleanSearch = false;
    singleWordExactMatch = false;
    searchInsideFilePath = false;

    // Result categories.
    resultCategoriesMapFiles = [];
    resultCategoriesCount = 0;
    resultCategoriesMap = {};
}
-
/**
 * Registers a new result category. Every entry gets a score of at least 1. Entries
 * whose file already belongs to a previously added category are left out, and a
 * category is only stored when at least one entry remains.
 *
 * @param {Array} searchCategory results (objects with `filenb` and `scoring`)
 */
function addSearchResultCategory(searchCategory) {
    var unseen = [];
    for (var pos = 0; pos < searchCategory.length; pos++) {
        var entry = searchCategory[pos];
        entry.scoring = Math.max(1, entry.scoring);
        if (resultCategoriesMapFiles.indexOf(entry.filenb) != -1) {
            // File already reported in an earlier category.
            continue;
        }
        unseen.push(entry);
        resultCategoriesMapFiles.push(entry.filenb);
    }
    if (unseen.length === 0) {
        return;
    }
    resultCategoriesMap[resultCategoriesCount] = unseen;
    resultCategoriesCount += 1;
}
-
/**
 * Rescales the scores of a result list in place so that the highest score becomes 99.
 * Each score is multiplied by 99 / max, rounded up and capped at 99. Nothing is
 * changed when the highest score is 0.
 *
 * @param {Array} sortResult results (objects with a numeric `scoring`)
 */
function scaleSortResultScoring(sortResult) {
    var highest = sortResult.reduce(function (best, entry) {
        return Math.max(best, entry.scoring);
    }, 0);
    if (highest == 0) {
        return;
    }
    var factor = 99 / highest;
    sortResult.forEach(function (entry) {
        entry.scoring = Math.min(99, Math.ceil(entry.scoring * factor));
    });
}
/**
 * Flattens the result categories into one ranked list. Within each category the
 * scores are rescaled (scaleSortResultScoring) and sorted in descending order; every
 * score is then shifted by 100 for each category that follows, so that earlier
 * categories always rank above later ones.
 *
 * @returns {Array} all results, best first
 */
function sortSearchResults() {
    var categoryCount = Object.keys(resultCategoriesMap).length;
    var byScoreDescending = function (first, second) {
        return second.scoring - first.scoring;
    };

    var ranked = [];
    for (var category = 0; category < categoryCount; category++) {
        var entries = resultCategoriesMap[category];
        scaleSortResultScoring(entries);
        entries.sort(byScoreDescending);

        var bonus = (categoryCount - 1 - category) * 100;
        for (var pos = 0; pos < entries.length; pos++) {
            entries[pos].scoring = entries[pos].scoring + bonus;
        }
        ranked = ranked.concat(entries);
    }
    return ranked;
}
-
/**
 * Keeps only the results in which the searched words occur as a phrase, that is:
 * the result matched exactly the searched words, in the same order, and there is an
 * occurrence of the first word followed by the other words at consecutive positions.
 *
 * Word positions are stored as base-32 strings; a position of -1 is ignored.
 * Search words are stemmed (when stemming is enabled and a stemmer is available)
 * and lower-cased before being compared with the words of each result.
 *
 * @param {Array} resPerFileArray results (objects with a `wordsList` of {word, indices})
 * @param {string} realSearchQuery the searched words separated by spaces
 * @returns {Array} the results that contain the phrase
 */
function filterResultsForPhraseSearch(resPerFileArray, realSearchQuery) {
    var doStem = options.getBoolean('use.stemming');
    var canStem = typeof stemmer != "undefined" && doStem;

    // Normalise the search words once instead of once per result.
    var expectedWords = realSearchQuery.split(" ");
    for (var s = 0; s < expectedWords.length; s++) {
        var normalized = expectedWords[s];
        if (canStem) {
            normalized = stemmer(normalized);
        }
        expectedWords[s] = normalized.toLowerCase();
    }

    // True when the result words are exactly the expected words, in order.
    function hasSameWords(wordsList) {
        if (expectedWords.length != wordsList.length) {
            return false;
        }
        for (var j = 0; j < wordsList.length; j++) {
            if (expectedWords[j] != wordsList[j].word) {
                return false;
            }
        }
        return true;
    }

    // True when some occurrence of the first word is followed by every other word
    // at the next position.
    function occursConsecutively(wordsList) {
        var firstIndices = wordsList[0].indices;
        for (var f = 0; f < firstIndices.length; f++) {
            var position = parseInt(firstIndices[f], 32);
            if (position == -1) {
                continue;
            }
            var chained = true;
            for (var w = 1; w < wordsList.length && chained; w++) {
                var nextIndices = wordsList[w].indices;
                chained = false;
                for (var n = 0; n < nextIndices.length; n++) {
                    var candidate = parseInt(nextIndices[n], 32);
                    if (candidate != -1 && position == candidate - 1) {
                        position = candidate;
                        chained = true;
                        break;
                    }
                }
            }
            if (chained) {
                return true;
            }
        }
        return false;
    }

    var fResult = [];
    for (var i = 0; i < resPerFileArray.length; i++) {
        var wordsList = resPerFileArray[i].wordsList;
        if (hasSameWords(wordsList) && occursConsecutively(wordsList)) {
            fResult.push(resPerFileArray[i]);
        }
    }
    return fResult;
}
-
/**
 * Cleans the search expression typed by the user so that it can be shown back:
 * markup and path characters (< > " ' = \ /), a few URL-encoded punctuation marks,
 * "@" and "*" are replaced by spaces, runs of spaces are collapsed, and a leading or
 * trailing space is removed.
 *
 * @param {string} searchTextField the expression as typed
 * @returns {string} the filtered expression
 */
function filterOriginalSearchExpression(searchTextField) {
    searchTextField = searchTextField.replace(/</g, " ")
        .replace(/>/g, " ")
        .replace(/"/g, " ")
        .replace(/'/g, " ")
        .replace(/=/g, " ")
        .replace(/0\\/g, " ")
        .replace(/\\/g, " ")
        .replace(/\//g, " ")
        .replace(/ +/g, " ");

    // "</" and "$_" are protected by placeholders while the punctuation is replaced.
    searchTextField = searchTextField
        .replace(/<\//g, "_st_")
        .replace(/\$_/g, "_di_")
        .replace(/%2C|%3B|%21|%3A|@|\/|\*/g, " ")
        .replace(/(%20)+/g, " ")
        .replace(/_st_/g, "</")
        .replace(/_di_/g, "%24_");

    searchTextField = searchTextField.replace(/ +/g, " ");
    // Spaces were collapsed above, so at most one space remains at each end.
    searchTextField = searchTextField.replace(/ $/, "").replace(/^ /, "");
    return searchTextField;
}
-
/**
 * Prepares the raw query for searching: validates the parentheses, isolates them
 * with spaces, removes punctuation around words, lower-cases the words and drops
 * stop words. Dropped words are appended to `excluded`; in a phrase search the
 * operators and parentheses are dropped as well. Also stores the result in
 * `realSearchQuery`.
 *
 * @param {string} query the raw query
 * @param {boolean} phraseSearch true when the query was quoted
 * @returns {string} the words to search for, separated by single spaces
 * @throws {Error} "Invalid expression!" when the parentheses do not match
 */
function preprocessSearchQuery(query, phraseSearch) {
    var searchTextField = trim(query);

    var openBracket = [];
    var closedBracket = [];
    var idx = query.indexOf("(");
    while (idx !== -1) {
        openBracket.push(idx);
        idx = query.indexOf("(", idx + 1);
    }
    idx = query.indexOf(")");
    while (idx !== -1) {
        closedBracket.push(idx);
        idx = query.indexOf(")", idx + 1);
    }
    if (openBracket.length != closedBracket.length) {
        throw new Error("Invalid expression!");
    }
    // The n-th "(" must come before the n-th ")". Loop on the length: position 0 is
    // a valid (falsy) bracket index and must not end the validation.
    while (openBracket.length > 0) {
        if (openBracket.shift() > closedBracket.shift()) {
            throw new Error("Invalid expression!");
        }
    }

    // Surround parentheses with spaces so that they become separate tokens.
    searchTextField = searchTextField.replace(/\((\S*)/g, '( $1');
    searchTextField = searchTextField.replace(/\)(\S*)/g, ') $1');
    searchTextField = searchTextField.replace(/(\S*)\)/g, '$1 )');

    // Drop commas, and ".", "!" and "?" when they are next to whitespace.
    searchTextField = searchTextField.replace(/[,]/g, ' ');
    searchTextField = searchTextField.replace(/\s\./g, ' ');
    searchTextField = searchTextField.replace(/\.\s/g, ' ');
    searchTextField = searchTextField.replace(/\s!/g, ' ');
    searchTextField = searchTextField.replace(/!\s/g, ' ');
    searchTextField = searchTextField.replace(/\s\?/g, ' ');
    searchTextField = searchTextField.replace(/\?\s/g, ' ');

    var wordsArray = [];
    var splitExpression = searchTextField.split(" ");
    for (var t = 0; t < splitExpression.length; t++) {
        var cw = splitExpression[t].toLowerCase();
        if (cw.trim().length == 0) {
            continue;
        }
        var isSyntax = contains(knownOperators, cw) || "(" == cw || ")" == cw;
        if (isSyntax) {
            // Operators and parentheses have no meaning inside a phrase.
            if (phraseSearch) {
                excluded.push(cw);
            } else {
                wordsArray.push(cw);
            }
        } else if (contains(index.stopWords, cw)) {
            excluded.push(cw);
        } else {
            wordsArray.push(cw);
        }
    }

    var expressionInput = wordsArray.join(" ");
    realSearchQuery = expressionInput;
    return expressionInput.trim();
}
-
/**
 * Groups the results by the number of matched words and registers one result
 * category per group, starting with the group that matched the most words.
 *
 * @param {Array} searchResults results (objects with a `wordsList` array)
 */
function groupResultsByWordCount(searchResults) {
    var resultsByWordCount = {};
    for (var sri = 0; sri < searchResults.length; sri++) {
        var csr = searchResults[sri];
        var wc = csr.wordsList.length;
        if (resultsByWordCount[wc] == undefined) {
            resultsByWordCount[wc] = [];
        }
        resultsByWordCount[wc].push(csr);
    }

    // Sort numerically: the default sort compares strings and puts "10" before "2".
    var wordCounts = Object.keys(resultsByWordCount).sort(function (a, b) {
        return b - a;
    });
    for (var k = 0; k < wordCounts.length; k++) {
        addSearchResultCategory(resultsByWordCount[wordCounts[k]]);
    }
}
-
/**
 * Merges two consecutive operators into one. Equal operators stay as they are;
 * otherwise "not" wins over everything and "or" wins over "and".
 *
 * @param {string} op1 first operator
 * @param {string} op2 second operator
 * @returns {string|undefined} the resulting operator; undefined when the operators
 *          differ and neither is "not" nor "or"
 */
function combineOperators(op1, op2) {
    if (op1 == op2) {
        return op1;
    }
    var dominant = ["not", "or"];
    for (var rank = 0; rank < dominant.length; rank++) {
        if (op1 == dominant[rank] || op2 == dominant[rank]) {
            return dominant[rank];
        }
    }
}
-
/**
 * Tells whether a word is one of the known boolean operators.
 *
 * @param {string} word the word to test
 * @returns {boolean} true when the word is listed in knownOperators
 */
function isKnownOperator(word) {
    for (var pos = 0; pos < knownOperators.length; pos++) {
        if (knownOperators[pos] == word) {
            return true;
        }
    }
    return false;
}
-
/**
 * Turns the pre-processed query into a well-formed boolean expression: lower-cases
 * it, glues parentheses to the neighbouring word, inserts the default operator
 * between adjacent terms, merges consecutive operators and removes leading and
 * trailing operators. Sets `booleanSearch` when an operator is present in the query.
 *
 * @param {string} query the pre-processed query
 * @returns {string} terms and operators separated by single spaces
 */
function normalizeQuery(query) {
    util.debug("normalizeQuery(" + query + ")");

    var cleaned = query.toLowerCase().trim()
        .replace(/ +/g, ' ')
        .replace(/\( /g, '(')
        .replace(/ \)/g, ')');

    var normalized = [];
    var parts = cleaned.split(" ");
    for (var p = 0; p < parts.length; p++) {
        var part = parts[p];
        if (part == "") {
            continue;
        }
        var partIsOperator = isKnownOperator(part);
        booleanSearch = booleanSearch || partIsOperator;

        if (normalized.length == 0) {
            // An expression cannot start with an operator.
            if (!partIsOperator) {
                normalized.push(part);
            }
            continue;
        }

        var lastPos = normalized.length - 1;
        var lastIsOperator = isKnownOperator(normalized[lastPos]);
        if (lastIsOperator && partIsOperator) {
            // Two operators in a row collapse into one.
            normalized[lastPos] = combineOperators(normalized[lastPos], part);
        } else if (!lastIsOperator && !partIsOperator) {
            // Two terms in a row are joined by the default operator.
            normalized.push(defaultOperator);
            normalized.push(part);
        } else {
            normalized.push(part);
        }
    }

    // An expression cannot end with an operator either.
    while (normalized.length > 0 && isKnownOperator(normalized[normalized.length - 1])) {
        normalized.pop();
    }
    return normalized.join(" ");
}
-
/**
 * Converts a normalized infix expression (terms, known operators, parentheses) to
 * Reverse Polish Notation. All operators have the same precedence and are applied
 * from left to right; parentheses group sub-expressions.
 *
 * A ")" without a matching "(" is ignored after flushing the pending operators
 * (this used to loop forever).
 *
 * @param {string} search the normalized expression
 * @returns {string} the RPN tokens separated by single spaces
 */
function convertToRPNExpression(search) {
    util.debug("convertToRPNExpression(" + search + ")");

    // 1. Split into terms/operators and parentheses.
    var items = [];
    var item = "";
    for (var i = 0; i < search.length; i++) {
        var ch = search[i];
        if (ch == " " || ch == "(" || ch == ")") {
            if (item != "") {
                items.push(item);
                item = "";
            }
            if (ch != " ") {
                items.push(ch);
            }
        } else {
            item += ch;
        }
    }
    if (item != "") {
        items.push(item);
    }

    // 2. Reorder using an operator stack.
    var output = [];
    var stack = [];
    for (i = 0; i < items.length; i++) {
        var current = items[i];
        if (isTerm(current)) {
            output.push(current);
        }
        if (inArray(current, knownOperators)) {
            while (stack.length > 0 && inArray(stack[stack.length - 1], knownOperators)) {
                output.push(stack.pop());
            }
            stack.push(current);
        } else if (current == "(") {
            stack.push(current);
        } else if (current == ")") {
            // Flush operators up to the matching "("; stop when the stack runs out.
            while (stack.length > 0) {
                var popped = stack.pop();
                if (popped == "(") {
                    break;
                }
                output.push(popped);
            }
        }
    }
    while (stack.length > 0) {
        output.push(stack.pop());
    }
    return output.join(" ").trim();
}
-
/**
 * Evaluates an RPN expression. Every term is searched with searchSingleWord() and
 * pushed on a stack as a BooleanSearchOperand; every operator ("and", "or", "not")
 * pops two operands and pushes their combination. Also stores the searched terms,
 * joined by spaces, in `realSearchQuery`.
 *
 * Malformed expressions are tolerated: an operator without a left operand is
 * logged and ignored (a lone right operand is kept), and an expression that
 * produces nothing yields an empty operand.
 *
 * @param {string} rpn RPN tokens separated by spaces
 * @returns {BooleanSearchOperand} the operand holding the final result
 */
function calculateRPN(rpn) {
    util.debug("calculate(" + rpn + ")");
    var rpnTokens = trim(rpn).split(' ');
    var stackResults = [];
    var realSearchWords = [];

    for (var i = 0; i < rpnTokens.length; i++) {
        var token = rpnTokens[i];
        if (isTerm(token)) {
            var result = searchSingleWord(token);
            util.debug(token, " -- single word search result -- ", result);
            realSearchWords.push(token);
            // A missing or empty result is represented by an empty operand.
            stackResults.push(new BooleanSearchOperand(result && result.length > 0 ? result : []));
        } else if (token === "and" || token === "or" || token === "not") {
            var rightOperand = stackResults.pop();
            var leftOperand = stackResults.pop();
            if (leftOperand === undefined) {
                // Not enough operands: skip the operator, keep what was popped.
                util.debug("Error in calculateRPN(string) Method!");
                if (rightOperand !== undefined) {
                    stackResults.push(rightOperand);
                }
            } else if (leftOperand.value == undefined) {
                util.debug("Error in calculateRPN(string) Method!");
            } else {
                // The operand methods are named after the operators.
                stackResults.push(leftOperand[token](rightOperand));
            }
        } else {
            util.debug("Error in calculateRPN(string) Method!");
        }
    }

    realSearchQuery = realSearchWords.join(" ");
    return stackResults.length > 0 ? stackResults[0] : new BooleanSearchOperand([]);
}
-
/**
 * Tells whether a token is a search term, i.e. neither a known operator nor
 * something containing a parenthesis.
 *
 * @param {string} string the token to test
 * @returns {boolean} true for a search term
 */
function isTerm(string) {
    if (inArray(string, knownOperators)) {
        return false;
    }
    var hasParenthesis = string.indexOf("(") != -1 || string.indexOf(")") != -1;
    return !hasParenthesis;
}
-
/**
 * Tells whether an array contains a value, compared with loose equality (==).
 *
 * @param needle the value to look for
 * @param {Array} haystack the array to search
 * @returns {boolean} true when some element equals the needle
 */
function inArray(needle, haystack) {
    var total = haystack.length;
    var position = 0;
    while (position < total) {
        if (haystack[position] == needle) {
            return true;
        }
        position += 1;
    }
    return false;
}
-
/**
 * Searches the index for one word of the query.
 *
 * The word is trimmed and lower-cased, then expanded into the list of index words
 * to look up:
 *  - "zh"/"ko" index: the tokens produced by cjkTokenize();
 *  - stemming enabled: the stems produced by tokenize(), kept only if indexed;
 *  - otherwise: two-letter words are looked up as they are (if indexed); longer
 *    words are expanded to the index words that start with them (or contain them
 *    when searching inside file paths), or kept as they are for an exact match.
 * Stop words are skipped. Also updates the module flag `useCJKTokenizing`.
 *
 * @param {string} wordToFind the word to search for
 * @returns {Array} the per-file results computed by searchStartWith(); an empty
 *          array when nothing can be searched
 */
function searchSingleWord(wordToFind) {
    util.debug('searchSingleWord("' + wordToFind + '")');
    wordToFind = trim(wordToFind).toLowerCase();
    var wordsList = [wordToFind];
    util.debug('words from search:', wordsList);

    var indexerLanguage = options.getIndexerLanguage();
    useCJKTokenizing = !!(typeof indexerLanguage != "undefined" && (indexerLanguage == "zh" || indexerLanguage == "ko"));
    var doStem = options.getBoolean('use.stemming');

    var finalWordsList;
    if (useCJKTokenizing) {
        finalWordsList = cjkTokenize(wordsList);
        util.debug('CJKTokenizing, finalWordsList: ' + finalWordsList);
    } else if (doStem) {
        finalWordsList = tokenize(wordsList);
    } else {
        finalWordsList = [wordToFind];
    }

    if (!useCJKTokenizing) {
        var candidates = [];
        for (var t = 0; t < finalWordsList.length; t++) {
            var rawWord = finalWordsList[t];
            if (contains(index.stopWords, rawWord)) {
                continue;
            }
            var word = rawWord.toString();

            if (doStem || word.length == 2) {
                // Own-property test: a plain lookup would also "find" inherited
                // keys such as "constructor".
                if (Object.prototype.hasOwnProperty.call(index.w, word) && index.w[word] != undefined) {
                    candidates.push(rawWord);
                }
                continue;
            }

            // Comma-terminated list of index words to look up.
            var matches = word + ",";
            if (!singleWordExactMatch) {
                matches = searchInsideFilePath ? wordsContains(word) : wordsStartsWith(word);
            }
            if (matches != undefined) {
                var matchedWords = matches.slice(0, -1).split(",");
                for (var m = 0; m < matchedWords.length; m++) {
                    candidates.push(matchedWords[m]);
                }
            }
        }
        finalWordsList = removeDuplicate(candidates);
    }

    var fileAndWordList = [];
    if (finalWordsList.length) {
        // searchStartWith() returns null when it has nothing to search for.
        fileAndWordList = searchStartWith(finalWordsList, wordToFind) || [];
    }
    return fileAndWordList;
}
/**
 * Tells whether a collection holds a word, compared with strict equality. Works on
 * anything `for...in` can enumerate; a missing collection contains nothing.
 *
 * @param arrayOfWords the collection to search
 * @param word the word to look for
 * @returns {boolean} true when the word is present
 */
function contains(arrayOfWords, word) {
    for (var key in arrayOfWords) {
        if (arrayOfWords[key] === word) {
            return true;
        }
    }
    return false;
}
/**
 * Lists the index words that start with the given value, ignoring case.
 *
 * @param {string} searchedValue the prefix to look for
 * @returns {string|undefined} the matching words, each followed by a comma, or
 *          undefined when no word matches
 */
function wordsStartsWith(searchedValue) {
    var matches = [];
    for (var indexedWord in index.w) {
        var prefixPos = indexedWord.toLowerCase().indexOf(searchedValue.toLowerCase());
        if (prefixPos == 0) {
            matches.push(indexedWord);
        }
    }
    if (matches.length === 0) {
        return undefined;
    }
    return matches.join(",") + ",";
}
/**
 * Lists the index words that contain the given value anywhere, ignoring case.
 *
 * @param {string} searchedValue the text to look for
 * @returns {string|undefined} the matching words, each followed by a comma, or
 *          undefined when no word matches
 */
function wordsContains(searchedValue) {
    var matches = [];
    for (var indexedWord in index.w) {
        var foundAt = indexedWord.toLowerCase().indexOf(searchedValue.toLowerCase());
        if (foundAt != -1) {
            matches.push(indexedWord);
        }
    }
    if (matches.length === 0) {
        return undefined;
    }
    return matches.join(",") + ",";
}
/**
 * Cleans and, when stemming is enabled and a stemmer is available, stems a list of
 * words.
 *
 * Every word is first recorded in `stemQueryMap` (stem -> word, or word -> word
 * without stemming). Then "%22" and a leading "-" are removed from each word (the
 * input array is updated in place) and "%20" entries are dropped.
 *
 * @param {Array} wordsList the words to process; modified in place
 * @returns {Array} the cleaned words, stemmed when stemming applies
 */
function tokenize(wordsList) {
    util.debug('tokenize(' + wordsList + ')');
    var doStem = options.getBoolean('use.stemming');
    var canStem = typeof stemmer != "undefined" && doStem;
    var i;

    // Remember which typed word each stem comes from.
    for (i = 0; i < wordsList.length; i++) {
        var typedWord = wordsList[i];
        if (canStem) {
            var stem = stemmer(typedWord);
            util.debug(typedWord, " -stem- ", stem);
            stemQueryMap[stem] = typedWord;
        } else {
            stemQueryMap[typedWord] = typedWord;
        }
    }

    var cleanWords = [];
    for (i = 0; i < wordsList.length; i++) {
        wordsList[i] = wordsList[i].replace(/(%22)|^-/g, "");
        if (wordsList[i] != "%20") {
            cleanWords.push(wordsList[i]);
        }
    }

    if (!canStem) {
        return cleanWords;
    }
    var stemmedWords = [];
    for (i = 0; i < cleanWords.length; i++) {
        stemmedWords.push(stemmer(cleanWords[i]));
    }
    return stemmedWords;
}
/**
 * Tokenizes words for a CJK index. Words whose average character code (see
 * getAvgAsciiValue) is below 127 are processed by tokenize(); all other words are
 * split by a CJKTokenizer. The CJK tokens come first in the result.
 *
 * @param {Array} wordsList the words to tokenize
 * @returns {Array} the CJK tokens followed by the tokens of the other words
 */
function cjkTokenize(wordsList) {
    var cjkTokens = [];
    var plainWords = [];
    util.debug('in cjkTokenize(), wordsList: ', wordsList);
    for (var pos = 0; pos < wordsList.length; pos++) {
        var current = wordsList[pos];
        util.debug('in cjkTokenize(), next word: ', current);
        if (getAvgAsciiValue(current) < 127) {
            plainWords.push(current);
            continue;
        }
        util.debug('in cjkTokenize(), use CJKTokenizer');
        var splitter = new CJKTokenizer(current);
        cjkTokens = cjkTokens.concat(splitter.getAllTokens());
        util.debug('in cjkTokenize(), found new tokens: ', cjkTokens);
    }
    return cjkTokens.concat(tokenize(plainWords));
}
/**
 * Computes the average character code of the first characters (at most five) of a word.
 *
 * @param {string} word the word to inspect
 * @returns {number} the average character code; NaN for an empty word
 */
function getAvgAsciiValue(word) {
    var sampleSize = word.length < 5 ? word.length : 5;
    var total = 0;
    var pos = 0;
    while (pos < sampleSize) {
        total += word.charCodeAt(pos);
        pos += 1;
    }
    return total / sampleSize;
}
/**
 * Splits a string into overlapping two-character tokens (bigrams).
 *
 * @param {string} input the text to split
 * @constructor
 */
function CJKTokenizer(input) {
    this.input = input;
    this.offset = -1;
    this.tokens = [];

    /** Moves to the next token position; returns false when no token is left. */
    this.incrementToken = function incrementToken() {
        var hasNext = this.input.length - 2 > this.offset;
        if (hasNext) {
            this.offset += 1;
        }
        return hasNext;
    };

    /** Returns the two-character token at the current position. */
    this.tokenize = function tokenize() {
        var start = this.offset;
        return this.input.substring(start, start + 2);
    };

    /** Collects all remaining tokens and returns the accumulated ones without duplicates. */
    this.getAllTokens = function getAllTokens() {
        while (this.incrementToken()) {
            this.tokens.push(this.tokenize());
        }
        return this.unique(this.tokens);
    };

    /** Returns the elements of `a` without duplicates (loose equality), first occurrence kept. */
    this.unique = function unique(a) {
        var distinct = [];
        for (var i = 0, n = a.length; i < n; i++) {
            var alreadyKept = false;
            for (var x = 0, y = distinct.length; x < y; x++) {
                if (distinct[x] == a[i]) {
                    alreadyKept = true;
                    break;
                }
            }
            if (!alreadyKept) {
                distinct[distinct.length] = a[i];
            }
        }
        return distinct;
    };
}
-
/**
 * Returns the elements of an array without duplicates, keeping first occurrences.
 *
 * @param {Array} array the array to process
 * @returns {Array|number} the distinct elements, or -1 when the first element is
 *          null/undefined (which includes the empty array)
 */
function unique(array) {
    util.debug("unique(", array, ")");
    var total = array.length;
    if (array[0] == undefined) {
        return -1;
    }
    var distinct = [array[0]];
    for (var pos = 1; pos < total; pos++) {
        var candidate = array[pos];
        if (indexof(distinct, candidate, 0) < 0) {
            distinct.push(candidate);
        }
    }
    return distinct;
}
-
/**
 * Finds the first position of an element in an array, compared with loose equality
 * (==), starting at a given position.
 *
 * @param {Array} array the array to search
 * @param element the element to look for
 * @param {number} begin the position to start from
 * @returns {number} the position of the element, or -1 when it is not found
 */
function indexof(array, element, begin) {
    var position = begin;
    while (position < array.length) {
        if (array[position] == element) {
            return position;
        }
        position++;
    }
    return -1;
}
-
-
/**
 * Looks up a list of index words and groups the hits per topic.
 *
 * An index entry is a comma-separated list of "topicID*score[*idx$idx...]" items.
 * For every topic, the matched words and their position lists are collected,
 * derivates of the searched word are merged (removeDerivates) and a score is
 * computed (computeScoring).
 *
 * @param {Array} words the index words to look up
 * @param {string} searchedWord the word typed by the user
 * @returns {Array|null} one ResultPerFile per topic, highest score first; null
 *          when there is no word or the first word is empty
 */
function searchStartWith(words, searchedWord) {
    if (words.length == 0 || words[0].length == 0) {
        return null;
    }

    // topic ID -> list of {word, indices}
    var fileAndWordList = {};
    for (var wi = 0; wi < words.length; wi++) {
        var word = words[wi];
        // Own-property test: inherited keys such as "constructor" are not entries.
        if (!Object.prototype.hasOwnProperty.call(index.w, word)) {
            continue;
        }
        var topicIDAndScore = index.w[word];
        if (topicIDAndScore == undefined) {
            continue;
        }

        var topicInfoArray = topicIDAndScore.split(",");
        for (var ti = 0; ti < topicInfoArray.length; ti++) {
            var entry = topicInfoArray[ti].toString();
            var firstStar = entry.indexOf('*');
            if (firstStar == -1) {
                warn("Unexpected writing format, '*' delimiter is missing.");
                continue;
            }
            var tid = entry.substring(0, firstStar);

            // The word positions follow the second '*', separated by '$'.
            var secondStar = entry.indexOf("*", firstStar + 1);
            var wordIndices = [];
            if (secondStar != -1) {
                wordIndices = entry.substring(secondStar + 1).split('$');
            }

            if (fileAndWordList[tid] == undefined) {
                fileAndWordList[tid] = [];
            }
            fileAndWordList[tid].push({
                word: word,
                indices: wordIndices
            });
        }
    }

    var tidWordsArray = [];
    for (var topicId in fileAndWordList) {
        tidWordsArray.push(new TopicIDAndWordList(topicId, fileAndWordList[topicId]));
    }
    tidWordsArray = removeDerivates(tidWordsArray, searchedWord);

    var resultsPerFileArrays = [];
    for (var ri = 0; ri < tidWordsArray.length; ri++) {
        var cTopicIDAndWordList = tidWordsArray[ri];
        // The score is computed from the words as found in the index, before merging.
        var scoring =
            computeScoring(fileAndWordList[cTopicIDAndWordList.filesNo], cTopicIDAndWordList.filesNo);
        resultsPerFileArrays.push(
            new ResultPerFile(
                cTopicIDAndWordList.filesNo,
                cTopicIDAndWordList.wordList,
                scoring));
    }

    resultsPerFileArrays.sort(function (a, b) {
        return b.scoring - a.scoring;
    });
    return resultsPerFileArrays;
}
-
/**
 * Merges, for every topic, the words that derive from the searched word: a word
 * that starts with the searched word (or contains it, when searching inside file
 * paths) is replaced by the searched word and the position lists of merged words
 * are concatenated.
 *
 * @param {Array} obj list of TopicIDAndWordList
 * @param {string} searchedWord the word typed by the user
 * @returns {Array} a new list of TopicIDAndWordList with the merged word lists
 */
function removeDerivates(obj, searchedWord) {
    var toResultObject = [];
    for (var i = 0; i < obj.length; i++) {
        var filesNo = obj[i].filesNo;
        var wordList = obj[i].wordList;

        // Prototype-less map, so that words like "constructor" cannot collide with
        // inherited properties.
        var wordIndicesMap = Object.create(null);
        for (var j = 0; j < wordList.length; j++) {
            var word = wordList[j].word;
            var isDerivate = searchInsideFilePath
                ? word.indexOf(searchedWord) != -1
                : startsWith(word, searchedWord);
            if (isDerivate) {
                word = searchedWord;
            }
            if (wordIndicesMap[word] == undefined) {
                wordIndicesMap[word] = wordList[j].indices;
            } else {
                wordIndicesMap[word] = wordIndicesMap[word].concat(wordList[j].indices);
            }
        }

        var newWordsArray = [];
        for (var mergedWord in wordIndicesMap) {
            newWordsArray.push({
                word: mergedWord,
                indices: wordIndicesMap[mergedWord]
            });
        }
        toResultObject.push(new TopicIDAndWordList(filesNo, newWordsArray));
    }
    return toResultObject;
}
-
/**
 * Pairs a topic identifier with the list of words found in that topic.
 *
 * @param {string} filesNo - topic (file) identifier.
 * @param {Array} wordList - word entries associated with the topic.
 * @constructor
 */
function TopicIDAndWordList(filesNo, wordList) {
    var self = this;
    self.filesNo = filesNo;
    self.wordList = wordList;
}
-
/**
 * One search hit: a topic, the words matched in it and its score.
 *
 * @param {string} filenb - topic (file) identifier.
 * @param {Array} wordsList - word entries matched in the topic.
 * @param {number} scoring - score of the topic for the search.
 * @constructor
 */
function ResultPerFile(filenb, wordsList, scoring) {
    var self = this;
    self.filenb = filenb;
    self.wordsList = wordsList;
    self.scoring = scoring;
}
-
/**
 * Sums, for one topic, the index scores of all the given words.
 *
 * Each index entry (`index.w[word]`) is a comma separated list of
 * "<topicID>*<score>..." items; the score of every item whose topic ID
 * matches `topicID` is added to the total.
 *
 * @param {Array<{word: string}>} words - word entries to score.
 * @param {string|number} topicID - topic whose scores are summed.
 * @returns {number} the total score; 0 when nothing matches.
 */
function computeScoring(words, topicID) {
    var sum = 0;
    for (var w = 0; w < words.length; w++) {
        var entry = index.w[words[w].word];
        // Only string entries are real index data: words such as
        // "constructor" may resolve to inherited members of a plain object.
        if (typeof entry !== "string") {
            continue;
        }

        var topicIDScoreArray = entry.split(',');
        for (var k = 0; k < topicIDScoreArray.length; k++) {
            var tidAndScore = topicIDScoreArray[k].split('*');
            // Loose comparison on purpose: IDs read from the index are
            // strings while callers may hold a numeric ID.
            if (tidAndScore[0] != topicID) {
                continue;
            }
            var score = parseInt(tidAndScore[1], 10);
            // A missing or malformed score must not turn the total into NaN.
            if (!isNaN(score)) {
                sum += score;
            }
        }
    }
    return sum;
}
/**
 * Comparator ordering two comma separated strings by their number of items.
 *
 * @param {string} s1 - first comma separated list.
 * @param {string} s2 - second comma separated list.
 * @returns {number} 0 when both hold the same number of items, 1 when `s1`
 *          holds more, -1 when it holds fewer.
 */
function compareWords(s1, s2) {
    var delta = s1.split(',').length - s2.split(',').length;
    if (delta === 0) {
        return 0;
    }
    return delta > 0 ? 1 : -1;
}
/**
 * Returns a copy of `arr` without duplicates, keeping the first occurrence
 * of every value. Values are compared with loose equality (==).
 *
 * @param {Array} arr - the array to filter.
 * @returns {Array} a new array holding the distinct values in original order.
 */
function removeDuplicate(arr) {
    var distinct = [];
    var total = arr.length;
    for (var i = 0; i < total; i++) {
        var alreadyKept = false;
        for (var k = 0; k < distinct.length && !alreadyKept; k++) {
            alreadyKept = (distinct[k] == arr[i]);
        }
        if (!alreadyKept) {
            distinct.push(arr[i]);
        }
    }
    return distinct;
}
/**
 * Strips the given characters from both ends of a string by applying
 * rtrim() and then ltrim(). The input is logged through util.debug first.
 *
 * @param {string} str - the string to trim.
 * @param {string} [chars] - characters to strip, forwarded to rtrim()/ltrim().
 * @returns {string} the trimmed string.
 */
function trim(str, chars) {
    util.debug("Trim a string... " + str);
    var withoutTrailing = rtrim(str, chars);
    return ltrim(withoutTrailing, chars);
}
/**
 * Strips leading characters from a string.
 *
 * @param {string} str - the string to trim.
 * @param {string} [chars] - body of a regular expression character class
 *        listing the characters to strip; it is inserted unescaped between
 *        '[' and ']'. Defaults to "\\s" (whitespace).
 * @returns {string} `str` without the leading run of those characters.
 */
function ltrim(str, chars) {
    var charClass = chars ? chars : "\\s";
    var leadingRun = new RegExp("^[" + charClass + "]+", "g");
    return str.replace(leadingRun, "");
}
/**
 * Strips trailing characters from a string.
 *
 * @param {string} str - the string to trim.
 * @param {string} [chars] - body of a regular expression character class
 *        listing the characters to strip; it is inserted unescaped between
 *        '[' and ']'. Defaults to "\\s" (whitespace).
 * @returns {string} `str` without the trailing run of those characters.
 */
function rtrim(str, chars) {
    var charClass = chars ? chars : "\\s";
    var trailingRun = new RegExp("[" + charClass + "]+$", "g");
    return str.replace(trailingRun, "");
}
-
-
/**
 * Operand of a boolean search expression: wraps a list of per-file results
 * and combines it in place with other operands (AND / OR / NOT).
 *
 * Results are matched between operands on their `filenb` property using
 * loose equality. `and`, `or` and `not` replace `this.value` and return
 * `this`; a missing (null/undefined) argument leaves the operand untouched.
 *
 * @param {Array<{filenb: *, wordsList: Array, scoring: number}>} resPerFileArray
 *        initial results of this operand.
 * @constructor
 */
function BooleanSearchOperand(resPerFileArray) {
    // Returns the first entry of `list` whose filenb equals `filenb`, or null.
    function findByFilenb(list, filenb) {
        for (var k = 0; k < list.length; k++) {
            if (list[k].filenb == filenb) {
                return list[k];
            }
        }
        return null;
    }

    this.value = resPerFileArray;

    /** @returns {string} a tab separated table: index, filenb and scoring. */
    this.toString = function () {
        var stringResult = "";
        stringResult += "INDEX\t|\tfilenb\t|\tscoring\n";
        for (var i = 0; i < this.value.length; i++) {
            stringResult += i + ".\t\t|\t" + this.value[i].filenb + "\t\t|\t" + this.value[i].scoring + "\n";
        }
        return stringResult;
    };

    /** @returns {string} the filenb of every result, each followed by " | ". */
    this.writeIDs = function () {
        var stringResult = "";
        for (var i = 0; i < this.value.length; i++) {
            stringResult += this.value[i].filenb + " | ";
        }
        return stringResult;
    };

    /**
     * Keeps only the results also present in `secondOperand`; the kept
     * results receive the other operand's words and have its score added.
     */
    this.and = function and(secondOperand) {
        if (typeof secondOperand == "undefined" || secondOperand == null) {
            return this;
        }
        var result = [];
        for (var x = 0; x < this.value.length; x++) {
            var current = this.value[x];
            var match = findByFilenb(secondOperand.value, current.filenb);
            if (match !== null) {
                current.wordsList = current.wordsList.concat(match.wordsList);
                current.scoring += match.scoring;
                result.push(current);
            }
        }
        this.value = result;
        return this;
    };

    /**
     * Merges the results of `operand` into this operand; results sharing a
     * filenb are folded into one whose words are concatenated and whose
     * scores are added.
     */
    this.or = function or(operand) {
        if (typeof operand == "undefined" || operand == null) {
            return this;
        }
        var candidates = this.value.concat(operand.value);
        var result = [];
        for (var i = 0; i < candidates.length; i++) {
            var existing = findByFilenb(result, candidates[i].filenb);
            if (existing === null) {
                result.push(candidates[i]);
            } else {
                existing.wordsList = existing.wordsList.concat(candidates[i].wordsList);
                existing.scoring = candidates[i].scoring + existing.scoring;
            }
        }
        this.value = result;
        return this;
    };

    /** Removes every result whose filenb is present in `newArray`. */
    this.not = function not(newArray) {
        if (typeof newArray == "undefined" || newArray == null) {
            return this;
        }
        var result = [];
        for (var x = 0; x < this.value.length; x++) {
            if (findByFilenb(newArray.value, this.value[x].filenb) === null) {
                result.push(this.value[x]);
            }
        }
        this.value = result;
        return this;
    };
}
-
/**
 * Forwards all its arguments to console.warn when that method is available;
 * does nothing otherwise.
 */
function warn() {
    // Feature-test the method that is actually called, and forward the
    // arguments individually rather than as one Arguments object.
    var res = typeof console.warn;
    if (res === "function") {
        console.warn.apply(console, arguments);
    }
}
-
/**
 * Forwards all its arguments to console.info when that method is available;
 * does nothing otherwise.
 */
function info() {
    if (typeof console.info !== "function") {
        return;
    }
    console.info.apply(console, arguments);
}
-
/**
 * Tells whether `word1` starts with `word2`.
 *
 * A missing (null or undefined) `word1` never matches; a missing `word2`
 * is a prefix of any non-missing `word1`.
 *
 * @param {?string} word1 - the word to inspect.
 * @param {?string} word2 - the candidate prefix.
 * @returns {boolean} true when `word2` is a prefix of `word1`.
 */
function startsWith(word1, word2) {
    // `== null` matches both null and undefined, so an omitted argument is
    // handled like an explicit null instead of raising a TypeError.
    if (word1 == null) {
        return false;
    }
    if (word2 == null) {
        return true;
    }
    // Written as a negated "<=" so that operands without a numeric length
    // are rejected, exactly as a failed length comparison would be.
    if (!(word2.length <= word1.length)) {
        return false;
    }
    for (var i = 0; i < word2.length; i++) {
        if (word1.charAt(i) !== word2.charAt(i)) {
            return false;
        }
    }
    return true;
}
-
/**
 * Tells whether a string contains at least one of the characters
 * '.', '/', '-', ':' or '_'.
 *
 * NOTE(review): the string-literal escapes collapse before the pattern is
 * compiled, so a literal backslash is NOT part of the character class;
 * confirm whether Windows-style path separators were meant to match.
 *
 * @param {string} toTest - the text to inspect.
 * @returns {boolean} true when one of those characters is present.
 */
function isURLorFilePath(toTest) {
    var pathLikeChar = new RegExp('[\./\\\-:_]');
    return pathLikeChar.exec(toTest) !== null;
}
- return {
- performSearch: performSearchDriver
- }
- });
|