nwSearchFnt.js 62 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763
  1. define(["index", "options", "stemmer", "util"], function(index, options, stemmer, util) {
  2. /*
  3. David Cramer
  4. <david AT thingbag DOT net>
  5. Kasun Gajasinghe
  6. <kasunbg AT gmail DOT com>
  7. Copyright © 2008-2012 Kasun Gajasinghe, David Cramer
  8. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
  9. 1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
  10. 2. Except as contained in this notice, the names of individuals credited with contribution to this software shall not be used in advertising or otherwise to promote the sale, use or other dealings in this Software without prior written authorization from the individuals in question.
  11. 3. Any stylesheet derived from this Software that is publicly distributed will be identified with a different name and the version strings in any derived Software will be changed so that no possibility of confusion between the derived package and this Software will exist.
  12. Warranty: THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL DAVID CRAMER, KASUN GAJASINGHE, OR ANY OTHER CONTRIBUTOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  13. */
  14. /*
  15. List of modifications added by the Oxygen Webhelp plugin:
  16. 1. Make sure the space-separated words from the search query are
  17. passed to the search function searchSingleWord() in all cases (the
  18. total number of words can be less than or greater than 10 words).
  19. 2. Accept as valid search words a sequence of two words separated
  20. by ':', '.' or '-'.
  21. 3. Convert the search query to lowercase before executing the search.
  22. 4. Do not omit words between angle brackets from the title of the
  23. search results.
  24. 5. Normalize search results HREFs and add '#' for no-frames webhelp
  25. 6. Keep custom footer in TOC after searching some text
  26. 7. Accept as valid search words those that contain only 2 characters.
  27. */
  /**
   * Is set to true when the CJK tokenizer is used.
   *
   * @type {boolean}
   */
  var useCJKTokenizing = false;

  /**
   * The map with indexed words.
   *
   * w[word] = topicID*score, topicID*score;
   */
  var w = {};

  /**
   * Array with the words excluded from the search.
   *
   * @type {[string]}
   */
  var excluded = [];

  /**
   * The search query used in the search process, after it was filtered.
   */
  var realSearchQuery;

  /**
   * It is true when the user searches for a single word between quotes, like "flower".
   * In this case only this word will be displayed as search result.
   *
   * Note that it is not taken into consideration when stemming is activated.
   *
   * @type {boolean}
   */
  var singleWordExactMatch = false;

  /**
   * It is true when the search query seems to be a part of a URL or file path.
   * For this situation we will search using the 'contains' method.
   *
   * @type {boolean}
   */
  var searchInsideFilePath = false;

  /**
   * It is true when the original search expression contains boolean operators.
   *
   * @type {boolean}
   */
  var booleanSearch = false;

  /**
   * The default boolean search operator.
   *
   * @type {string}
   */
  var defaultOperator = "or";

  /**
   * List of all known operators.
   *
   * @type {string[]}
   */
  var knownOperators = ["and", "or", "not"];

  /**
   * A hashtable which maps stems to query words.
   *
   * NOTE(review): declared as an array although it is described as a map; left
   * unchanged because the code using it is not visible from here.
   */
  var stemQueryMap = [];

  /**
   * A map that contains search results organized by categories.
   *
   * @type {Object}
   */
  var resultCategoriesMap = {};

  /**
   * The number of result categories already counted.
   *
   * @type {number}
   */
  var resultCategoriesCount = 0;

  /**
   * Array with the file IDs that are already part of the search result.
   *
   * @type {Array}
   */
  var resultCategoriesMapFiles = [];

  /**
   * HTML of the warning displayed when the Japanese tokenizer (Kuromoji) is
   * enabled but WebHelp is browsed locally.
   *
   * @type {string}
   */
  var localNote =
    '<div class="alert alert-warning alert-dismissible fade show" role="alert">'
    + '<strong>WARNING!</strong> Due to security reasons, the Japanese Morphological Analyzer (Kuromoji) '
    + 'is disabled while browsing WebHelp output locally. <a href="#" style="font-size: 0.9em"> [ Read more ]</a>'
    + '</div>';
  /**
   * An object describing the topic information: the title of the topic, its
   * relative path to the output directory and its short description.
   *
   * @param {string} title The topic's title.
   * @param {string} relativePath The relative path to the output directory.
   * @param {string} shortDescription The short description of the topic.
   *
   * @constructor
   */
  function TopicInfo(title, relativePath, shortDescription) {
    this.title = title;
    this.relativePath = relativePath;
    this.shortDescription = shortDescription;
  }
  /**
   * An object describing the search result. It contains the search expression
   * and the list of documents where the search terms were found.
   *
   * @param {string} searchExpression The search expression that this result represents.
   *        It might differ from the initial search expression after stop words and
   *        invalid boolean operators were removed.
   * @param {[string]} excluded The array with the words excluded from the initial search expression.
   * @param {string} originalSearchExpression The initial search expression.
   * @param {DocumentInfo[]} documents The array containing the search result grouped by topic/document.
   * @param {string} errorMsg The message returned by the search when an error occurred.
   *        It is stored in the `error` property.
   *
   * @constructor
   */
  function SearchResult(searchExpression, excluded, originalSearchExpression, documents, errorMsg) {
    this.searchExpression = searchExpression;
    this.excluded = excluded;
    this.documents = documents;
    this.originalSearchExpression = originalSearchExpression;
    this.error = errorMsg;
  }
  /**
   * An object containing the search result for a single topic/HTML page:
   * the topic ID, path, title, short description, the words that were found,
   * the score and (optionally) the breadcrumb.
   *
   * @param {string} topicID The ID of the topic. Can be used to uniquely identify a document in the search result.
   * @param {string} relativePath The relative path to the topic.
   * @param {string} title The topic title.
   * @param {string} shortDescription The topic short description.
   * @param {[string]} words The array with the words contained by this topic.
   * @param {int} scoring The search scoring computed for this document.
   * @param {[TopicInfo]} breadcrumb The breadcrumb of the current document (optional).
   *
   * @constructor
   */
  function DocumentInfo(topicID, relativePath, title, shortDescription, words, scoring, breadcrumb) {
    this.topicID = topicID;
    this.relativePath = relativePath;
    this.title = title;
    this.shortDescription = shortDescription;
    this.words = words;
    this.scoring = scoring;
    this.breadcrumb = breadcrumb;
  }
  166. function performSearchDriver(searchQuery, _callback) {
  167. var indexerLanguage = options.getIndexerLanguage();
  168. var useKuromoji = indexerLanguage.indexOf("ja") != -1 && options.getBoolean('webhelp.enable.search.kuromoji.js')
  169. && !util.isLocal();
  170. if (indexerLanguage.indexOf("ja") != -1 && util.isLocal() && options.getBoolean('webhelp.enable.search.kuromoji.js')) {
  171. var note = $('<div/>').addClass('col-xs-12 col-sm-12 col-md-12 col-lg-12')
  172. .html(localNote);
  173. $('#searchResults').before(note);
  174. }
  175. if (useKuromoji) {
  176. require(["kuromoji"], function (kuromoji) {
  177. kuromoji.builder({ dicPath: "oxygen-webhelp/lib/kuromoji/dict" }).build(function (err, tokenizer) {
  178. // tokenizer is ready
  179. var tokens = tokenizer.tokenize(searchQuery);
  180. var finalWordsList = [];
  181. for (var w in tokens) {
  182. var word = tokens[w].surface_form;
  183. if (word!=" ") {
  184. finalWordsList.push(word);
  185. }
  186. }
  187. if (finalWordsList.length) {
  188. var finalWordsString = finalWordsList.join(" ");
  189. _callback(performSearchInternal(finalWordsString));
  190. } else {
  191. util.debug("Empty set");
  192. }
  193. });
  194. })
  195. } else {
  196. _callback(performSearchInternal(searchQuery));
  197. }
  198. }
  /**
   * This is the main function of the WH search library used to execute a search query.
   * The stop words are filtered.
   *
   * The module-level search state is reset through init() on every call. A query
   * wrapped in quotes (single or double) is treated as a phrase search, unless
   * CJK tokenizing is active. An error thrown while pre-processing the query is
   * not propagated: its message is reported through the `error` property of the result.
   *
   * @param {String} searchQuery The search query
   * @return {SearchResult} The search result containing the search expression together
   *         with an array of DocumentInfo objects. The array is empty (never undefined)
   *         when the query is empty after pre-processing.
   */
  function performSearchInternal(searchQuery) {
    util.debug("searchQuery", searchQuery);
    init();

    var initialSearchExpression = searchQuery;
    // Declared up-front so that an empty/invalid query still yields an array.
    var docInfos = [];
    var phraseSearch = false;

    searchQuery = searchQuery.trim();
    if (searchQuery.length > 2 && !useCJKTokenizing) {
      var firstChar = searchQuery.charAt(0);
      var lastChar = searchQuery.charAt(searchQuery.length - 1);
      phraseSearch =
        (firstChar == "'" || firstChar == '"') &&
        (lastChar == "'" || lastChar == '"');
    }

    // Remove ' and " characters
    searchQuery = searchQuery.replace(/"/g, " ").replace(/'/g, " ");

    var errorMsg;
    try {
      realSearchQuery = preprocessSearchQuery(searchQuery, phraseSearch);
    } catch (e) {
      errorMsg = e.message;
      util.debug(e);
    }
    util.debug("Search query after pre-process: ", realSearchQuery);

    if (realSearchQuery.trim().length != 0) {
      // Add the default boolean operator between words if it is missing
      searchQuery = normalizeQuery(realSearchQuery);

      var searchWordCount = 1;
      if (!useCJKTokenizing) {
        var sw = searchQuery.split(" ");
        searchWordCount = sw.length;
        singleWordExactMatch = phraseSearch && searchWordCount == 1;
        if (!singleWordExactMatch && !phraseSearch) {
          searchInsideFilePath = isURLorFilePath(realSearchQuery);
        }
      }

      // Convert to RPN notation
      var rpnExpression = convertToRPNExpression(searchQuery);
      // Perform search with RPN expression
      var res = calculateRPN(rpnExpression);
      var sRes = res.value;

      if (searchWordCount == 1) {
        // single word search
        var doStem = options.getBoolean('use.stemming');
        if (!singleWordExactMatch && !doStem && !useCJKTokenizing) {
          // Perform exact match first
          singleWordExactMatch = true;
          var exactMatchRes = calculateRPN(rpnExpression);
          addSearchResultCategory(exactMatchRes.value);
          // Add other results with lower priority
          addSearchResultCategory(sRes);
        } else {
          addSearchResultCategory(sRes);
        }
      } else if (phraseSearch) {
        sRes = filterResultsForPhraseSearch(res.value, realSearchQuery);
        addSearchResultCategory(sRes);
      } else if (booleanSearch) {
        groupResultsByWordCount(sRes);
      } else {
        // Search criterion was not specified: exact phrase matches come first
        var phraseSearchResult =
          filterResultsForPhraseSearch(res.value, realSearchQuery);
        addSearchResultCategory(phraseSearchResult);
        groupResultsByWordCount(sRes);
      }

      sRes = sortSearchResults();

      for (var i = 0; i < sRes.length; i++) {
        var cDoc = sRes[i];

        // Compute the topic information
        var topicInfoString = index.fil[cDoc.filenb];
        var topicInfo = computeTopicInfo(topicInfoString);
        if (topicInfo == null) {
          // Logged through util.debug, the logger used by the rest of this file.
          util.debug("There is no definition for topic with ID ", cDoc.filenb);
          continue;
        }

        var wordsStrArray = [];
        for (var k = 0; k < cDoc.wordsList.length; k++) {
          wordsStrArray.push(cDoc.wordsList[k].word);
        }

        var breadcrumb = computeBreadcrumbTopicInfos(cDoc.filenb);
        docInfos.push(
          new DocumentInfo(
            cDoc.filenb,
            topicInfo.relativePath,
            topicInfo.title,
            topicInfo.shortDescription,
            wordsStrArray,
            cDoc.scoring,
            breadcrumb));
      }
    }

    // Filter expression to cross site scripting possibility
    initialSearchExpression = filterOriginalSearchExpression(initialSearchExpression);

    return new SearchResult(realSearchQuery, excluded, initialSearchExpression, docInfos, errorMsg);
  }
  /**
   * Computes the topic associated information from its serialized form:
   * "relativePath@@@title@@@shortDescription".
   *
   * The path is the text before the first "@@@", the short description is the
   * text after the last "@@@" and the title is everything in between.
   * NOTE(review): a string without any "@@@" separator is not validated here.
   *
   * @param topicInfoString The topic information as string.
   *
   * @returns {TopicInfo} An object which contains the topic title, topic relative path and
   *          the topic short description, or null when no topic information was given.
   */
  function computeTopicInfo(topicInfoString) {
    // Covers both undefined (missing index entry) and null.
    if (topicInfoString == null) {
      return null;
    }

    var separator = "@@@";
    var firstSeparator = topicInfoString.indexOf(separator);
    var lastSeparator = topicInfoString.lastIndexOf(separator);

    var relPath = topicInfoString.substring(0, firstSeparator);

    // EXM-27709: display words between '<' and '>' in the title of search results,
    // so the angle brackets are escaped instead of being dropped.
    var topicTitle = topicInfoString
      .substring(firstSeparator + separator.length, lastSeparator)
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;");

    var topicShortDesc = topicInfoString.substring(lastSeparator + separator.length);

    return new TopicInfo(topicTitle, relPath, topicShortDesc);
  }
  /**
   * Compute the list of topic indexes representing the path to the root for the given topic.
   *
   * The parents are read from index.link2parent; the walk stops at the first
   * parent that is undefined or -1. The given topic itself is not part of the result.
   *
   * @param topicID The index of the topic in the index.fil list.
   *
   * @returns {Array} The array of indexes from the root to the (parent of the) topic.
   */
  function computePath2Root(topicID) {
    var path2Root = [];
    var parentTopicID = index.link2parent[topicID];

    while (parentTopicID !== undefined && parentTopicID !== -1) {
      // A corrupted index with a parent cycle would otherwise loop forever.
      if (parentTopicID === topicID || path2Root.indexOf(parentTopicID) !== -1) {
        break;
      }
      path2Root.unshift(parentTopicID);
      parentTopicID = index.link2parent[parentTopicID];
    }

    return path2Root;
  }
  /**
   * Computes an array of TopicInfo objects representing the breadcrumb components for the given topic.
   *
   * The ancestors are taken from computePath2Root(); ancestors for which
   * computeTopicInfo() returns null are left out.
   *
   * @param topicIndex The index of the topic in the index.fil list.
   *
   * @returns {Array} The breadcrumb components, from the root downwards.
   */
  function computeBreadcrumbTopicInfos(topicIndex) {
    var ancestors = computePath2Root(topicIndex);
    var breadcrumbPaths = [];

    for (var i = 0; i < ancestors.length; i++) {
      var ancestorInfo = computeTopicInfo(index.fil[ancestors[i]]);
      if (ancestorInfo !== null) {
        breadcrumbPaths.push(ancestorInfo);
      }
    }

    return breadcrumbPaths;
  }
  /**
   * Initialize the library for search: resets the module-level state that is
   * written while a query is executed (flags, excluded words, the filtered
   * query and the result categories).
   */
  function init() {
    // Query related state
    realSearchQuery = "";
    excluded = [];

    // Search mode flags
    searchInsideFilePath = false;
    singleWordExactMatch = false;
    booleanSearch = false;

    // Result categories
    resultCategoriesMap = {};
    resultCategoriesCount = 0;
    resultCategoriesMapFiles = [];
  }
  /**
   * Add a search result category. This new added category has a lower priority
   * than the ones added before it.
   *
   * Every entry gets a score of at least 1 (the entries are modified in place).
   * Entries whose file was already registered by a previous category are left
   * out; a category that ends up empty is not registered at all.
   *
   * @param searchCategory The search results category.
   */
  function addSearchResultCategory(searchCategory) {
    var newEntries = [];

    for (var si = 0; si < searchCategory.length; si++) {
      var entry = searchCategory[si];

      // Make sure that score is greater than 0
      entry.scoring = Math.max(1, entry.scoring);

      // Keep only the files that were not already registered
      if (resultCategoriesMapFiles.indexOf(entry.filenb) == -1) {
        newEntries.push(entry);
        resultCategoriesMapFiles.push(entry.filenb);
      }
    }

    if (newEntries.length > 0) {
      resultCategoriesMap[resultCategoriesCount++] = newEntries;
    }
  }
  /**
   * Scale the scoring in place so that the highest score becomes 99 and the
   * other scores keep their proportion (rounded up, capped at 99).
   * Nothing is changed when the highest score is 0 or the array is empty.
   *
   * @param {[ResultPerFile]} sortResult The sort result to scale.
   */
  function scaleSortResultScoring(sortResult) {
    var maxScore = 0;
    var i;

    for (i = 0; i < sortResult.length; i++) {
      maxScore = Math.max(maxScore, sortResult[i].scoring);
    }

    if (maxScore == 0) {
      return;
    }

    var ratio = 99 / maxScore;
    for (i = 0; i < sortResult.length; i++) {
      var scaled = Math.ceil(sortResult[i].scoring * ratio);
      sortResult[i].scoring = Math.min(99, scaled);
    }
  }
  /**
   * Flattens the registered result categories into a single array.
   *
   * The categories are visited in registration order (0, 1, 2, ...). Inside a
   * category the results are scaled with scaleSortResultScoring() and sorted by
   * descending score; then an offset of 100 per remaining category is added, so
   * that every result of an earlier category scores higher than the results of
   * the later ones. The result objects are modified in place.
   *
   * @returns {Array} All the results, highest priority first.
   */
  function sortSearchResults() {
    var result = [];
    var catNumber = Object.keys(resultCategoriesMap).length;

    for (var k = 0; k < catNumber; k++) {
      var category = resultCategoriesMap[k];
      scaleSortResultScoring(category);

      category.sort(function (first, second) {
        return second.scoring - first.scoring;
      });

      var offset = (catNumber - 1 - k) * 100;
      for (var ri = 0; ri < category.length; ri++) {
        category[ri].scoring = category[ri].scoring + offset;
      }

      result = result.concat(category);
    }

    return result;
  }
  /**
   * Filter results for phrase search.
   *
   * A result is kept when:
   *  - it has exactly as many words as the search query,
   *  - its words are, in order, the (optionally stemmed) lowercase query words,
   *  - there is an occurrence of the first word that is followed, word by word,
   *    by consecutive positions of the next words.
   *
   * The positions are read from the `indices` of every word and parsed as
   * base-32 integers; -1 is skipped.
   *
   * @param {[ResultPerFile]} resPerFileArray The array with search results to be filtered.
   * @param realSearchQuery The search query.
   * @returns {Array} The filtered array.
   */
  function filterResultsForPhraseSearch(resPerFileArray, realSearchQuery) {
    var searchWords = realSearchQuery.split(" ");
    var doStem = options.getBoolean('use.stemming');
    var fResult = [];

    // Iterate over all results
    for (var i = 0; i < resPerFileArray.length; i++) {
      var wordsList = resPerFileArray[i].wordsList;

      // Test if number of words are the same
      if (searchWords.length != wordsList.length) {
        continue;
      }

      // Test if words are the same
      var sameWords = true;
      for (var j = 0; j < wordsList.length; j++) {
        var sj = searchWords[j];
        if (typeof stemmer != "undefined" && doStem) {
          sj = stemmer(sj);
        }
        sj = sj.toLowerCase();
        if (sj != wordsList[j].word) {
          sameWords = false;
          break;
        }
      }
      if (!sameWords) {
        continue;
      }

      // Test if indices are consecutive, starting from every occurrence of the first word.
      // NOTE(review): for...in is kept because the container type of `indices` is not visible here.
      var firstWordIndices = wordsList[0].indices;
      for (var fi in firstWordIndices) {
        var cidx = parseInt(firstWordIndices[fi], 32);
        if (cidx == -1) {
          continue;
        }

        var consecutiveIndices = true;
        for (var ii = 1; ii < wordsList.length; ii++) {
          var nextIndices = wordsList[ii].indices;
          var nextIdxFound = false;
          for (var nIdx in nextIndices) {
            var cRes = parseInt(nextIndices[nIdx], 32);
            if (cRes != -1 && cidx == cRes - 1) {
              // Advance to the position of this word and look for the following one.
              cidx = cRes;
              nextIdxFound = true;
              break;
            }
          }
          if (!nextIdxFound) {
            consecutiveIndices = false;
            break;
          }
        }

        if (consecutiveIndices) {
          // One matching occurrence is enough for this document.
          fResult.push(resPerFileArray[i]);
          break;
        }
      }
    }

    return fResult;
  }
  /**
   * Filter the original search query to avoid cross site scripting possibility.
   *
   * The characters < > " ' = \ / @ * and the encoded sequences %2C %3B %21 %3A
   * %20 are replaced with a space, "$_" becomes "%24_", runs of spaces are
   * collapsed and the leading/trailing space is removed.
   *
   * No placeholder text is expanded back into markup: a query that literally
   * contains "_st_" or "_di_" is returned with that text unchanged.
   *
   * @param {string} searchTextField The search query to process.
   * @returns {string} The filtered search query.
   */
  function filterOriginalSearchExpression(searchTextField) {
    var filtered = searchTextField
      // NOTE(review): "0\" is removed as a whole; presumably meant for an escaped NUL - confirm.
      .replace(/0\\/g, " ")
      // Eliminate the cross site scripting possibility.
      .replace(/[<>"'=\\\/]/g, " ")
      /* EXM-20414 */
      .replace(/%2C|%3B|%21|%3A|@|\*/g, " ")
      .replace(/(%20)+/g, " ")
      .replace(/\$_/g, "%24_")
      .replace(/ +/g, " ");

    // After collapsing there is at most one space at each end.
    return filtered.replace(/^ | $/g, "");
  }
  /**
   * Pre-process the search query before it is used as search expression.
   *
   * Steps:
   *  - validate the round brackets (same number of '(' and ')', and the n-th '('
   *    placed before the n-th ')'),
   *  - separate the brackets from the words with a space,
   *  - remove the punctuation marks (',' everywhere; '.', '!' and '?' next to a space),
   *  - lowercase the words and move the stop words to the `excluded` list; for a
   *    phrase search the boolean operators and the brackets are excluded as well.
   *
   * The module-level `realSearchQuery` is updated with the result.
   *
   * @param {string} query The search query to process.
   * @param {boolean} phraseSearch True if phrase search was detected.
   * @returns {string} The processing result.
   * @throws {Error} "Invalid expression!" when the brackets are not balanced.
   */
  function preprocessSearchQuery(query, phraseSearch) {
    var searchTextField = query.trim();

    /**
     * Validate brackets
     */
    var openBracket = [];
    var closedBracket = [];
    var idx = query.indexOf("(");
    while (idx !== -1) {
      openBracket.push(idx);
      idx = query.indexOf("(", idx + 1);
    }
    idx = query.indexOf(")");
    while (idx !== -1) {
      closedBracket.push(idx);
      idx = query.indexOf(")", idx + 1);
    }

    if (openBracket.length != closedBracket.length) {
      throw new Error("Invalid expression!");
    }
    // Indexed loop: a '(' at position 0 is a valid (falsy) index and must be checked too.
    for (var b = 0; b < openBracket.length; b++) {
      if (openBracket[b] > closedBracket[b]) {
        throw new Error("Invalid expression!");
      }
    }

    // Add a space between '(' or ')' and the real word
    searchTextField = searchTextField.replace(/\((\S*)/g, '( $1');
    searchTextField = searchTextField.replace(/\)(\S*)/g, ') $1');
    searchTextField = searchTextField.replace(/(\S*)\)/g, '$1 )');

    // EXM-39245 - Remove punctuation marks
    // w1,w2 -> w1 w2
    searchTextField = searchTextField.replace(/[,]/g, ' ');
    // w1. w2 -> w1 w2
    searchTextField = searchTextField.replace(/\s\./g, ' ');
    searchTextField = searchTextField.replace(/\.\s/g, ' ');
    // w1! w2 -> w1 w2
    searchTextField = searchTextField.replace(/\s!/g, ' ');
    searchTextField = searchTextField.replace(/!\s/g, ' ');
    // w1? w2 -> w1 w2
    searchTextField = searchTextField.replace(/\s\?/g, ' ');
    searchTextField = searchTextField.replace(/\?\s/g, ' ');

    var wordsArray = [];
    var splitExpression = searchTextField.split(" ");

    // Exclude/filter stop words
    for (var t = 0; t < splitExpression.length; t++) {
      var cw = splitExpression[t].toLowerCase();
      if (cw.trim().length == 0) {
        // Empty string
        continue;
      }

      var isParenthesis = "(" == cw || ")" == cw;
      if (contains(knownOperators, cw) || isParenthesis) {
        // Boolean operators and parentheses are excluded from phrase search
        if (phraseSearch) {
          excluded.push(cw);
        } else {
          wordsArray.push(cw);
        }
      } else if (contains(index.stopWords, cw)) {
        // Exclude stop words
        excluded.push(cw);
      } else {
        wordsArray.push(cw);
      }
    }

    var expressionInput = wordsArray.join(" ");
    realSearchQuery = expressionInput;

    return expressionInput.trim();
  }
  /**
   * Group the search results by word count.
   *
   * The results are grouped by the number of words they matched
   * (`wordsList.length`) and every group is registered as a result category,
   * starting with the group that matched the most words.
   *
   * @param {[ResultPerFile]} searchResults The search results to be grouped.
   */
  function groupResultsByWordCount(searchResults) {
    var resultsByWordCount = {};

    for (var sri = 0; sri < searchResults.length; sri++) {
      var csr = searchResults[sri];
      var wc = csr.wordsList.length;
      if (resultsByWordCount[wc] == undefined) {
        resultsByWordCount[wc] = [];
      }
      resultsByWordCount[wc].push(csr);
    }

    var keys = [];
    for (var prop in resultsByWordCount) {
      keys.push(prop);
    }
    // Object keys are strings: compare them as numbers, otherwise "10" sorts before "2".
    keys.sort(function (first, second) {
      return first - second;
    });

    // Highest word count first
    for (var k = keys.length - 1; k >= 0; k--) {
      addSearchResultCategory(resultsByWordCount[keys[k]]);
    }
  }
  /**
   * @description Combine two operators into one.
   * Equal operators are returned as they are; otherwise "not" wins over
   * everything and "or" wins over "and".
   * e.g: "and or" => "or"
   *
   * @param {String} op1 Operator one
   * @param {String} op2 Operator two
   * @returns {String} Resulted operator. It is undefined when the operators are
   *          different and none of them is "not" or "or".
   */
  function combineOperators(op1, op2) {
    if (op1 == op2) {
      return op1;
    }

    var operators = [op1, op2];
    if (operators.indexOf("not") !== -1) {
      return "not";
    }
    if (operators.indexOf("or") !== -1) {
      return "or";
    }
  }
  /**
   * Check whether a word is one of the entries of the knownOperators list.
   *
   * @param word Word to look up in the knownOperators list
   * @returns {boolean} TRUE if the word is a known operator
   *                    FALSE otherwise
   */
  function isKnownOperator(word) {
      var recognised = inArray(word, knownOperators);
      return recognised;
  }
  /**
   * @description Normalize the query so that exactly one operator sits between
   * every two adjacent search terms. The defaultOperator is inserted where the
   * operator is missing, runs of operators are merged with combineOperators(),
   * and leading/trailing operators are dropped.
   * e.g: If the defaultOperator is "and" the "iris flower" query will be "iris and flower"
   *
   * Side effect: the booleanSearch flag is switched on as soon as the query
   * contains a known operator.
   *
   * @param {String} query Search query
   * @return {String} Normalized query
   */
  function normalizeQuery(query) {
      util.debug("normalizeQuery(" + query + ")");

      // Lower-case, trim, collapse runs of spaces and glue brackets to the
      // neighbouring term: "( iris )" => "(iris)".
      var cleaned = query.toLowerCase().trim()
          .replace(/ +/g, ' ')
          .replace(/\( /g, '(')
          .replace(/ \)/g, ')');

      var tokens = cleaned.split(" ");
      var parts = [];

      // TRUE when the item collected last is an operator.
      function endsWithOperator() {
          return isKnownOperator(parts[parts.length - 1]);
      }

      for (var idx = 0; idx < tokens.length; idx++) {
          var token = tokens[idx];
          if (token == "") {
              // Skip empty parts
              continue;
          }

          var tokenIsOperator = isKnownOperator(token);
          booleanSearch = booleanSearch || tokenIsOperator;

          if (parts.length == 0) {
              // The expression must start with a term, not with an operator
              if (!tokenIsOperator) {
                  parts.push(token);
              }
              continue;
          }

          // The four steps below are evaluated in sequence on purpose: each
          // one re-reads the tail of the list as left by the previous step.

          // operator + operator => merge them into one
          if (endsWithOperator() && tokenIsOperator) {
              parts[parts.length - 1] = combineOperators(parts[parts.length - 1], token);
          }
          // term + term => insert the default operator in between
          if (!endsWithOperator() && !tokenIsOperator) {
              parts.push(defaultOperator);
              parts.push(token);
          }
          // term + operator
          if (!endsWithOperator() && tokenIsOperator) {
              parts.push(token);
          }
          // operator + term
          if (endsWithOperator() && !tokenIsOperator) {
              parts.push(token);
          }
      }

      // An expression cannot end with an operator
      while (parts.length > 0 && isKnownOperator(parts[parts.length - 1])) {
          parts.pop();
      }

      return parts.join(" ");
  }
  /**
   * @description Convert search expression from infix notation to reverse polish notation (RPN): iris and flower => iris flower and
   *
   * All operators share the same precedence and are left-associative; only
   * parentheses change the evaluation order. Unbalanced parentheses are
   * tolerated: a ")" without a matching "(" simply flushes the pending
   * operators, and a "(" that is never closed is discarded.
   *
   * @param {string} search Search expression to be converted. e.g.: iris and flower or (gerbera not salvia)
   * @return {String} Search expression in RPN notation
   */
  function convertToRPNExpression(search) {
      util.debug("convertToRPNExpression(" + search + ")");

      // Step 1: split the expression into terms, operators and parentheses.
      var items = [];
      var item = "";

      function flushItem() {
          if (item != "") {
              items.push(item);
              item = "";
          }
      }

      for (var i = 0; i < search.length; i++) {
          var ch = search.charAt(i);
          if (ch == " ") {
              flushItem();
          } else if (ch == "(" || ch == ")") {
              flushItem();
              items.push(ch);
          } else {
              item += ch;
          }
      }
      flushItem();

      // Step 2: shunting-yard style conversion with a single precedence level.
      var output = [];
      var stack = [];
      for (i = 0; i < items.length; i++) {
          var current = items[i];
          if (current == "(") {
              stack.push(current);
          } else if (current == ")") {
              // Pop until the matching "(". Stop when the stack runs empty so
              // that an unmatched ")" cannot loop forever.
              while (stack.length > 0) {
                  var popped = stack.pop();
                  if (popped == "(") {
                      break;
                  }
                  output.push(popped);
              }
          } else if (inArray(current, knownOperators)) {
              while (stack.length > 0 && inArray(stack[stack.length - 1], knownOperators)) {
                  output.push(stack.pop());
              }
              stack.push(current);
          } else {
              // Anything else is a search term
              output.push(current);
          }
      }

      // Step 3: flush the remaining operators; a "(" left here was never closed.
      while (stack.length > 0) {
          var rest = stack.pop();
          if (rest != "(") {
              output.push(rest);
          }
      }

      return output.join(" ").trim();
  }
  /**
   * @description Compute results from a RPN expression
   *
   * Terms are resolved with searchSingleWord() and pushed on a stack as
   * BooleanSearchOperand objects; the "and", "or" and "not" operators pop two
   * operands and push the combined result. The searched terms are also stored,
   * space separated, in realSearchQuery.
   *
   * A malformed expression (an operator without enough operands) is reported
   * through util.debug() and does not throw: the operand that is available, if
   * any, is kept on the stack.
   *
   * @param {string} rpn Expression in Reverse Polish notation
   * @return {BooleanSearchOperand} The operand left at the bottom of the stack,
   *         or undefined when nothing was computed.
   */
  function calculateRPN(rpn) {
      util.debug("calculate(" + rpn + ")");

      var rpnTokens = trim(rpn).split(' ');
      var stackResults = [];
      var realSearchWords = [];

      for (var i = 0; i < rpnTokens.length; i++) {
          var token = rpnTokens[i];

          if (isTerm(token)) {
              var result = searchSingleWord(token);
              util.debug(token, " -- single word search result -- ", result);
              realSearchWords.push(token);
              // The single word search may yield null; treat it as "no results"
              var pages = (result && result.length > 0) ? result : [];
              stackResults.push(new BooleanSearchOperand(pages));
          } else if (token === "and" || token === "or" || token === "not") {
              var rightOperand = stackResults.pop();
              var leftOperand = stackResults.pop();

              if (leftOperand == undefined) {
                  // Stack underflow: the operator has less than two operands
                  util.debug("Error in calculateRPN(string) Method!");
                  if (rightOperand != undefined) {
                      stackResults.push(rightOperand);
                  }
              } else if (leftOperand.value == undefined) {
                  util.debug("Error in calculateRPN(string) Method!");
              } else {
                  // The operand exposes one method per operator: and(), or(), not()
                  stackResults.push(leftOperand[token](rightOperand));
              }
          } else {
              util.debug("Error in calculateRPN(string) Method!");
          }
      }

      realSearchQuery = realSearchWords.join(" ");
      return stackResults[0];
  }
  /**
   * Tells whether a token of the search expression is a search term, i.e. it
   * is neither a known operator nor something that contains a parenthesis.
   *
   * @param {string} string The token to test
   * @return {boolean} TRUE if the token is a search term
   *                   FALSE if the token is an operator or contains "(" or ")"
   */
  function isTerm(string) {
      if (inArray(string, knownOperators)) {
          return false;
      }
      var hasParenthesis = string.indexOf("(") != -1 || string.indexOf(")") != -1;
      return !hasParenthesis;
  }
  /**
   * @description Linear lookup of an element in an array. Elements are
   * compared with the loose equality operator (==).
   * @param needle Searched element
   * @param haystack Array of elements
   * @return {boolean} TRUE if the searched element is part of the array
   *                   FALSE otherwise
   */
  function inArray(needle, haystack) {
      var total = haystack.length;
      var position = 0;
      while (position < total) {
          if (haystack[position] == needle) {
              return true;
          }
          position += 1;
      }
      return false;
  }
  /**
   * Search for a single word/term.
   *
   * The term is trimmed and lower-cased, tokenized (CJK 2-gram tokenizing for
   * the "zh" and "ko" indexer languages, stemming when 'use.stemming' is
   * enabled) and, for non CJK content, expanded to the indexed words it
   * matches. The expanded list is then resolved with searchStartWith().
   *
   * Side effect: updates the useCJKTokenizing flag.
   *
   * @param {String} wordToFind A single search term to search for.
   * @return {[ResultPerFile]} Array with the resulted pages and indices. It is
   *         empty (never null) when nothing was found.
   */
  function searchSingleWord(wordToFind) {
      util.debug('searchSingleWord("' + wordToFind + '")');

      wordToFind = trim(wordToFind);
      wordToFind = wordToFind.toLowerCase();

      var wordsList = [wordToFind];
      util.debug('words from search:', wordsList);

      // Set the tokenizing method. If Lucene CJKTokenizer was used as the
      // indexer, then useCJKTokenizing will be true. Else, do normal tokenizing.
      var indexerLanguage = options.getIndexerLanguage();
      useCJKTokenizing = !!(typeof indexerLanguage != "undefined" && (indexerLanguage == "zh" || indexerLanguage == "ko"));

      // Array with the words to look for
      var finalWordsList;
      var doStem = options.getBoolean('use.stemming');
      if (useCJKTokenizing) {
          // 2-gram tokenizing happens in cjkTokenize()
          finalWordsList = cjkTokenize(wordsList);
          util.debug('CJKTokenizing, finalWordsList: ' + finalWordsList);
      } else if (doStem) {
          finalWordsList = tokenize(wordsList);
      } else {
          finalWordsList = [wordToFind];
      }

      if (!useCJKTokenizing) {
          // Keep only the words that are present in the index (index.w). When
          // no stemming is used, also add the indexed words that start with
          // (or contain) the searched word.
          var tempTab = [];
          for (var t = 0; t < finalWordsList.length; t++) {
              var currentWord = finalWordsList[t];
              if (contains(index.stopWords, currentWord)) {
                  continue;
              }

              var searchedValue = currentWord.toString();
              if (doStem || searchedValue.length == 2) {
                  // Stemmed and two-letter words must match an indexed word exactly
                  if (index.w[searchedValue] != undefined) {
                      tempTab.push(currentWord);
                  }
                  continue;
              }

              // Comma separated list, every word (the last one too) is followed by ","
              var listOfWordsStartWith = searchedValue + ",";
              if (!singleWordExactMatch) {
                  if (searchInsideFilePath) {
                      listOfWordsStartWith = wordsContains(searchedValue);
                  } else {
                      listOfWordsStartWith = wordsStartsWith(searchedValue);
                  }
              }

              if (listOfWordsStartWith != undefined) {
                  // Drop the trailing comma before splitting
                  var expandedWords = listOfWordsStartWith.slice(0, -1).split(",");
                  for (var e = 0; e < expandedWords.length; e++) {
                      tempTab.push(expandedWords[e]);
                  }
              }
          }
          finalWordsList = removeDuplicate(tempTab);
      }

      var fileAndWordList = [];
      if (finalWordsList.length) {
          // searchStartWith() answers null for an empty first word; callers
          // of this function expect an array.
          fileAndWordList = searchStartWith(finalWordsList, wordToFind) || [];
      }
      return fileAndWordList;
  }
  // Tells whether "word" is strictly equal (===) to one of the enumerable
  // values of "arrayOfWords".
  function contains(arrayOfWords, word) {
      for (var key in arrayOfWords) {
          if (arrayOfWords[key] === word) {
              return true;
          }
      }
      return false;
  }
  // Collect the indexed words (keys of index.w) that start with searchedValue,
  // ignoring the case. The result is a string where every word is followed by
  // a comma, or undefined when no word matches.
  function wordsStartsWith(searchedValue) {
      var matches = [];
      for (var indexedWord in index.w) {
          var position = indexedWord.toLowerCase().indexOf(searchedValue.toLowerCase());
          if (position == 0) {
              matches.push(indexedWord);
          }
      }
      if (matches.length == 0) {
          return undefined;
      }
      return matches.join(",") + ",";
  }
  // Collect the indexed words (keys of index.w) that contain searchedValue,
  // ignoring the case. The result is a string where every word is followed by
  // a comma, or undefined when no word matches.
  function wordsContains(searchedValue) {
      var matches = [];
      for (var indexedWord in index.w) {
          var position = indexedWord.toLowerCase().indexOf(searchedValue.toLowerCase());
          if (position != -1) {
              matches.push(indexedWord);
          }
      }
      if (matches.length == 0) {
          return undefined;
      }
      return matches.join(",") + ",";
  }
  /**
   * Clean and, when stemming is enabled, stem a list of search words.
   *
   * For every word the stem -> original word association is recorded in
   * stemQueryMap (the word is mapped to itself when no stemming is done).
   * The words are then cleaned (every "%22" and a leading "-" are removed),
   * "%20" entries are dropped, and the remaining words are stemmed if the
   * 'use.stemming' option is on and a stemmer function is available.
   *
   * The received array is not modified.
   *
   * @param {[String]} wordsList The words to process.
   * @return {[String]} A new array with the cleaned (and possibly stemmed) words.
   */
  function tokenize(wordsList) {
      util.debug('tokenize(' + wordsList + ')');

      var doStem = options.getBoolean('use.stemming');
      // The stemmer is optional, it may not be loaded
      var canStem = typeof stemmer != "undefined" && doStem;

      var stemmedWordsList = [];
      for (var j = 0; j < wordsList.length; j++) {
          var word = wordsList[j];

          if (canStem) {
              var s = stemmer(word);
              util.debug(word, " -stem- ", s);
              stemQueryMap[s] = word;
          } else {
              stemQueryMap[word] = word;
          }

          var cleanWord = word.replace(/(%22)|^-/g, "");
          if (cleanWord == "%20") {
              continue;
          }
          // Do the stemming using Porter's stemming algorithm
          stemmedWordsList.push(canStem ? stemmer(cleanWord) : cleanWord);
      }
      return stemmedWordsList;
  }
  // Tokenize a list of words for a CJK index. Words whose average char code
  // (see getAvgAsciiValue) is below 127 are passed to tokenize(); the others
  // are split with CJKTokenizer. The CJK tokens come first in the result.
  function cjkTokenize(wordsList) {
      var cjkTokens = [];
      var plainWords = [];
      util.debug('in cjkTokenize(), wordsList: ', wordsList);

      for (var pos = 0; pos < wordsList.length; pos++) {
          var current = wordsList[pos];
          util.debug('in cjkTokenize(), next word: ', current);

          if (getAvgAsciiValue(current) < 127) {
              plainWords.push(current);
              continue;
          }

          util.debug('in cjkTokenize(), use CJKTokenizer');
          var splitter = new CJKTokenizer(current);
          cjkTokens = cjkTokens.concat(splitter.getAllTokens());
          util.debug('in cjkTokenize(), found new tokens: ', cjkTokens);
      }

      return cjkTokens.concat(tokenize(plainWords));
  }
  // A simple way to determine whether the query is in english or not: the
  // average char code of (at most) the first five characters of the word.
  // The result is NaN for an empty word.
  function getAvgAsciiValue(word) {
      var sampleSize = 5;
      if (word.length < sampleSize) {
          sampleSize = word.length;
      }
      var total = 0;
      for (var pos = 0; pos < sampleSize; pos++) {
          total += word.charCodeAt(pos);
      }
      return total / sampleSize;
  }
  // CJKTokenizer: splits the input into overlapping two-character tokens
  // (2-grams). "abcd" gives "ab", "bc" and "cd".
  function CJKTokenizer(input) {
      this.input = input;
      // Start index of the current token; -1 means "before the first token"
      this.offset = -1;
      this.tokens = [];

      // Move to the next token. Answers false when there is no token left.
      this.incrementToken = function incrementToken() {
          var exhausted = this.input.length - 2 <= this.offset;
          if (!exhausted) {
              this.offset += 1;
          }
          return !exhausted;
      };

      // The two-character token at the current offset.
      this.tokenize = function tokenize() {
          var start = this.offset;
          return this.input.substring(start, start + 2);
      };

      // Consume the remaining tokens and answer all of them, without duplicates.
      this.getAllTokens = function getAllTokens() {
          while (this.incrementToken()) {
              this.tokens.push(this.tokenize());
          }
          return this.unique(this.tokens);
      };

      // A copy of the array where only the first occurrence of each value is kept.
      this.unique = function unique(a) {
          var distinct = [];
          for (var i = 0, n = a.length; i < n; i++) {
              var alreadyKept = false;
              for (var x = 0, y = distinct.length; x < y; x++) {
                  if (distinct[x] == a[i]) {
                      alreadyKept = true;
                      break;
                  }
              }
              if (!alreadyKept) {
                  distinct[distinct.length] = a[i];
              }
          }
          return distinct;
      };
  }
  /**
   * Remove the duplicate values from an array. The values are compared with
   * the loose equality operator, through indexof().
   *
   * @param array The array to search.
   * @returns {*} A new array without duplicates, or -1 when the first element
   *              of the array is undefined or null (an empty array included).
   */
  function unique(array) {
      util.debug("unique(", array, ")");

      var total = array.length;
      if (array[0] == undefined) {
          return -1;
      }

      var distinct = [array[0]];
      for (var pos = 1; pos < total; pos++) {
          var candidate = array[pos];
          if (indexof(distinct, candidate, 0) < 0) {
              distinct.push(candidate);
          }
      }
      return distinct;
  }
  /**
   * Finds the first position of an element in an array, starting the lookup
   * at a given index. Elements are compared with the loose equality
   * operator (==).
   *
   * @param array The array.
   * @param element The element to find.
   * @param begin The begin index.
   * @returns The index of the element or -1.
   */
  function indexof(array, element, begin) {
      var position = begin;
      while (position < array.length) {
          if (array[position] == element) {
              return position;
          }
          position++;
      }
      return -1;
  }
  1148. /* end of Array functions */
  /**
   * Searches in the indexed words for the terms in words and sorts the matches by scoring.
   *
   * In the generated index every word is mapped to a comma separated list of
   * entries. An entry starts with the topic ID followed by "*"; when a second
   * "*" is present, the text after it is the "$" separated list of word indices.
   * Example: "1*5*2$7,3*7*1"
   *
   * @param {Array} words - list of words to look for.
   * @param {String} searchedWord - search term typed by user
   * @return {Array} - the ResultPerFile objects, the best scoring first, or
   *                   null when the list of words is empty or its first word is empty
   */
  function searchStartWith(words, searchedWord) {
      if (words.length == 0 || words[0].length == 0) {
          return null;
      }

      // Group the words by topicID -> [{word, indices}]
      var fileAndWordList = {};
      for (var wi = 0; wi < words.length; wi++) {
          // get the list of the indices of the files.
          var topicIDAndScore = index.w[words[wi]];
          if (typeof topicIDAndScore != "string") {
              // Not indexed. This also rules out properties inherited from
              // Object.prototype (e.g. the word "constructor").
              continue;
          }

          var topicInfoArray = topicIDAndScore.split(",");
          // for each file (file's index):
          for (var ti = 0; ti < topicInfoArray.length; ti++) {
              var temp = topicInfoArray[ti];
              var idx = temp.indexOf('*');
              if (idx == -1) {
                  warn("Unexpected writing format, '*' delimiter is missing.");
                  continue;
              }

              var tid = temp.substring(0, idx);

              // Extract word indices
              var starLastIdx = temp.indexOf("*", idx + 1);
              var wordIndices = [];
              if (starLastIdx != -1) {
                  wordIndices = temp.substr(starLastIdx + 1).split('$');
              }

              if (fileAndWordList[tid] == undefined) {
                  fileAndWordList[tid] = [];
              }
              fileAndWordList[tid].push({
                  word: words[wi],
                  indices: wordIndices
              });
          }
      }

      // An array with TopicIDAndWordList objects
      var tidWordsArray = [];
      for (var topicID in fileAndWordList) {
          if (Object.prototype.hasOwnProperty.call(fileAndWordList, topicID)) {
              tidWordsArray.push(new TopicIDAndWordList(topicID, fileAndWordList[topicID]));
          }
      }
      tidWordsArray = removeDerivates(tidWordsArray, searchedWord);

      // Compute the array with results per file
      var resultsPerFileArrays = [];
      for (var ri = 0; ri < tidWordsArray.length; ri++) {
          var cTopicIDAndWordList = tidWordsArray[ri];
          // The scoring is computed on the original (not merged) list of words
          var scoring =
              computeScoring(fileAndWordList[cTopicIDAndWordList.filesNo], cTopicIDAndWordList.filesNo);
          resultsPerFileArrays.push(
              new ResultPerFile(
                  cTopicIDAndWordList.filesNo,
                  cTopicIDAndWordList.wordList,
                  scoring));
      }

      // Sort by score
      resultsPerFileArrays.sort(function (a, b) {
          return b.scoring - a.scoring;
      });
      return resultsPerFileArrays;
  }
  /**
   * Replace the derivative words from each list of words with the original
   * (searched) word, merging the indices of the words that end up equal.
   *
   * A word is a derivative if it starts with the searched word or, when the
   * search is done inside file paths, if it contains the searched word.
   *
   * @param {[TopicIDAndWordList]} obj Array that contains results for searched words
   * @param {String} searchedWord search term typed by user
   * @return {Array} New TopicIDAndWordList objects without duplicated and derivative words
   */
  function removeDerivates(obj, searchedWord) {
      var toResultObject = [];
      for (var i = 0; i < obj.length; i++) {
          var filesNo = obj[i].filesNo;
          var wordList = obj[i].wordList;

          // word -> indices. The map has no prototype, so that words such as
          // "constructor" or "toString" do not collide with inherited properties.
          var wordIndicesMap = Object.create(null);
          for (var j = 0; j < wordList.length; j++) {
              var w = wordList[j].word;
              var isDerivative = searchInsideFilePath
                  ? w.indexOf(searchedWord) != -1
                  : startsWith(w, searchedWord);
              if (isDerivative) {
                  w = searchedWord;
              }

              if (wordIndicesMap[w] == undefined) {
                  wordIndicesMap[w] = wordList[j].indices;
              } else {
                  wordIndicesMap[w] = wordIndicesMap[w].concat(wordList[j].indices);
              }
          }

          var newWordsAray = [];
          for (var mergedWord in wordIndicesMap) {
              newWordsAray.push({
                  word: mergedWord,
                  indices: wordIndicesMap[mergedWord]
              });
          }
          toResultObject.push(new TopicIDAndWordList(filesNo, newWordsAray));
      }
      return toResultObject;
  }
  /**
   * Pairs a topic with the words that were found in it.
   *
   * @param filesNo The topic ID or file number, stored in the filesNo property.
   * @param {[obj]} wordList An array of {word, indices} objects, stored in the
   *                         wordList property.
   * @constructor
   */
  function TopicIDAndWordList(filesNo, wordList) {
      this.filesNo = filesNo;
      this.wordList = wordList;
  }
  1277. // Object.
  1278. // Add a new parameter - scoring.
  /**
   * The search result for a single topic: the topic, the words that were
   * found in it and the scoring of the match.
   *
   * @param filenb The topic ID or number.
   * @param {obj[]} wordsList The array with the words that were found.
   *                Each entry has the form: {word: "flower", indices: [1, 5, 7]}
   * @param scoring The scoring associated with this topic.
   *
   * @constructor
   */
  function ResultPerFile(filenb, wordsList, scoring) {
      this.filenb = filenb;
      this.wordsList = wordsList;
      this.scoring = scoring;
  }
  /**
   * Compute score for one or more words for a given topic ID.
   *
   * The score of a word in a topic is read from the index, where every word
   * is mapped to a comma separated list of "topicID*score" entries, e.g.
   * w["flowering"]="1*5,3*7". Entries without a numeric score are ignored.
   *
   * @param words {[{word: string, indices: [integer]}]} The words found in the topic.
   * @param topicID {number} The topic ID.
   * @returns {number} The sum of the scores of the given words in the topic.
   */
  function computeScoring(words, topicID) {
      var sum = 0;
      for (var jj = 0; jj < words.length; jj++) {
          var indexEntry = index.w[words[jj].word];
          // Check if the word was indexed
          if (typeof indexEntry != "string") {
              continue;
          }

          var topicIDScoreArray = indexEntry.split(',');
          for (var ii = 0; ii < topicIDScoreArray.length; ii++) {
              var tidAndScore = topicIDScoreArray[ii].split('*');
              if (tidAndScore[0] == topicID) {
                  // Always decimal; a missing score must not turn the sum into NaN
                  var score = parseInt(tidAndScore[1], 10);
                  if (!isNaN(score)) {
                      sum += score;
                  }
              }
          }
      }
      return sum;
  }
  // Compare two comma separated lists by their number of items.
  // Answers 0 for the same number of items, 1 when s1 has more items
  // and -1 when s1 has fewer items.
  function compareWords(s1, s2) {
      var firstCount = s1.split(',').length;
      var secondCount = s2.split(',').length;
      if (firstCount == secondCount) {
          return 0;
      }
      return firstCount > secondCount ? 1 : -1;
  }
  // Build a copy of the array where only the first occurrence of each value
  // is kept. Values are compared with the loose equality operator (==).
  function removeDuplicate(arr) {
      var distinct = [];
      for (var i = 0, n = arr.length; i < n; i++) {
          var seen = false;
          for (var x = 0, y = distinct.length; x < y; x++) {
              if (distinct[x] == arr[i]) {
                  seen = true;
                  break;
              }
          }
          if (!seen) {
              distinct[distinct.length] = arr[i];
          }
      }
      return distinct;
  }
  // Strip the given characters (whitespace when "chars" is not given) from
  // both ends of the string: the end is trimmed first, then the beginning.
  function trim(str, chars) {
      util.debug("Trim a string... " + str);
      var withoutTail = rtrim(str, chars);
      return ltrim(withoutTail, chars);
  }
  // Strip the leading characters of the string that belong to "chars".
  // "chars" is inserted as is in a regular expression character class;
  // whitespace (\s) is stripped when it is not given.
  function ltrim(str, chars) {
      var leading = new RegExp("^[" + (chars || "\\s") + "]+", "g");
      return str.replace(leading, "");
  }
  // Strip the trailing characters of the string that belong to "chars".
  // "chars" is inserted as is in a regular expression character class;
  // whitespace (\s) is stripped when it is not given.
  function rtrim(str, chars) {
      var trailing = new RegExp("[" + (chars || "\\s") + "]+$", "g");
      return str.replace(trailing, "");
  }
  1354. /**
  1355. * PATCH FOR BOOLEAN SEARCH
  1356. */
  /**
   * @description Operand of a boolean search expression. It wraps an array of
   * per-file results and offers the and(), or() and not() operators. The
   * operators update this operand in place (its value is replaced) and answer
   * the operand itself.
   * @param {[ResultPerFile]} resPerFileArray Array that contains partial results
   * @constructor
   */
  function BooleanSearchOperand(resPerFileArray) {
      this.value = resPerFileArray;

      // Position of the first entry that has the given file number, or -1.
      function positionOf(entries, filenb) {
          for (var p = 0; p < entries.length; p++) {
              if (filenb == entries[p].filenb) {
                  return p;
              }
          }
          return -1;
      }

      /**
       * @returns {String} A table with the index, file number and scoring of each result.
       */
      this.toString = function () {
          var rows = ["INDEX\t|\tfilenb\t|\tscoring\n"];
          for (var i = 0; i < this.value.length; i++) {
              var entry = this.value[i];
              rows.push(i + ".\t\t|\t" + entry.filenb + "\t\t|\t" + entry.scoring + "\n");
          }
          return rows.join("");
      };

      /**
       * @returns {String} The file numbers of the results, each followed by " | ".
       */
      this.writeIDs = function () {
          var ids = "";
          for (var i = 0; i < this.value.length; i++) {
              ids = ids + this.value[i].filenb + " | ";
          }
          return ids;
      };

      /**
       * Combine two search results using AND function: keep the files found in
       * both operands, merging their words and adding up their scoring.
       *
       * @param {BooleanSearchOperand} secondOperand The second boolean operand to combine with.
       * @returns {BooleanSearchOperand} The AND operation result.
       */
      this.and = function and(secondOperand) {
          if (typeof secondOperand == "undefined" || secondOperand == null) {
              return this;
          }

          var common = [];
          for (var x = 0; x < this.value.length; x++) {
              var mine = this.value[x];
              var matchAt = positionOf(secondOperand.value, mine.filenb);
              if (matchAt == -1) {
                  continue;
              }
              var theirs = secondOperand.value[matchAt];
              mine.wordsList = mine.wordsList.concat(theirs.wordsList);
              mine.scoring += theirs.scoring;
              common.push(mine);
          }

          this.value = common;
          return this;
      };

      /**
       * Combine two search results using OR operator: keep the files found in
       * any of the operands. The results that point to the same file are
       * merged (words concatenated, scoring added up).
       *
       * @param {BooleanSearchOperand} operand The second operand.
       * @returns {BooleanSearchOperand} The new operand after applying the OR operator.
       */
      this.or = function or(operand) {
          if (typeof operand == "undefined" || operand == null) {
              return this;
          }

          this.value = this.value.concat(operand.value);

          var merged = [];
          for (var i = 0; i < this.value.length; i++) {
              var candidate = this.value[i];
              var existingAt = positionOf(merged, candidate.filenb);
              if (existingAt == -1) {
                  merged.push(candidate);
                  continue;
              }
              var existing = merged[existingAt];
              existing.wordsList = existing.wordsList.concat(candidate.wordsList);
              existing.scoring = candidate.scoring + existing.scoring;
          }

          this.value = merged;
          return this;
      };

      /**
       * Apply the NOT operator: keep the files of this operand that are not
       * found in the other operand.
       *
       * @param {BooleanSearchOperand} newArray The operand with the files to exclude.
       * @returns {BooleanSearchOperand} The NOT operation result.
       */
      this.not = function not(newArray) {
          if (typeof newArray == "undefined" || newArray == null) {
              return this;
          }

          var remaining = [];
          for (var x = 0; x < this.value.length; x++) {
              if (positionOf(newArray.value, this.value[x].filenb) == -1) {
                  remaining.push(this.value[x]);
              }
          }

          this.value = remaining;
          return this;
      };
  }
  /**
   * Utility method to log a warning message. It is delegated to console.warn,
   * when available, and receives all the given arguments.
   *
   * @param args The list with arguments.
   */
  function warn() {
      if (typeof console !== "undefined" && typeof console.warn === "function") {
          // Forward the arguments one by one, not the arguments object itself
          console.warn.apply(console, arguments);
      }
  }
  /**
   * Utility method to log an informative message. It is delegated to
   * console.info, when that is a function, and receives all the given arguments.
   *
   * @param args The list with arguments.
   */
  function info() {
      if (typeof console.info !== "function") {
          return;
      }
      console.info.apply(console, arguments);
  }
  // Tells whether "word1" starts with "word2".
  // When one of the words is null, the answer is true only if "word1" is not null.
  function startsWith(word1, word2) {
      if (word1 === null || word2 === null) {
          return word1 !== null;
      }
      if (word2.length > word1.length) {
          return false;
      }
      // Compare the words character by character, up to the length of word2
      var pos = 0;
      while (pos < word2.length) {
          if (word1.charAt(pos) !== word2.charAt(pos)) {
              return false;
          }
          pos++;
      }
      return true;
  }
  /**
   * Detect if a search token seems to be an URL or file path, i.e. it contains
   * at least one of the following characters: . / \ - : _
   *
   * @param toTest The search expression.
   * @returns {boolean} True if the search query seems to be an URL or file path.
   */
  function isURLorFilePath(toTest) {
      // A regular expression literal is used on purpose: in a string given to
      // the RegExp constructor the "\\" escape is consumed by the string and
      // the backslash is no longer part of the character class.
      return /[.\/\\\-:_]/.test(toTest);
  }
  1511. return {
  1512. performSearch: performSearchDriver
  1513. }
  1514. });