lunr.stemmer.support.js 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. /*!
  2. * Snowball JavaScript Library v0.3
  3. * http://code.google.com/p/urim/
  4. * http://snowball.tartarus.org/
  5. *
  6. * Copyright 2010, Oleg Mazko
  7. * http://www.mozilla.org/MPL/
  8. */
  9. /**
  10. * export the module via AMD, CommonJS or as a browser global
  11. * Export code from https://github.com/umdjs/umd/blob/master/returnExports.js
  12. */
  13. ;(function (root, factory) {
  14. if (typeof define === 'function' && define.amd) {
  15. // AMD. Register as an anonymous module.
  16. define(factory)
  17. } else if (typeof exports === 'object') {
  18. /**
  19. * Node. Does not work with strict CommonJS, but
  20. * only CommonJS-like environments that support module.exports,
  21. * like Node.
  22. */
  23. module.exports = factory()
  24. } else {
  25. // Browser globals (root is window)
  26. factory()(root.lunr);
  27. }
  28. }(this, function () {
  29. /**
  30. * Return the installer function as the module export.
  31. * It takes the lunr instance as its only argument and
  32. * attaches the stemmerSupport and trimmerSupport helpers
  33. * to it.
  34. */
  34. return function(lunr) {
  35. /* provides utilities for the included stemmers */
  /**
   * Utilities for the included stemmers: the `Among` table-entry constructor
   * and the `SnowballProgram` factory, which wraps a working string together
   * with the cursor/limit/slice state that the stemmers manipulate.
   */
  lunr.stemmerSupport = (function() {
    /**
     * True for null, undefined and NaN. Other falsy values (0, "", false)
     * are deliberately treated as present.
     */
    function isMissing(value) {
      return value === null || value === undefined || value !== value;
    }

    /**
     * Tests one bit of a grouping table: `s` is indexed by `offset >> 3`
     * and the bit inside that entry is selected by `offset & 7`.
     * An entry that does not exist counts as "bit not set".
     */
    function isBitSet(s, offset) {
      return (s[offset >> 3] & (0x1 << (offset & 0x7))) !== 0;
    }

    /**
     * Second phase shared by find_among and find_among_b. Starting at
     * v[index], follows the `substring_i` links until an entry is found whose
     * whole string was matched (`matched >= s_size`) and whose optional
     * `method` accepts it.
     *
     * @param {Object} program - SnowballProgram object whose cursor is moved.
     * @param {Array} v - table of Among entries.
     * @param {number} index - entry chosen by the bisection phase.
     * @param {number} matched - number of characters matched for that entry.
     * @param {number} origin - cursor position when the search started.
     * @param {number} direction - 1 for forward search, -1 for backward.
     * @returns {*} the `result` of the accepted entry, or 0 if none is accepted.
     */
    function resolveAmong(program, v, index, matched, origin, direction) {
      while (true) {
        var entry = v[index];
        if (matched >= entry.s_size) {
          program.cursor = origin + direction * entry.s_size;
          if (!entry.method) {
            return entry.result;
          }
          var accepted = entry.method();
          // Re-assign the cursor after the call, as the method may have moved it.
          program.cursor = origin + direction * entry.s_size;
          if (accepted) {
            return entry.result;
          }
        }
        index = entry.substring_i;
        if (index < 0) {
          return 0;
        }
      }
    }

    /**
     * One entry of a search table used by find_among / find_among_b.
     *
     * @constructor
     * @param {string} s - the string to match; stored as an array of char codes.
     * @param {number} substring_i - index of the entry to try next when this
     *   one is not accepted; a negative value ends the chain.
     * @param {*} result - value returned on a match; must be truthy.
     * @param {Function} [method] - optional check called on a match; the
     *   match is accepted only when it returns a truthy value.
     * @throws {Error} if `s` or `substring_i` is null, undefined or NaN, or
     *   if `result` is falsy.
     */
    function Among(s, substring_i, result, method) {
      this.toCharArray = function(str) {
        var length = str.length, codes = new Array(length);
        for (var i = 0; i < length; i++) {
          codes[i] = str.charCodeAt(i);
        }
        return codes;
      };
      if (isMissing(s) || isMissing(substring_i) || !result) {
        throw new Error("Bad Among initialisation: s:" + s + ", substring_i: "
          + substring_i + ", result: " + result);
      }
      this.s_size = s.length;
      this.s = this.toCharArray(s);
      this.substring_i = substring_i;
      this.result = result;
      this.method = method;
    }

    /**
     * Creates a program object holding a private working string (`current`)
     * plus the public position fields bra, ket, limit, cursor and
     * limit_backward.
     *
     * @returns {Object} a fresh program object.
     */
    function SnowballProgram() {
      var current;
      return {
        bra : 0,
        ket : 0,
        limit : 0,
        cursor : 0,
        limit_backward : 0,

        /** Sets the working string and resets all position fields to span it. */
        setCurrent : function(word) {
          current = word;
          this.cursor = 0;
          this.limit = word.length;
          this.limit_backward = 0;
          this.bra = this.cursor;
          this.ket = this.limit;
        },

        /**
         * Returns the working string and clears the internal reference, so
         * setCurrent must be called again before the program is reused.
         */
        getCurrent : function() {
          var result = current;
          current = null;
          return result;
        },

        /**
         * Advances the cursor by one and returns true when the character at
         * the cursor lies in [min, max] and its bit is set in `s`.
         */
        in_grouping : function(s, min, max) {
          if (this.cursor < this.limit) {
            var ch = current.charCodeAt(this.cursor);
            if (ch <= max && ch >= min && isBitSet(s, ch - min)) {
              this.cursor++;
              return true;
            }
          }
          return false;
        },

        /** Backward variant of in_grouping: tests the character before the cursor. */
        in_grouping_b : function(s, min, max) {
          if (this.cursor > this.limit_backward) {
            var ch = current.charCodeAt(this.cursor - 1);
            if (ch <= max && ch >= min && isBitSet(s, ch - min)) {
              this.cursor--;
              return true;
            }
          }
          return false;
        },

        /**
         * Advances the cursor by one and returns true when the character at
         * the cursor is outside [min, max] or its bit is not set in `s`.
         */
        out_grouping : function(s, min, max) {
          if (this.cursor < this.limit) {
            var ch = current.charCodeAt(this.cursor);
            if (ch > max || ch < min || !isBitSet(s, ch - min)) {
              this.cursor++;
              return true;
            }
          }
          return false;
        },

        /** Backward variant of out_grouping: tests the character before the cursor. */
        out_grouping_b : function(s, min, max) {
          if (this.cursor > this.limit_backward) {
            var ch = current.charCodeAt(this.cursor - 1);
            if (ch > max || ch < min || !isBitSet(s, ch - min)) {
              this.cursor--;
              return true;
            }
          }
          return false;
        },

        /**
         * Returns true and moves the cursor forward by s_size when the next
         * s_size characters before the limit equal those of `s`.
         */
        eq_s : function(s_size, s) {
          if (this.limit - this.cursor < s_size) {
            return false;
          }
          for (var i = 0; i < s_size; i++) {
            if (current.charCodeAt(this.cursor + i) !== s.charCodeAt(i)) {
              return false;
            }
          }
          this.cursor += s_size;
          return true;
        },

        /**
         * Returns true and moves the cursor back by s_size when the s_size
         * characters ending at the cursor equal those of `s`.
         */
        eq_s_b : function(s_size, s) {
          if (this.cursor - this.limit_backward < s_size) {
            return false;
          }
          var start = this.cursor - s_size;
          for (var i = 0; i < s_size; i++) {
            if (current.charCodeAt(start + i) !== s.charCodeAt(i)) {
              return false;
            }
          }
          this.cursor -= s_size;
          return true;
        },

        /**
         * Searches table `v` (first v_size entries) for an entry matching the
         * text at the cursor, reading forward up to the limit. The bisection
         * assumes `v` is ordered so that the char-code comparison is monotonic.
         *
         * @returns {*} the matching entry's `result`, with the cursor placed
         *   after the match, or 0 when nothing matches or the table is empty.
         */
        find_among : function(v, v_size) {
          // An empty table cannot match; without this guard v[0] is undefined.
          if (v_size <= 0) {
            return 0;
          }
          var i = 0, j = v_size, c = this.cursor, l = this.limit;
          var common_i = 0, common_j = 0, first_key_inspected = false;
          var k, diff, common, w, i2;
          while (true) {
            k = i + ((j - i) >> 1);
            diff = 0;
            // Characters already known to match at both bounds need no re-check.
            common = common_i < common_j ? common_i : common_j;
            w = v[k];
            for (i2 = common; i2 < w.s_size; i2++) {
              if (c + common === l) {
                diff = -1;
                break;
              }
              diff = current.charCodeAt(c + common) - w.s[i2];
              if (diff) {
                break;
              }
              common++;
            }
            if (diff < 0) {
              j = k;
              common_j = common;
            } else {
              i = k;
              common_i = common;
            }
            if (j - i <= 1) {
              if (i > 0 || j === i || first_key_inspected) {
                break;
              }
              // Entry 0 is never picked as the midpoint; allow one extra pass for it.
              first_key_inspected = true;
            }
          }
          return resolveAmong(this, v, i, common_i, c, 1);
        },

        /**
         * Backward variant of find_among: compares entries from their last
         * character against the text ending at the cursor, down to
         * limit_backward.
         *
         * @returns {*} the matching entry's `result`, with the cursor placed
         *   before the match, or 0 when nothing matches or the table is empty.
         */
        find_among_b : function(v, v_size) {
          // An empty table cannot match; without this guard v[0] is undefined.
          if (v_size <= 0) {
            return 0;
          }
          var i = 0, j = v_size, c = this.cursor, lb = this.limit_backward;
          var common_i = 0, common_j = 0, first_key_inspected = false;
          var k, diff, common, w, i2;
          while (true) {
            k = i + ((j - i) >> 1);
            diff = 0;
            common = common_i < common_j ? common_i : common_j;
            w = v[k];
            for (i2 = w.s_size - 1 - common; i2 >= 0; i2--) {
              if (c - common === lb) {
                diff = -1;
                break;
              }
              diff = current.charCodeAt(c - 1 - common) - w.s[i2];
              if (diff) {
                break;
              }
              common++;
            }
            if (diff < 0) {
              j = k;
              common_j = common;
            } else {
              i = k;
              common_i = common;
            }
            if (j - i <= 1) {
              if (i > 0 || j === i || first_key_inspected) {
                break;
              }
              first_key_inspected = true;
            }
          }
          return resolveAmong(this, v, i, common_i, c, -1);
        },

        /**
         * Replaces the characters in [c_bra, c_ket) of the working string
         * with `s`, then shifts limit and cursor to account for the change.
         *
         * @returns {number} the change in length (may be negative).
         */
        replace_s : function(c_bra, c_ket, s) {
          var adjustment = s.length - (c_ket - c_bra);
          var left = current.substring(0, c_bra);
          var right = current.substring(c_ket);
          current = left + s + right;
          this.limit += adjustment;
          if (this.cursor >= c_ket) {
            this.cursor += adjustment;
          } else if (this.cursor > c_bra) {
            // The cursor was inside the replaced span; move it to the span start.
            this.cursor = c_bra;
          }
          return adjustment;
        },

        /**
         * Verifies 0 <= bra <= ket <= limit <= length of the working string.
         *
         * @throws {Error} when the slice markers are inconsistent.
         */
        slice_check : function() {
          if (this.bra < 0 || this.bra > this.ket || this.ket > this.limit
              || this.limit > current.length) {
            throw new Error("faulty slice operation");
          }
        },

        /** Replaces the slice [bra, ket) with `s` after validating it. */
        slice_from : function(s) {
          this.slice_check();
          this.replace_s(this.bra, this.ket, s);
        },

        /** Deletes the slice [bra, ket). */
        slice_del : function() {
          this.slice_from("");
        },

        /**
         * Replaces [c_bra, c_ket) with `s` and shifts bra and ket by the
         * length change when they lie at or after c_bra.
         */
        insert : function(c_bra, c_ket, s) {
          var adjustment = this.replace_s(c_bra, c_ket, s);
          if (c_bra <= this.bra) {
            this.bra += adjustment;
          }
          if (c_bra <= this.ket) {
            this.ket += adjustment;
          }
        },

        /** Returns the text of the slice [bra, ket) after validating it. */
        slice_to : function() {
          this.slice_check();
          return current.substring(this.bra, this.ket);
        },

        /** Same as eq_s_b using the full length of `s`. */
        eq_v_b : function(s) {
          return this.eq_s_b(s.length, s);
        }
      };
    }

    return {
      Among: Among,
      SnowballProgram: SnowballProgram
    };
  }());
  /**
   * Helpers for building trimmers from a set of word characters.
   */
  lunr.trimmerSupport = {
    /**
     * Builds a trimmer that strips, from both ends of a token, every run of
     * characters that is not part of `wordCharacters`.
     *
     * @param {string} wordCharacters - text placed inside a negated regular
     *   expression character class (`[^...]`), so ranges such as "A-Za-z"
     *   are allowed.
     * @returns {Function} a function taking a token. If the token has an
     *   `update` method, the result of `token.update(...)` is returned;
     *   otherwise the token is treated as a string and the trimmed string
     *   is returned.
     */
    generateTrimmer: function(wordCharacters) {
      var outsideWord = "[^" + wordCharacters + "]+";
      var leadingRun = new RegExp("^" + outsideWord);
      var trailingRun = new RegExp(outsideWord + "$");

      // Removes the leading run first, then the trailing one.
      function strip(text) {
        return text.replace(leadingRun, '').replace(trailingRun, '');
      }

      return function(token) {
        // for lunr version 2
        if (typeof token.update === "function") {
          return token.update(function (s) {
            return strip(s);
          });
        }
        // for lunr version 1
        return strip(token);
      };
    }
  };
  299. }
  300. }));