mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-10-27 00:23:41 +09:00 
			
		
		
		
	Use weighted algorithm for string matching when finding files in repo (#21370)
This PR is for: * https://github.com/go-gitea/gitea/issues/20231 Now, when a user searches `word`, they always see `/{word}.txt` before `/{w}e-g{o}t-{r}esult.{d}at` Demo: When searching "a", "a.ext" comes first. Then when searching "at", the longer matched "template" comes first. <details>   </details> This PR also makes the frontend tests could import feature JS files by introducing `jestSetup.js` Co-authored-by: delvh <dev.lh@web.de> Co-authored-by: silverwind <me@silverwind.io>
This commit is contained in:
		| @@ -1,3 +1,4 @@ | ||||
| // to run tests with ES6 module, node must run with "--experimental-vm-modules", or see Makefile's "test-frontend" for reference | ||||
| export default { | ||||
|   rootDir: 'web_src', | ||||
|   setupFilesAfterEnv: ['jest-extended/all'], | ||||
| @@ -7,6 +8,8 @@ export default { | ||||
|   transform: { | ||||
|     '\\.svg$': '<rootDir>/js/testUtils/jestRawLoader.js', | ||||
|   }, | ||||
|   setupFiles: [ | ||||
|     './js/testUtils/jestSetup.js', // prepare global variables used by our code (eg: window.config) | ||||
|   ], | ||||
|   verbose: false, | ||||
| }; | ||||
|  | ||||
|   | ||||
| @@ -1,45 +1,96 @@ | ||||
| import $ from 'jquery'; | ||||
|  | ||||
| import {svg} from '../svg.js'; | ||||
| import {strSubMatch} from '../utils.js'; | ||||
| const {csrf} = window.config; | ||||
|  | ||||
| const threshold = 50; | ||||
| let files = []; | ||||
| let $repoFindFileInput, $repoFindFileTableBody, $repoFindFileNoResult; | ||||
|  | ||||
|  | ||||
| // return the case-insensitive sub-match result as an array:  [unmatched, matched, unmatched, matched, ...] | ||||
| // res[even] is unmatched, res[odd] is matched, see unit tests for examples | ||||
| // argument subLower must be a lower-cased string. | ||||
| export function strSubMatch(full, subLower) { | ||||
|   const res = ['']; | ||||
|   let i = 0, j = 0; | ||||
|   const fullLower = full.toLowerCase(); | ||||
|   while (i < subLower.length && j < fullLower.length) { | ||||
|     if (subLower[i] === fullLower[j]) { | ||||
|       if (res.length % 2 !== 0) res.push(''); | ||||
|       res[res.length - 1] += full[j]; | ||||
|       j++; | ||||
|       i++; | ||||
|     } else { | ||||
|       if (res.length % 2 === 0) res.push(''); | ||||
|       res[res.length - 1] += full[j]; | ||||
|       j++; | ||||
|     } | ||||
|   } | ||||
|   if (i !== subLower.length) { | ||||
|     // if the sub string doesn't match the full, only return the full as unmatched. | ||||
|     return [full]; | ||||
|   } | ||||
|   if (j < full.length) { | ||||
|     // append remaining chars from full to result as unmatched | ||||
|     if (res.length % 2 === 0) res.push(''); | ||||
|     res[res.length - 1] += full.substring(j); | ||||
|   } | ||||
|   return res; | ||||
| } | ||||
|  | ||||
| export function calcMatchedWeight(matchResult) { | ||||
|   let weight = 0; | ||||
|   for (let i = 0; i < matchResult.length; i++) { | ||||
|     if (i % 2 === 1) { // matches are on odd indices, see strSubMatch | ||||
|       // use a function f(x+x) > f(x) + f(x) to make the longer matched string has higher weight. | ||||
|       weight += matchResult[i].length * matchResult[i].length; | ||||
|     } | ||||
|   } | ||||
|   return weight; | ||||
| } | ||||
|  | ||||
| export function filterRepoFilesWeighted(files, filter) { | ||||
|   let filterResult = []; | ||||
|   if (filter) { | ||||
|     const filterLower = filter.toLowerCase(); | ||||
|     // TODO: for large repo, this loop could be slow, maybe there could be one more limit: | ||||
|     // ... && filterResult.length < threshold * 20,  wait for more feedbacks | ||||
|     for (let i = 0; i < files.length; i++) { | ||||
|       const res = strSubMatch(files[i], filterLower); | ||||
|       if (res.length > 1) { // length==1 means unmatched, >1 means having matched sub strings | ||||
|         filterResult.push({matchResult: res, matchWeight: calcMatchedWeight(res)}); | ||||
|       } | ||||
|     } | ||||
|     filterResult.sort((a, b) => b.matchWeight - a.matchWeight); | ||||
|     filterResult = filterResult.slice(0, threshold); | ||||
|   } else { | ||||
|     for (let i = 0; i < files.length && i < threshold; i++) { | ||||
|       filterResult.push({matchResult: [files[i]], matchWeight: 0}); | ||||
|     } | ||||
|   } | ||||
|   return filterResult; | ||||
| } | ||||
|  | ||||
| function filterRepoFiles(filter) { | ||||
|   const treeLink = $repoFindFileInput.attr('data-url-tree-link'); | ||||
|   $repoFindFileTableBody.empty(); | ||||
|  | ||||
|   const fileRes = []; | ||||
|   if (filter) { | ||||
|     for (let i = 0; i < files.length && fileRes.length < threshold; i++) { | ||||
|       const subMatch = strSubMatch(files[i], filter); | ||||
|       if (subMatch.length > 1) { | ||||
|         fileRes.push(subMatch); | ||||
|       } | ||||
|     } | ||||
|   } else { | ||||
|     for (let i = 0; i < files.length && i < threshold; i++) { | ||||
|       fileRes.push([files[i]]); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   const filterResult = filterRepoFilesWeighted(files, filter); | ||||
|   const tmplRow = `<tr><td><a></a></td></tr>`; | ||||
|  | ||||
|   $repoFindFileNoResult.toggle(fileRes.length === 0); | ||||
|   for (const matchRes of fileRes) { | ||||
|   $repoFindFileNoResult.toggle(filterResult.length === 0); | ||||
|   for (const r of filterResult) { | ||||
|     const $row = $(tmplRow); | ||||
|     const $a = $row.find('a'); | ||||
|     $a.attr('href', `${treeLink}/${matchRes.join('')}`); | ||||
|     $a.attr('href', `${treeLink}/${r.matchResult.join('')}`); | ||||
|     const $octiconFile = $(svg('octicon-file')).addClass('mr-3'); | ||||
|     $a.append($octiconFile); | ||||
|     // if the target file path is "abc/xyz", to search "bx", then the matchRes is ['a', 'b', 'c/', 'x', 'yz'] | ||||
|     // the matchRes[odd] is matched and highlighted to red. | ||||
|     for (let j = 0; j < matchRes.length; j++) { | ||||
|       if (!matchRes[j]) continue; | ||||
|       const $span = $('<span>').text(matchRes[j]); | ||||
|     // if the target file path is "abc/xyz", to search "bx", then the matchResult is ['a', 'b', 'c/', 'x', 'yz'] | ||||
|     // the matchResult[odd] is matched and highlighted to red. | ||||
|     for (let j = 0; j < r.matchResult.length; j++) { | ||||
|       if (!r.matchResult[j]) continue; | ||||
|       const $span = $('<span>').text(r.matchResult[j]); | ||||
|       if (j % 2 === 1) $span.addClass('ui text red'); | ||||
|       $a.append($span); | ||||
|     } | ||||
|   | ||||
							
								
								
									
										34
									
								
								web_src/js/features/repo-findfile.test.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								web_src/js/features/repo-findfile.test.js
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,34 @@ | ||||
| import {strSubMatch, calcMatchedWeight, filterRepoFilesWeighted} from './repo-findfile.js'; | ||||
|  | ||||
| describe('Repo Find Files', () => { | ||||
|   test('strSubMatch', () => { | ||||
|     expect(strSubMatch('abc', '')).toEqual(['abc']); | ||||
|     expect(strSubMatch('abc', 'a')).toEqual(['', 'a', 'bc']); | ||||
|     expect(strSubMatch('abc', 'b')).toEqual(['a', 'b', 'c']); | ||||
|     expect(strSubMatch('abc', 'c')).toEqual(['ab', 'c']); | ||||
|     expect(strSubMatch('abc', 'ac')).toEqual(['', 'a', 'b', 'c']); | ||||
|     expect(strSubMatch('abc', 'z')).toEqual(['abc']); | ||||
|     expect(strSubMatch('abc', 'az')).toEqual(['abc']); | ||||
|  | ||||
|     expect(strSubMatch('ABc', 'ac')).toEqual(['', 'A', 'B', 'c']); | ||||
|     expect(strSubMatch('abC', 'ac')).toEqual(['', 'a', 'b', 'C']); | ||||
|  | ||||
|     expect(strSubMatch('aabbcc', 'abc')).toEqual(['', 'a', 'a', 'b', 'b', 'c', 'c']); | ||||
|     expect(strSubMatch('the/directory', 'hedir')).toEqual(['t', 'he', '/', 'dir', 'ectory']); | ||||
|   }); | ||||
|  | ||||
|   test('calcMatchedWeight', () => { | ||||
|     expect(calcMatchedWeight(['a', 'b', 'c', 'd']) < calcMatchedWeight(['a', 'bc', 'c'])).toBeTruthy(); | ||||
|   }); | ||||
|  | ||||
|   test('filterRepoFilesWeighted', () => { | ||||
|     // the first matched result should always be the "word.txt" | ||||
|     let res = filterRepoFilesWeighted(['word.txt', 'we-got-result.dat'], 'word'); | ||||
|     expect(res).toHaveLength(2); | ||||
|     expect(res[0].matchResult).toEqual(['', 'word', '.txt']); | ||||
|  | ||||
|     res = filterRepoFilesWeighted(['we-got-result.dat', 'word.txt'], 'word'); | ||||
|     expect(res).toHaveLength(2); | ||||
|     expect(res[0].matchResult).toEqual(['', 'word', '.txt']); | ||||
|   }); | ||||
| }); | ||||
							
								
								
									
										5
									
								
								web_src/js/testUtils/jestSetup.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								web_src/js/testUtils/jestSetup.js
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,5 @@ | ||||
| window.config = { | ||||
|   csrfToken: 'jest-test-csrf-token-123456', | ||||
|   pageData: {}, | ||||
|   i18n: {}, | ||||
| }; | ||||
| @@ -59,36 +59,6 @@ export function parseIssueHref(href) { | ||||
|   return {owner, repo, type, index}; | ||||
| } | ||||
|  | ||||
| // return the sub-match result as an array:  [unmatched, matched, unmatched, matched, ...] | ||||
| // res[even] is unmatched, res[odd] is matched, see unit tests for examples | ||||
| export function strSubMatch(full, sub) { | ||||
|   const res = ['']; | ||||
|   let i = 0, j = 0; | ||||
|   const subLower = sub.toLowerCase(), fullLower = full.toLowerCase(); | ||||
|   while (i < subLower.length && j < fullLower.length) { | ||||
|     if (subLower[i] === fullLower[j]) { | ||||
|       if (res.length % 2 !== 0) res.push(''); | ||||
|       res[res.length - 1] += full[j]; | ||||
|       j++; | ||||
|       i++; | ||||
|     } else { | ||||
|       if (res.length % 2 === 0) res.push(''); | ||||
|       res[res.length - 1] += full[j]; | ||||
|       j++; | ||||
|     } | ||||
|   } | ||||
|   if (i !== sub.length) { | ||||
|     // if the sub string doesn't match the full, only return the full as unmatched. | ||||
|     return [full]; | ||||
|   } | ||||
|   if (j < full.length) { | ||||
|     // append remaining chars from full to result as unmatched | ||||
|     if (res.length % 2 === 0) res.push(''); | ||||
|     res[res.length - 1] += full.substring(j); | ||||
|   } | ||||
|   return res; | ||||
| } | ||||
|  | ||||
| // pretty-print a number using locale-specific separators, e.g. 1200 -> 1,200 | ||||
| export function prettyNumber(num, locale = 'en-US') { | ||||
|   if (typeof num !== 'number') return ''; | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| import { | ||||
|   basename, extname, isObject, uniq, stripTags, joinPaths, parseIssueHref, strSubMatch, | ||||
|   basename, extname, isObject, uniq, stripTags, joinPaths, parseIssueHref, | ||||
|   prettyNumber, parseUrl, | ||||
| } from './utils.js'; | ||||
|  | ||||
| @@ -86,22 +86,6 @@ test('parseIssueHref', () => { | ||||
|   expect(parseIssueHref('')).toEqual({owner: undefined, repo: undefined, type: undefined, index: undefined}); | ||||
| }); | ||||
|  | ||||
| test('strSubMatch', () => { | ||||
|   expect(strSubMatch('abc', '')).toEqual(['abc']); | ||||
|   expect(strSubMatch('abc', 'a')).toEqual(['', 'a', 'bc']); | ||||
|   expect(strSubMatch('abc', 'b')).toEqual(['a', 'b', 'c']); | ||||
|   expect(strSubMatch('abc', 'c')).toEqual(['ab', 'c']); | ||||
|   expect(strSubMatch('abc', 'ac')).toEqual(['', 'a', 'b', 'c']); | ||||
|   expect(strSubMatch('abc', 'z')).toEqual(['abc']); | ||||
|   expect(strSubMatch('abc', 'az')).toEqual(['abc']); | ||||
|  | ||||
|   expect(strSubMatch('abc', 'aC')).toEqual(['', 'a', 'b', 'c']); | ||||
|   expect(strSubMatch('abC', 'ac')).toEqual(['', 'a', 'b', 'C']); | ||||
|  | ||||
|   expect(strSubMatch('aabbcc', 'abc')).toEqual(['', 'a', 'a', 'b', 'b', 'c', 'c']); | ||||
|   expect(strSubMatch('the/directory', 'hedir')).toEqual(['t', 'he', '/', 'dir', 'ectory']); | ||||
| }); | ||||
|  | ||||
| test('prettyNumber', () => { | ||||
|   expect(prettyNumber()).toEqual(''); | ||||
|   expect(prettyNumber(null)).toEqual(''); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user