mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-10-31 21:28:11 +09:00 
			
		
		
		
	Replace linkRegex with xurls library (#6261)
* Replace linkRegex with xurls library. Rather than maintaining a complicated regex to match URLs for autolinking, Gitea can use this existing Go library, which takes care of the matching with very little code change to Gitea itself. After spending a while trying to find the perfect regex for all cases, this library still works better, as it is more flexible than a single regex ever will be. This will also fix the following issues: #5844, #3095, #3381. This passes all our current tests, and I've added new ones mentioned in those issues as well.
* Use xurls.StrictMatchingScheme instead of xurls.Strict. This is much faster, and we only care about https? links to preserve existing behavior.
This commit is contained in:
		
				
					committed by
					
						 techknowlogick
						techknowlogick
					
				
			
			
				
	
			
			
			
						parent
						
							01bd1fcd33
						
					
				
				
					commit
					f2de5dc8c8
				
			
							
								
								
									
										9
									
								
								Gopkg.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										9
									
								
								Gopkg.lock
									
									
									
										generated
									
									
									
								
							| @@ -725,6 +725,14 @@ | ||||
|   pruneopts = "NUT" | ||||
|   revision = "02ccfbfaf0cc627aa3aec8ef7ed5cfeec5b43f63" | ||||
|  | ||||
| [[projects]] | ||||
|   digest = "1:63953ffb90bbc880c612d576fcfd973a5904277d25ec9e2d8d5719bf67969662" | ||||
|   name = "github.com/mvdan/xurls" | ||||
|   packages = ["."] | ||||
|   pruneopts = "NUT" | ||||
|   revision = "e52e821cbfe8fe163ff6f8628ab5869b11fc05af" | ||||
|   version = "v2.0.0" | ||||
|  | ||||
| [[projects]] | ||||
|   digest = "1:2be1d891535ce3d6d2a3db9087f07415e909744e9eff1a30f8f0b2519df60ae6" | ||||
|   name = "github.com/nfnt/resize" | ||||
| @@ -1293,6 +1301,7 @@ | ||||
|     "github.com/mcuadros/go-version", | ||||
|     "github.com/microcosm-cc/bluemonday", | ||||
|     "github.com/msteinert/pam", | ||||
|     "github.com/mvdan/xurls", | ||||
|     "github.com/nfnt/resize", | ||||
|     "github.com/pquerna/otp", | ||||
|     "github.com/pquerna/otp/totp", | ||||
|   | ||||
| @@ -113,3 +113,7 @@ ignored = ["google.golang.org/appengine*"] | ||||
| [[constraint]] | ||||
|   name = "github.com/prometheus/client_golang" | ||||
|   version = "0.9.0" | ||||
|  | ||||
| [[constraint]] | ||||
|   name = "github.com/mvdan/xurls" | ||||
|   version = "2.0.0" | ||||
|   | ||||
| @@ -17,6 +17,7 @@ import ( | ||||
| 	"code.gitea.io/gitea/modules/util" | ||||
|  | ||||
| 	"github.com/Unknwon/com" | ||||
| 	"github.com/mvdan/xurls" | ||||
| 	"golang.org/x/net/html" | ||||
| 	"golang.org/x/net/html/atom" | ||||
| ) | ||||
| @@ -64,9 +65,7 @@ var ( | ||||
| 	//   https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail) | ||||
| 	emailRegex = regexp.MustCompile("[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*") | ||||
|  | ||||
| 	// matches http/https links. used for autolinking those. partly modified from | ||||
| 	// the original present in autolink.js | ||||
| 	linkRegex = regexp.MustCompile(`(?:(?:http|https):\/\/(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+(?:\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)(?:(?:\/[\+~%\/\.\w\-]*)?\??(?:[\-\+:=&;%@\.\w]*)#?(?:[\.\!\/\\\w]*))?`) | ||||
| 	linkRegex, _ = xurls.StrictMatchingScheme("https?://") | ||||
| ) | ||||
|  | ||||
| // regexp for full links to issues/pulls | ||||
|   | ||||
| @@ -104,6 +104,15 @@ func TestRender_links(t *testing.T) { | ||||
| 	test( | ||||
| 		"http://142.42.1.1/", | ||||
| 		`<p><a href="http://142.42.1.1/" rel="nofollow">http://142.42.1.1/</a></p>`) | ||||
| 	test( | ||||
| 		"https://github.com/go-gitea/gitea/?p=aaa/bbb.html#ccc-ddd", | ||||
| 		`<p><a href="https://github.com/go-gitea/gitea/?p=aaa/bbb.html#ccc-ddd" rel="nofollow">https://github.com/go-gitea/gitea/?p=aaa/bbb.html#ccc-ddd</a></p>`) | ||||
| 	test( | ||||
| 		"https://en.wikipedia.org/wiki/URL_(disambiguation)", | ||||
| 		`<p><a href="https://en.wikipedia.org/wiki/URL_(disambiguation)" rel="nofollow">https://en.wikipedia.org/wiki/URL_(disambiguation)</a></p>`) | ||||
| 	test( | ||||
| 		"https://foo_bar.example.com/", | ||||
| 		`<p><a href="https://foo_bar.example.com/" rel="nofollow">https://foo_bar.example.com/</a></p>`) | ||||
|  | ||||
| 	// Test that should *not* be turned into URL | ||||
| 	test( | ||||
|   | ||||
							
								
								
									
										27
									
								
								vendor/github.com/mvdan/xurls/LICENSE
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								vendor/github.com/mvdan/xurls/LICENSE
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,27 @@ | ||||
| Copyright (c) 2015, Daniel Martí. All rights reserved. | ||||
|  | ||||
| Redistribution and use in source and binary forms, with or without | ||||
| modification, are permitted provided that the following conditions are | ||||
| met: | ||||
|  | ||||
|    * Redistributions of source code must retain the above copyright | ||||
| notice, this list of conditions and the following disclaimer. | ||||
|    * Redistributions in binary form must reproduce the above | ||||
| copyright notice, this list of conditions and the following disclaimer | ||||
| in the documentation and/or other materials provided with the | ||||
| distribution. | ||||
|    * Neither the name of the copyright holder nor the names of its | ||||
| contributors may be used to endorse or promote products derived from | ||||
| this software without specific prior written permission. | ||||
|  | ||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||
| "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||
| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||
| A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||
| OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||
| SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||
| LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||
| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||
| THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||
| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
							
								
								
									
										299
									
								
								vendor/github.com/mvdan/xurls/schemes.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										299
									
								
								vendor/github.com/mvdan/xurls/schemes.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,299 @@ | ||||
| // Generated by schemesgen | ||||
|  | ||||
| package xurls | ||||
|  | ||||
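| // Schemes is a list of all IANA assigned schemes, in roughly alphabetical | ||||
| // order (a few generated entries, e.g. `ms-project`/`ms-powerpoint`, are not | ||||
| // strictly sorted). | ||||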
| // | ||||
| // Source: | ||||
| //   https://www.iana.org/assignments/uri-schemes/uri-schemes-1.csv | ||||
| var Schemes = []string{ | ||||
| 	`aaa`, | ||||
| 	`aaas`, | ||||
| 	`about`, | ||||
| 	`acap`, | ||||
| 	`acct`, | ||||
| 	`acr`, | ||||
| 	`adiumxtra`, | ||||
| 	`afp`, | ||||
| 	`afs`, | ||||
| 	`aim`, | ||||
| 	`appdata`, | ||||
| 	`apt`, | ||||
| 	`attachment`, | ||||
| 	`aw`, | ||||
| 	`barion`, | ||||
| 	`beshare`, | ||||
| 	`bitcoin`, | ||||
| 	`bitcoincash`, | ||||
| 	`blob`, | ||||
| 	`bolo`, | ||||
| 	`browserext`, | ||||
| 	`callto`, | ||||
| 	`cap`, | ||||
| 	`chrome`, | ||||
| 	`chrome-extension`, | ||||
| 	`cid`, | ||||
| 	`coap`, | ||||
| 	`coap+tcp`, | ||||
| 	`coap+ws`, | ||||
| 	`coaps`, | ||||
| 	`coaps+tcp`, | ||||
| 	`coaps+ws`, | ||||
| 	`com-eventbrite-attendee`, | ||||
| 	`content`, | ||||
| 	`conti`, | ||||
| 	`crid`, | ||||
| 	`cvs`, | ||||
| 	`data`, | ||||
| 	`dav`, | ||||
| 	`diaspora`, | ||||
| 	`dict`, | ||||
| 	`did`, | ||||
| 	`dis`, | ||||
| 	`dlna-playcontainer`, | ||||
| 	`dlna-playsingle`, | ||||
| 	`dns`, | ||||
| 	`dntp`, | ||||
| 	`dtn`, | ||||
| 	`dvb`, | ||||
| 	`ed2k`, | ||||
| 	`elsi`, | ||||
| 	`example`, | ||||
| 	`facetime`, | ||||
| 	`fax`, | ||||
| 	`feed`, | ||||
| 	`feedready`, | ||||
| 	`file`, | ||||
| 	`filesystem`, | ||||
| 	`finger`, | ||||
| 	`fish`, | ||||
| 	`ftp`, | ||||
| 	`geo`, | ||||
| 	`gg`, | ||||
| 	`git`, | ||||
| 	`gizmoproject`, | ||||
| 	`go`, | ||||
| 	`gopher`, | ||||
| 	`graph`, | ||||
| 	`gtalk`, | ||||
| 	`h323`, | ||||
| 	`ham`, | ||||
| 	`hcap`, | ||||
| 	`hcp`, | ||||
| 	`http`, | ||||
| 	`https`, | ||||
| 	`hxxp`, | ||||
| 	`hxxps`, | ||||
| 	`hydrazone`, | ||||
| 	`iax`, | ||||
| 	`icap`, | ||||
| 	`icon`, | ||||
| 	`im`, | ||||
| 	`imap`, | ||||
| 	`info`, | ||||
| 	`iotdisco`, | ||||
| 	`ipn`, | ||||
| 	`ipp`, | ||||
| 	`ipps`, | ||||
| 	`irc`, | ||||
| 	`irc6`, | ||||
| 	`ircs`, | ||||
| 	`iris`, | ||||
| 	`iris.beep`, | ||||
| 	`iris.lwz`, | ||||
| 	`iris.xpc`, | ||||
| 	`iris.xpcs`, | ||||
| 	`isostore`, | ||||
| 	`itms`, | ||||
| 	`jabber`, | ||||
| 	`jar`, | ||||
| 	`jms`, | ||||
| 	`keyparc`, | ||||
| 	`lastfm`, | ||||
| 	`ldap`, | ||||
| 	`ldaps`, | ||||
| 	`lvlt`, | ||||
| 	`magnet`, | ||||
| 	`mailserver`, | ||||
| 	`mailto`, | ||||
| 	`maps`, | ||||
| 	`market`, | ||||
| 	`message`, | ||||
| 	`microsoft.windows.camera`, | ||||
| 	`microsoft.windows.camera.multipicker`, | ||||
| 	`microsoft.windows.camera.picker`, | ||||
| 	`mid`, | ||||
| 	`mms`, | ||||
| 	`modem`, | ||||
| 	`mongodb`, | ||||
| 	`moz`, | ||||
| 	`ms-access`, | ||||
| 	`ms-browser-extension`, | ||||
| 	`ms-drive-to`, | ||||
| 	`ms-enrollment`, | ||||
| 	`ms-excel`, | ||||
| 	`ms-gamebarservices`, | ||||
| 	`ms-gamingoverlay`, | ||||
| 	`ms-getoffice`, | ||||
| 	`ms-help`, | ||||
| 	`ms-infopath`, | ||||
| 	`ms-inputapp`, | ||||
| 	`ms-lockscreencomponent-config`, | ||||
| 	`ms-media-stream-id`, | ||||
| 	`ms-mixedrealitycapture`, | ||||
| 	`ms-officeapp`, | ||||
| 	`ms-people`, | ||||
| 	`ms-project`, | ||||
| 	`ms-powerpoint`, | ||||
| 	`ms-publisher`, | ||||
| 	`ms-restoretabcompanion`, | ||||
| 	`ms-screenclip`, | ||||
| 	`ms-screensketch`, | ||||
| 	`ms-search`, | ||||
| 	`ms-search-repair`, | ||||
| 	`ms-secondary-screen-controller`, | ||||
| 	`ms-secondary-screen-setup`, | ||||
| 	`ms-settings`, | ||||
| 	`ms-settings-airplanemode`, | ||||
| 	`ms-settings-bluetooth`, | ||||
| 	`ms-settings-camera`, | ||||
| 	`ms-settings-cellular`, | ||||
| 	`ms-settings-cloudstorage`, | ||||
| 	`ms-settings-connectabledevices`, | ||||
| 	`ms-settings-displays-topology`, | ||||
| 	`ms-settings-emailandaccounts`, | ||||
| 	`ms-settings-language`, | ||||
| 	`ms-settings-location`, | ||||
| 	`ms-settings-lock`, | ||||
| 	`ms-settings-nfctransactions`, | ||||
| 	`ms-settings-notifications`, | ||||
| 	`ms-settings-power`, | ||||
| 	`ms-settings-privacy`, | ||||
| 	`ms-settings-proximity`, | ||||
| 	`ms-settings-screenrotation`, | ||||
| 	`ms-settings-wifi`, | ||||
| 	`ms-settings-workplace`, | ||||
| 	`ms-spd`, | ||||
| 	`ms-sttoverlay`, | ||||
| 	`ms-transit-to`, | ||||
| 	`ms-useractivityset`, | ||||
| 	`ms-virtualtouchpad`, | ||||
| 	`ms-visio`, | ||||
| 	`ms-walk-to`, | ||||
| 	`ms-whiteboard`, | ||||
| 	`ms-whiteboard-cmd`, | ||||
| 	`ms-word`, | ||||
| 	`msnim`, | ||||
| 	`msrp`, | ||||
| 	`msrps`, | ||||
| 	`mtqp`, | ||||
| 	`mumble`, | ||||
| 	`mupdate`, | ||||
| 	`mvn`, | ||||
| 	`news`, | ||||
| 	`nfs`, | ||||
| 	`ni`, | ||||
| 	`nih`, | ||||
| 	`nntp`, | ||||
| 	`notes`, | ||||
| 	`ocf`, | ||||
| 	`oid`, | ||||
| 	`onenote`, | ||||
| 	`onenote-cmd`, | ||||
| 	`opaquelocktoken`, | ||||
| 	`openpgp4fpr`, | ||||
| 	`pack`, | ||||
| 	`palm`, | ||||
| 	`paparazzi`, | ||||
| 	`pkcs11`, | ||||
| 	`platform`, | ||||
| 	`pop`, | ||||
| 	`pres`, | ||||
| 	`prospero`, | ||||
| 	`proxy`, | ||||
| 	`pwid`, | ||||
| 	`psyc`, | ||||
| 	`qb`, | ||||
| 	`query`, | ||||
| 	`redis`, | ||||
| 	`rediss`, | ||||
| 	`reload`, | ||||
| 	`res`, | ||||
| 	`resource`, | ||||
| 	`rmi`, | ||||
| 	`rsync`, | ||||
| 	`rtmfp`, | ||||
| 	`rtmp`, | ||||
| 	`rtsp`, | ||||
| 	`rtsps`, | ||||
| 	`rtspu`, | ||||
| 	`secondlife`, | ||||
| 	`service`, | ||||
| 	`session`, | ||||
| 	`sftp`, | ||||
| 	`sgn`, | ||||
| 	`shttp`, | ||||
| 	`sieve`, | ||||
| 	`simpleledger`, | ||||
| 	`sip`, | ||||
| 	`sips`, | ||||
| 	`skype`, | ||||
| 	`smb`, | ||||
| 	`sms`, | ||||
| 	`smtp`, | ||||
| 	`snews`, | ||||
| 	`snmp`, | ||||
| 	`soap.beep`, | ||||
| 	`soap.beeps`, | ||||
| 	`soldat`, | ||||
| 	`spiffe`, | ||||
| 	`spotify`, | ||||
| 	`ssh`, | ||||
| 	`steam`, | ||||
| 	`stun`, | ||||
| 	`stuns`, | ||||
| 	`submit`, | ||||
| 	`svn`, | ||||
| 	`tag`, | ||||
| 	`teamspeak`, | ||||
| 	`tel`, | ||||
| 	`teliaeid`, | ||||
| 	`telnet`, | ||||
| 	`tftp`, | ||||
| 	`things`, | ||||
| 	`thismessage`, | ||||
| 	`tip`, | ||||
| 	`tn3270`, | ||||
| 	`tool`, | ||||
| 	`turn`, | ||||
| 	`turns`, | ||||
| 	`tv`, | ||||
| 	`udp`, | ||||
| 	`unreal`, | ||||
| 	`urn`, | ||||
| 	`ut2004`, | ||||
| 	`v-event`, | ||||
| 	`vemmi`, | ||||
| 	`ventrilo`, | ||||
| 	`videotex`, | ||||
| 	`vnc`, | ||||
| 	`view-source`, | ||||
| 	`wais`, | ||||
| 	`webcal`, | ||||
| 	`wpid`, | ||||
| 	`ws`, | ||||
| 	`wss`, | ||||
| 	`wtai`, | ||||
| 	`wyciwyg`, | ||||
| 	`xcon`, | ||||
| 	`xcon-userid`, | ||||
| 	`xfire`, | ||||
| 	`xmlrpc.beep`, | ||||
| 	`xmlrpc.beeps`, | ||||
| 	`xmpp`, | ||||
| 	`xri`, | ||||
| 	`ymsgr`, | ||||
| 	`z39.50`, | ||||
| 	`z39.50r`, | ||||
| 	`z39.50s`, | ||||
| } | ||||
							
								
								
									
										1557
									
								
								vendor/github.com/mvdan/xurls/tlds.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1557
									
								
								vendor/github.com/mvdan/xurls/tlds.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										24
									
								
								vendor/github.com/mvdan/xurls/tlds_pseudo.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										24
									
								
								vendor/github.com/mvdan/xurls/tlds_pseudo.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,24 @@ | ||||
| // Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc> | ||||
| // See LICENSE for licensing information | ||||
|  | ||||
| package xurls | ||||
|  | ||||
| // PseudoTLDs is a sorted list of some widely used unofficial TLDs. | ||||
| // | ||||
| // Sources: | ||||
| //  * https://en.wikipedia.org/wiki/Pseudo-top-level_domain | ||||
| //  * https://en.wikipedia.org/wiki/Category:Pseudo-top-level_domains | ||||
| //  * https://tools.ietf.org/html/draft-grothoff-iesg-special-use-p2p-names-00 | ||||
| //  * https://www.iana.org/assignments/special-use-domain-names/special-use-domain-names.xhtml | ||||
| var PseudoTLDs = []string{ | ||||
| 	`bit`,       // Namecoin | ||||
| 	`example`,   // Example domain | ||||
| 	`exit`,      // Tor exit node | ||||
| 	`gnu`,       // GNS by public key | ||||
| 	`i2p`,       // I2P network | ||||
| 	`invalid`,   // Invalid domain | ||||
| 	`local`,     // Local network | ||||
| 	`localhost`, // Local network | ||||
| 	`test`,      // Test domain | ||||
| 	`zkey`,      // GNS domain name | ||||
| } | ||||
							
								
								
									
										107
									
								
								vendor/github.com/mvdan/xurls/xurls.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										107
									
								
								vendor/github.com/mvdan/xurls/xurls.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,107 @@ | ||||
| // Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc> | ||||
| // See LICENSE for licensing information | ||||
|  | ||||
| // Package xurls extracts urls from plain text using regular expressions. | ||||
| package xurls | ||||
|  | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"regexp" | ||||
| ) | ||||
|  | ||||
| //go:generate go run generate/tldsgen/main.go | ||||
| //go:generate go run generate/schemesgen/main.go | ||||
|  | ||||
| const ( | ||||
| 	letter    = `\p{L}` | ||||
| 	mark      = `\p{M}` | ||||
| 	number    = `\p{N}` | ||||
| 	iriChar   = letter + mark + number | ||||
| 	currency  = `\p{Sc}` | ||||
| 	otherSymb = `\p{So}` | ||||
| 	endChar   = iriChar + `/\-+_&~*%=#` + currency + otherSymb | ||||
| 	otherPunc = `\p{Po}` | ||||
| 	midChar   = endChar + `|` + otherPunc | ||||
| 	wellParen = `\([` + midChar + `]*(\([` + midChar + `]*\)[` + midChar + `]*)*\)` | ||||
| 	wellBrack = `\[[` + midChar + `]*(\[[` + midChar + `]*\][` + midChar + `]*)*\]` | ||||
| 	wellBrace = `\{[` + midChar + `]*(\{[` + midChar + `]*\}[` + midChar + `]*)*\}` | ||||
| 	wellAll   = wellParen + `|` + wellBrack + `|` + wellBrace | ||||
| 	pathCont  = `([` + midChar + `]*(` + wellAll + `|[` + endChar + `])+)+` | ||||
|  | ||||
| 	iri      = `[` + iriChar + `]([` + iriChar + `\-]*[` + iriChar + `])?` | ||||
| 	domain   = `(` + iri + `\.)+` | ||||
| 	octet    = `(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])` | ||||
| 	ipv4Addr = `\b` + octet + `\.` + octet + `\.` + octet + `\.` + octet + `\b` | ||||
| 	ipv6Addr = `([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:[0-9a-fA-F]{0,4}|:[0-9a-fA-F]{1,4})?|(:[0-9a-fA-F]{1,4}){0,2})|(:[0-9a-fA-F]{1,4}){0,3})|(:[0-9a-fA-F]{1,4}){0,4})|:(:[0-9a-fA-F]{1,4}){0,5})((:[0-9a-fA-F]{1,4}){2}|:(25[0-5]|(2[0-4]|1[0-9]|[1-9])?[0-9])(\.(25[0-5]|(2[0-4]|1[0-9]|[1-9])?[0-9])){3})|(([0-9a-fA-F]{1,4}:){1,6}|:):[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){7}:` | ||||
| 	ipAddr   = `(` + ipv4Addr + `|` + ipv6Addr + `)` | ||||
| 	port     = `(:[0-9]*)?` | ||||
| ) | ||||
|  | ||||
| // AnyScheme can be passed to StrictMatchingScheme to match any possibly valid | ||||
| // scheme, and not just the known ones. | ||||
| var AnyScheme = `([a-zA-Z][a-zA-Z.\-+]*://|` + anyOf(SchemesNoAuthority...) + `:)` | ||||
|  | ||||
| // SchemesNoAuthority is a sorted list of some well-known url schemes that are | ||||
| // followed by ":" instead of "://". | ||||
| var SchemesNoAuthority = []string{ | ||||
| 	`bitcoin`, // Bitcoin | ||||
| 	`file`,    // Files | ||||
| 	`magnet`,  // Torrent magnets | ||||
| 	`mailto`,  // Mail | ||||
| 	`sms`,     // SMS | ||||
| 	`tel`,     // Telephone | ||||
| 	`xmpp`,    // XMPP | ||||
| } | ||||
|  | ||||
| // anyOf returns a parenthesised regular-expression alternation that matches | ||||
| // any one of strs literally. Each string is escaped with regexp.QuoteMeta | ||||
| // before the alternatives are joined with "|". Called with no arguments it | ||||
| // returns "()". | ||||
| func anyOf(strs ...string) string { | ||||
| 	var b bytes.Buffer | ||||
| 	b.WriteByte('(') | ||||
| 	for i, s := range strs { | ||||
| 		if i != 0 { | ||||
| 			// Separator goes between alternatives, never before the first. | ||||
| 			b.WriteByte('|') | ||||
| 		} | ||||
| 		b.WriteString(regexp.QuoteMeta(s)) | ||||
| 	} | ||||
| 	b.WriteByte(')') | ||||
| 	return b.String() | ||||
| } | ||||
|  | ||||
| // strictExp returns the pattern source compiled by Strict: a scheme prefix | ||||
| // followed by pathCont. The prefix is either an entry of Schemes followed by | ||||
| // "://" or an entry of SchemesNoAuthority followed by ":". | ||||
| func strictExp() string { | ||||
| 	schemes := `(` + anyOf(Schemes...) + `://|` + anyOf(SchemesNoAuthority...) + `:)` | ||||
| 	// Only the scheme part is case-insensitive; (?-i) switches the flag off | ||||
| 	// again before the rest of the URL. | ||||
| 	return `(?i)` + schemes + `(?-i)` + pathCont | ||||
| } | ||||
|  | ||||
| // relaxedExp returns the pattern source compiled by Relaxed: the strictExp | ||||
| // pattern, or alternatively a scheme-less web URL made of a host name, an | ||||
| // optional port and an optional path. | ||||
| func relaxedExp() string { | ||||
| 	// A site is one or more dot-terminated labels followed by a known TLD or | ||||
| 	// pseudo-TLD; only the TLD part is matched case-insensitively. | ||||
| 	site := domain + `(?i)` + anyOf(append(TLDs, PseudoTLDs...)...) + `(?-i)` | ||||
| 	hostName := `(` + site + `|` + ipAddr + `)` | ||||
| 	// After host and port: a bare "/", a "/" with an optional path, a word | ||||
| 	// boundary, or the end of the text. | ||||
| 	webURL := hostName + port + `(/|/` + pathCont + `?|\b|$)` | ||||
| 	return strictExp() + `|` + webURL | ||||
| } | ||||
|  | ||||
| // Strict produces a regexp that matches any URL with a scheme in either the | ||||
| // Schemes or SchemesNoAuthority lists. | ||||
| // | ||||
| // The expression is built and compiled on every call, so callers that match | ||||
| // repeatedly should keep the returned value. Compilation uses | ||||
| // regexp.MustCompile, which panics if the generated pattern is invalid. | ||||
| func Strict() *regexp.Regexp { | ||||
| 	re := regexp.MustCompile(strictExp()) | ||||
| 	// Prefer the leftmost-longest match over the first alternative that fits. | ||||
| 	re.Longest() | ||||
| 	return re | ||||
| } | ||||
|  | ||||
| // Relaxed produces a regexp that matches any URL matched by Strict, plus any | ||||
| // URL with no scheme. | ||||
| // | ||||
| // The expression is built and compiled on every call, so callers that match | ||||
| // repeatedly should keep the returned value. Compilation uses | ||||
| // regexp.MustCompile, which panics if the generated pattern is invalid. | ||||
| func Relaxed() *regexp.Regexp { | ||||
| 	re := regexp.MustCompile(relaxedExp()) | ||||
| 	// Prefer the leftmost-longest match over the first alternative that fits. | ||||
| 	re.Longest() | ||||
| 	return re | ||||
| } | ||||
|  | ||||
| // StrictMatchingScheme produces a regexp similar to Strict, but requiring that | ||||
| // the scheme match the given regular expression. See AnyScheme too. | ||||
| // | ||||
| // exp is inserted verbatim into a case-insensitive group placed directly in | ||||
| // front of pathCont; no separator is added, so exp must include it itself | ||||
| // (for example "https?://"). If the combined pattern does not compile, the | ||||
| // returned regexp is nil and the compile error is returned. | ||||
| func StrictMatchingScheme(exp string) (*regexp.Regexp, error) { | ||||
| 	strictMatching := `(?i)(` + exp + `)(?-i)` + pathCont | ||||
| 	re, err := regexp.Compile(strictMatching) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	// Prefer the leftmost-longest match over the first alternative that fits. | ||||
| 	re.Longest() | ||||
| 	return re, nil | ||||
| } | ||||
		Reference in New Issue
	
	Block a user