mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-10-31 21:28:11 +09:00 
			
		
		
		
	Replace linkRegex with xurls library (#6261)
* Replace linkRegex with xurls library

  Rather than maintaining a complicated regex to match URLs for autolinking, Gitea can use this existing Go library, which takes care of the matching with very little code change to Gitea itself. After spending a while trying to find the perfect regex for all cases, this library still works better, as it is more flexible than a single regex ever will be.

  This will also fix the following issues: #5844, #3095, #3381.

  This passes all our current tests, and I've added new ones mentioned in those issues as well.

* Use xurls.StrictMatchingScheme instead of xurls.Strict

  This is much faster, and we only care about https? links to preserve existing behavior.
This commit is contained in:
		
				
					committed by
					
						 techknowlogick
						techknowlogick
					
				
			
			
				
	
			
			
			
						parent
						
							01bd1fcd33
						
					
				
				
					commit
					f2de5dc8c8
				
			
							
								
								
									
										9
									
								
								Gopkg.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										9
									
								
								Gopkg.lock
									
									
									
										generated
									
									
									
								
							| @@ -725,6 +725,14 @@ | |||||||
|   pruneopts = "NUT" |   pruneopts = "NUT" | ||||||
|   revision = "02ccfbfaf0cc627aa3aec8ef7ed5cfeec5b43f63" |   revision = "02ccfbfaf0cc627aa3aec8ef7ed5cfeec5b43f63" | ||||||
|  |  | ||||||
|  | [[projects]] | ||||||
|  |   digest = "1:63953ffb90bbc880c612d576fcfd973a5904277d25ec9e2d8d5719bf67969662" | ||||||
|  |   name = "github.com/mvdan/xurls" | ||||||
|  |   packages = ["."] | ||||||
|  |   pruneopts = "NUT" | ||||||
|  |   revision = "e52e821cbfe8fe163ff6f8628ab5869b11fc05af" | ||||||
|  |   version = "v2.0.0" | ||||||
|  |  | ||||||
| [[projects]] | [[projects]] | ||||||
|   digest = "1:2be1d891535ce3d6d2a3db9087f07415e909744e9eff1a30f8f0b2519df60ae6" |   digest = "1:2be1d891535ce3d6d2a3db9087f07415e909744e9eff1a30f8f0b2519df60ae6" | ||||||
|   name = "github.com/nfnt/resize" |   name = "github.com/nfnt/resize" | ||||||
| @@ -1293,6 +1301,7 @@ | |||||||
|     "github.com/mcuadros/go-version", |     "github.com/mcuadros/go-version", | ||||||
|     "github.com/microcosm-cc/bluemonday", |     "github.com/microcosm-cc/bluemonday", | ||||||
|     "github.com/msteinert/pam", |     "github.com/msteinert/pam", | ||||||
|  |     "github.com/mvdan/xurls", | ||||||
|     "github.com/nfnt/resize", |     "github.com/nfnt/resize", | ||||||
|     "github.com/pquerna/otp", |     "github.com/pquerna/otp", | ||||||
|     "github.com/pquerna/otp/totp", |     "github.com/pquerna/otp/totp", | ||||||
|   | |||||||
| @@ -113,3 +113,7 @@ ignored = ["google.golang.org/appengine*"] | |||||||
| [[constraint]] | [[constraint]] | ||||||
|   name = "github.com/prometheus/client_golang" |   name = "github.com/prometheus/client_golang" | ||||||
|   version = "0.9.0" |   version = "0.9.0" | ||||||
|  |  | ||||||
|  | [[constraint]] | ||||||
|  |   name = "github.com/mvdan/xurls" | ||||||
|  |   version = "2.0.0" | ||||||
|   | |||||||
| @@ -17,6 +17,7 @@ import ( | |||||||
| 	"code.gitea.io/gitea/modules/util" | 	"code.gitea.io/gitea/modules/util" | ||||||
|  |  | ||||||
| 	"github.com/Unknwon/com" | 	"github.com/Unknwon/com" | ||||||
|  | 	"github.com/mvdan/xurls" | ||||||
| 	"golang.org/x/net/html" | 	"golang.org/x/net/html" | ||||||
| 	"golang.org/x/net/html/atom" | 	"golang.org/x/net/html/atom" | ||||||
| ) | ) | ||||||
| @@ -64,9 +65,7 @@ var ( | |||||||
| 	//   https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail) | 	//   https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail) | ||||||
| 	emailRegex = regexp.MustCompile("[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*") | 	emailRegex = regexp.MustCompile("[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*") | ||||||
|  |  | ||||||
| 	// matches http/https links. used for autolinking those. partly modified from | 	linkRegex, _ = xurls.StrictMatchingScheme("https?://") | ||||||
| 	// the original present in autolink.js |  | ||||||
| 	linkRegex = regexp.MustCompile(`(?:(?:http|https):\/\/(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+(?:\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)(?:(?:\/[\+~%\/\.\w\-]*)?\??(?:[\-\+:=&;%@\.\w]*)#?(?:[\.\!\/\\\w]*))?`) |  | ||||||
| ) | ) | ||||||
|  |  | ||||||
| // regexp for full links to issues/pulls | // regexp for full links to issues/pulls | ||||||
|   | |||||||
| @@ -104,6 +104,15 @@ func TestRender_links(t *testing.T) { | |||||||
| 	test( | 	test( | ||||||
| 		"http://142.42.1.1/", | 		"http://142.42.1.1/", | ||||||
| 		`<p><a href="http://142.42.1.1/" rel="nofollow">http://142.42.1.1/</a></p>`) | 		`<p><a href="http://142.42.1.1/" rel="nofollow">http://142.42.1.1/</a></p>`) | ||||||
|  | 	test( | ||||||
|  | 		"https://github.com/go-gitea/gitea/?p=aaa/bbb.html#ccc-ddd", | ||||||
|  | 		`<p><a href="https://github.com/go-gitea/gitea/?p=aaa/bbb.html#ccc-ddd" rel="nofollow">https://github.com/go-gitea/gitea/?p=aaa/bbb.html#ccc-ddd</a></p>`) | ||||||
|  | 	test( | ||||||
|  | 		"https://en.wikipedia.org/wiki/URL_(disambiguation)", | ||||||
|  | 		`<p><a href="https://en.wikipedia.org/wiki/URL_(disambiguation)" rel="nofollow">https://en.wikipedia.org/wiki/URL_(disambiguation)</a></p>`) | ||||||
|  | 	test( | ||||||
|  | 		"https://foo_bar.example.com/", | ||||||
|  | 		`<p><a href="https://foo_bar.example.com/" rel="nofollow">https://foo_bar.example.com/</a></p>`) | ||||||
|  |  | ||||||
| 	// Test that should *not* be turned into URL | 	// Test that should *not* be turned into URL | ||||||
| 	test( | 	test( | ||||||
|   | |||||||
							
								
								
									
										27
									
								
								vendor/github.com/mvdan/xurls/LICENSE
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								vendor/github.com/mvdan/xurls/LICENSE
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,27 @@ | |||||||
|  | Copyright (c) 2015, Daniel Martí. All rights reserved. | ||||||
|  |  | ||||||
|  | Redistribution and use in source and binary forms, with or without | ||||||
|  | modification, are permitted provided that the following conditions are | ||||||
|  | met: | ||||||
|  |  | ||||||
|  |    * Redistributions of source code must retain the above copyright | ||||||
|  | notice, this list of conditions and the following disclaimer. | ||||||
|  |    * Redistributions in binary form must reproduce the above | ||||||
|  | copyright notice, this list of conditions and the following disclaimer | ||||||
|  | in the documentation and/or other materials provided with the | ||||||
|  | distribution. | ||||||
|  |    * Neither the name of the copyright holder nor the names of its | ||||||
|  | contributors may be used to endorse or promote products derived from | ||||||
|  | this software without specific prior written permission. | ||||||
|  |  | ||||||
|  | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||||
|  | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||||
|  | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||||
|  | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||||
|  | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||||
|  | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||||
|  | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||||
|  | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||||
|  | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||||
|  | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||||
|  | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||||
							
								
								
									
										299
									
								
								vendor/github.com/mvdan/xurls/schemes.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										299
									
								
								vendor/github.com/mvdan/xurls/schemes.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,299 @@ | |||||||
|  | // Generated by schemesgen | ||||||
|  |  | ||||||
|  | package xurls | ||||||
|  |  | ||||||
|  | // Schemes is a sorted list of all IANA assigned schemes. | ||||||
|  | // | ||||||
|  | // Source: | ||||||
|  | //   https://www.iana.org/assignments/uri-schemes/uri-schemes-1.csv | ||||||
|  | var Schemes = []string{ | ||||||
|  | 	`aaa`, | ||||||
|  | 	`aaas`, | ||||||
|  | 	`about`, | ||||||
|  | 	`acap`, | ||||||
|  | 	`acct`, | ||||||
|  | 	`acr`, | ||||||
|  | 	`adiumxtra`, | ||||||
|  | 	`afp`, | ||||||
|  | 	`afs`, | ||||||
|  | 	`aim`, | ||||||
|  | 	`appdata`, | ||||||
|  | 	`apt`, | ||||||
|  | 	`attachment`, | ||||||
|  | 	`aw`, | ||||||
|  | 	`barion`, | ||||||
|  | 	`beshare`, | ||||||
|  | 	`bitcoin`, | ||||||
|  | 	`bitcoincash`, | ||||||
|  | 	`blob`, | ||||||
|  | 	`bolo`, | ||||||
|  | 	`browserext`, | ||||||
|  | 	`callto`, | ||||||
|  | 	`cap`, | ||||||
|  | 	`chrome`, | ||||||
|  | 	`chrome-extension`, | ||||||
|  | 	`cid`, | ||||||
|  | 	`coap`, | ||||||
|  | 	`coap+tcp`, | ||||||
|  | 	`coap+ws`, | ||||||
|  | 	`coaps`, | ||||||
|  | 	`coaps+tcp`, | ||||||
|  | 	`coaps+ws`, | ||||||
|  | 	`com-eventbrite-attendee`, | ||||||
|  | 	`content`, | ||||||
|  | 	`conti`, | ||||||
|  | 	`crid`, | ||||||
|  | 	`cvs`, | ||||||
|  | 	`data`, | ||||||
|  | 	`dav`, | ||||||
|  | 	`diaspora`, | ||||||
|  | 	`dict`, | ||||||
|  | 	`did`, | ||||||
|  | 	`dis`, | ||||||
|  | 	`dlna-playcontainer`, | ||||||
|  | 	`dlna-playsingle`, | ||||||
|  | 	`dns`, | ||||||
|  | 	`dntp`, | ||||||
|  | 	`dtn`, | ||||||
|  | 	`dvb`, | ||||||
|  | 	`ed2k`, | ||||||
|  | 	`elsi`, | ||||||
|  | 	`example`, | ||||||
|  | 	`facetime`, | ||||||
|  | 	`fax`, | ||||||
|  | 	`feed`, | ||||||
|  | 	`feedready`, | ||||||
|  | 	`file`, | ||||||
|  | 	`filesystem`, | ||||||
|  | 	`finger`, | ||||||
|  | 	`fish`, | ||||||
|  | 	`ftp`, | ||||||
|  | 	`geo`, | ||||||
|  | 	`gg`, | ||||||
|  | 	`git`, | ||||||
|  | 	`gizmoproject`, | ||||||
|  | 	`go`, | ||||||
|  | 	`gopher`, | ||||||
|  | 	`graph`, | ||||||
|  | 	`gtalk`, | ||||||
|  | 	`h323`, | ||||||
|  | 	`ham`, | ||||||
|  | 	`hcap`, | ||||||
|  | 	`hcp`, | ||||||
|  | 	`http`, | ||||||
|  | 	`https`, | ||||||
|  | 	`hxxp`, | ||||||
|  | 	`hxxps`, | ||||||
|  | 	`hydrazone`, | ||||||
|  | 	`iax`, | ||||||
|  | 	`icap`, | ||||||
|  | 	`icon`, | ||||||
|  | 	`im`, | ||||||
|  | 	`imap`, | ||||||
|  | 	`info`, | ||||||
|  | 	`iotdisco`, | ||||||
|  | 	`ipn`, | ||||||
|  | 	`ipp`, | ||||||
|  | 	`ipps`, | ||||||
|  | 	`irc`, | ||||||
|  | 	`irc6`, | ||||||
|  | 	`ircs`, | ||||||
|  | 	`iris`, | ||||||
|  | 	`iris.beep`, | ||||||
|  | 	`iris.lwz`, | ||||||
|  | 	`iris.xpc`, | ||||||
|  | 	`iris.xpcs`, | ||||||
|  | 	`isostore`, | ||||||
|  | 	`itms`, | ||||||
|  | 	`jabber`, | ||||||
|  | 	`jar`, | ||||||
|  | 	`jms`, | ||||||
|  | 	`keyparc`, | ||||||
|  | 	`lastfm`, | ||||||
|  | 	`ldap`, | ||||||
|  | 	`ldaps`, | ||||||
|  | 	`lvlt`, | ||||||
|  | 	`magnet`, | ||||||
|  | 	`mailserver`, | ||||||
|  | 	`mailto`, | ||||||
|  | 	`maps`, | ||||||
|  | 	`market`, | ||||||
|  | 	`message`, | ||||||
|  | 	`microsoft.windows.camera`, | ||||||
|  | 	`microsoft.windows.camera.multipicker`, | ||||||
|  | 	`microsoft.windows.camera.picker`, | ||||||
|  | 	`mid`, | ||||||
|  | 	`mms`, | ||||||
|  | 	`modem`, | ||||||
|  | 	`mongodb`, | ||||||
|  | 	`moz`, | ||||||
|  | 	`ms-access`, | ||||||
|  | 	`ms-browser-extension`, | ||||||
|  | 	`ms-drive-to`, | ||||||
|  | 	`ms-enrollment`, | ||||||
|  | 	`ms-excel`, | ||||||
|  | 	`ms-gamebarservices`, | ||||||
|  | 	`ms-gamingoverlay`, | ||||||
|  | 	`ms-getoffice`, | ||||||
|  | 	`ms-help`, | ||||||
|  | 	`ms-infopath`, | ||||||
|  | 	`ms-inputapp`, | ||||||
|  | 	`ms-lockscreencomponent-config`, | ||||||
|  | 	`ms-media-stream-id`, | ||||||
|  | 	`ms-mixedrealitycapture`, | ||||||
|  | 	`ms-officeapp`, | ||||||
|  | 	`ms-people`, | ||||||
|  | 	`ms-project`, | ||||||
|  | 	`ms-powerpoint`, | ||||||
|  | 	`ms-publisher`, | ||||||
|  | 	`ms-restoretabcompanion`, | ||||||
|  | 	`ms-screenclip`, | ||||||
|  | 	`ms-screensketch`, | ||||||
|  | 	`ms-search`, | ||||||
|  | 	`ms-search-repair`, | ||||||
|  | 	`ms-secondary-screen-controller`, | ||||||
|  | 	`ms-secondary-screen-setup`, | ||||||
|  | 	`ms-settings`, | ||||||
|  | 	`ms-settings-airplanemode`, | ||||||
|  | 	`ms-settings-bluetooth`, | ||||||
|  | 	`ms-settings-camera`, | ||||||
|  | 	`ms-settings-cellular`, | ||||||
|  | 	`ms-settings-cloudstorage`, | ||||||
|  | 	`ms-settings-connectabledevices`, | ||||||
|  | 	`ms-settings-displays-topology`, | ||||||
|  | 	`ms-settings-emailandaccounts`, | ||||||
|  | 	`ms-settings-language`, | ||||||
|  | 	`ms-settings-location`, | ||||||
|  | 	`ms-settings-lock`, | ||||||
|  | 	`ms-settings-nfctransactions`, | ||||||
|  | 	`ms-settings-notifications`, | ||||||
|  | 	`ms-settings-power`, | ||||||
|  | 	`ms-settings-privacy`, | ||||||
|  | 	`ms-settings-proximity`, | ||||||
|  | 	`ms-settings-screenrotation`, | ||||||
|  | 	`ms-settings-wifi`, | ||||||
|  | 	`ms-settings-workplace`, | ||||||
|  | 	`ms-spd`, | ||||||
|  | 	`ms-sttoverlay`, | ||||||
|  | 	`ms-transit-to`, | ||||||
|  | 	`ms-useractivityset`, | ||||||
|  | 	`ms-virtualtouchpad`, | ||||||
|  | 	`ms-visio`, | ||||||
|  | 	`ms-walk-to`, | ||||||
|  | 	`ms-whiteboard`, | ||||||
|  | 	`ms-whiteboard-cmd`, | ||||||
|  | 	`ms-word`, | ||||||
|  | 	`msnim`, | ||||||
|  | 	`msrp`, | ||||||
|  | 	`msrps`, | ||||||
|  | 	`mtqp`, | ||||||
|  | 	`mumble`, | ||||||
|  | 	`mupdate`, | ||||||
|  | 	`mvn`, | ||||||
|  | 	`news`, | ||||||
|  | 	`nfs`, | ||||||
|  | 	`ni`, | ||||||
|  | 	`nih`, | ||||||
|  | 	`nntp`, | ||||||
|  | 	`notes`, | ||||||
|  | 	`ocf`, | ||||||
|  | 	`oid`, | ||||||
|  | 	`onenote`, | ||||||
|  | 	`onenote-cmd`, | ||||||
|  | 	`opaquelocktoken`, | ||||||
|  | 	`openpgp4fpr`, | ||||||
|  | 	`pack`, | ||||||
|  | 	`palm`, | ||||||
|  | 	`paparazzi`, | ||||||
|  | 	`pkcs11`, | ||||||
|  | 	`platform`, | ||||||
|  | 	`pop`, | ||||||
|  | 	`pres`, | ||||||
|  | 	`prospero`, | ||||||
|  | 	`proxy`, | ||||||
|  | 	`pwid`, | ||||||
|  | 	`psyc`, | ||||||
|  | 	`qb`, | ||||||
|  | 	`query`, | ||||||
|  | 	`redis`, | ||||||
|  | 	`rediss`, | ||||||
|  | 	`reload`, | ||||||
|  | 	`res`, | ||||||
|  | 	`resource`, | ||||||
|  | 	`rmi`, | ||||||
|  | 	`rsync`, | ||||||
|  | 	`rtmfp`, | ||||||
|  | 	`rtmp`, | ||||||
|  | 	`rtsp`, | ||||||
|  | 	`rtsps`, | ||||||
|  | 	`rtspu`, | ||||||
|  | 	`secondlife`, | ||||||
|  | 	`service`, | ||||||
|  | 	`session`, | ||||||
|  | 	`sftp`, | ||||||
|  | 	`sgn`, | ||||||
|  | 	`shttp`, | ||||||
|  | 	`sieve`, | ||||||
|  | 	`simpleledger`, | ||||||
|  | 	`sip`, | ||||||
|  | 	`sips`, | ||||||
|  | 	`skype`, | ||||||
|  | 	`smb`, | ||||||
|  | 	`sms`, | ||||||
|  | 	`smtp`, | ||||||
|  | 	`snews`, | ||||||
|  | 	`snmp`, | ||||||
|  | 	`soap.beep`, | ||||||
|  | 	`soap.beeps`, | ||||||
|  | 	`soldat`, | ||||||
|  | 	`spiffe`, | ||||||
|  | 	`spotify`, | ||||||
|  | 	`ssh`, | ||||||
|  | 	`steam`, | ||||||
|  | 	`stun`, | ||||||
|  | 	`stuns`, | ||||||
|  | 	`submit`, | ||||||
|  | 	`svn`, | ||||||
|  | 	`tag`, | ||||||
|  | 	`teamspeak`, | ||||||
|  | 	`tel`, | ||||||
|  | 	`teliaeid`, | ||||||
|  | 	`telnet`, | ||||||
|  | 	`tftp`, | ||||||
|  | 	`things`, | ||||||
|  | 	`thismessage`, | ||||||
|  | 	`tip`, | ||||||
|  | 	`tn3270`, | ||||||
|  | 	`tool`, | ||||||
|  | 	`turn`, | ||||||
|  | 	`turns`, | ||||||
|  | 	`tv`, | ||||||
|  | 	`udp`, | ||||||
|  | 	`unreal`, | ||||||
|  | 	`urn`, | ||||||
|  | 	`ut2004`, | ||||||
|  | 	`v-event`, | ||||||
|  | 	`vemmi`, | ||||||
|  | 	`ventrilo`, | ||||||
|  | 	`videotex`, | ||||||
|  | 	`vnc`, | ||||||
|  | 	`view-source`, | ||||||
|  | 	`wais`, | ||||||
|  | 	`webcal`, | ||||||
|  | 	`wpid`, | ||||||
|  | 	`ws`, | ||||||
|  | 	`wss`, | ||||||
|  | 	`wtai`, | ||||||
|  | 	`wyciwyg`, | ||||||
|  | 	`xcon`, | ||||||
|  | 	`xcon-userid`, | ||||||
|  | 	`xfire`, | ||||||
|  | 	`xmlrpc.beep`, | ||||||
|  | 	`xmlrpc.beeps`, | ||||||
|  | 	`xmpp`, | ||||||
|  | 	`xri`, | ||||||
|  | 	`ymsgr`, | ||||||
|  | 	`z39.50`, | ||||||
|  | 	`z39.50r`, | ||||||
|  | 	`z39.50s`, | ||||||
|  | } | ||||||
							
								
								
									
										1557
									
								
								vendor/github.com/mvdan/xurls/tlds.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1557
									
								
								vendor/github.com/mvdan/xurls/tlds.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										24
									
								
								vendor/github.com/mvdan/xurls/tlds_pseudo.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										24
									
								
								vendor/github.com/mvdan/xurls/tlds_pseudo.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,24 @@ | |||||||
|  | // Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc> | ||||||
|  | // See LICENSE for licensing information | ||||||
|  |  | ||||||
|  | package xurls | ||||||
|  |  | ||||||
|  | // PseudoTLDs is a sorted list of some widely used unofficial TLDs. | ||||||
|  | // | ||||||
|  | // Sources: | ||||||
|  | //  * https://en.wikipedia.org/wiki/Pseudo-top-level_domain | ||||||
|  | //  * https://en.wikipedia.org/wiki/Category:Pseudo-top-level_domains | ||||||
|  | //  * https://tools.ietf.org/html/draft-grothoff-iesg-special-use-p2p-names-00 | ||||||
|  | //  * https://www.iana.org/assignments/special-use-domain-names/special-use-domain-names.xhtml | ||||||
|  | var PseudoTLDs = []string{ | ||||||
|  | 	`bit`,       // Namecoin | ||||||
|  | 	`example`,   // Example domain | ||||||
|  | 	`exit`,      // Tor exit node | ||||||
|  | 	`gnu`,       // GNS by public key | ||||||
|  | 	`i2p`,       // I2P network | ||||||
|  | 	`invalid`,   // Invalid domain | ||||||
|  | 	`local`,     // Local network | ||||||
|  | 	`localhost`, // Local network | ||||||
|  | 	`test`,      // Test domain | ||||||
|  | 	`zkey`,      // GNS domain name | ||||||
|  | } | ||||||
							
								
								
									
										107
									
								
								vendor/github.com/mvdan/xurls/xurls.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										107
									
								
								vendor/github.com/mvdan/xurls/xurls.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,107 @@ | |||||||
|  | // Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc> | ||||||
|  | // See LICENSE for licensing information | ||||||
|  |  | ||||||
|  | // Package xurls extracts urls from plain text using regular expressions. | ||||||
|  | package xurls | ||||||
|  |  | ||||||
|  | import ( | ||||||
|  | 	"bytes" | ||||||
|  | 	"regexp" | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | //go:generate go run generate/tldsgen/main.go | ||||||
|  | //go:generate go run generate/schemesgen/main.go | ||||||
|  |  | ||||||
|  | const ( | ||||||
|  | 	letter    = `\p{L}` | ||||||
|  | 	mark      = `\p{M}` | ||||||
|  | 	number    = `\p{N}` | ||||||
|  | 	iriChar   = letter + mark + number | ||||||
|  | 	currency  = `\p{Sc}` | ||||||
|  | 	otherSymb = `\p{So}` | ||||||
|  | 	endChar   = iriChar + `/\-+_&~*%=#` + currency + otherSymb | ||||||
|  | 	otherPunc = `\p{Po}` | ||||||
|  | 	midChar   = endChar + `|` + otherPunc | ||||||
|  | 	wellParen = `\([` + midChar + `]*(\([` + midChar + `]*\)[` + midChar + `]*)*\)` | ||||||
|  | 	wellBrack = `\[[` + midChar + `]*(\[[` + midChar + `]*\][` + midChar + `]*)*\]` | ||||||
|  | 	wellBrace = `\{[` + midChar + `]*(\{[` + midChar + `]*\}[` + midChar + `]*)*\}` | ||||||
|  | 	wellAll   = wellParen + `|` + wellBrack + `|` + wellBrace | ||||||
|  | 	pathCont  = `([` + midChar + `]*(` + wellAll + `|[` + endChar + `])+)+` | ||||||
|  |  | ||||||
|  | 	iri      = `[` + iriChar + `]([` + iriChar + `\-]*[` + iriChar + `])?` | ||||||
|  | 	domain   = `(` + iri + `\.)+` | ||||||
|  | 	octet    = `(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])` | ||||||
|  | 	ipv4Addr = `\b` + octet + `\.` + octet + `\.` + octet + `\.` + octet + `\b` | ||||||
|  | 	ipv6Addr = `([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:[0-9a-fA-F]{0,4}|:[0-9a-fA-F]{1,4})?|(:[0-9a-fA-F]{1,4}){0,2})|(:[0-9a-fA-F]{1,4}){0,3})|(:[0-9a-fA-F]{1,4}){0,4})|:(:[0-9a-fA-F]{1,4}){0,5})((:[0-9a-fA-F]{1,4}){2}|:(25[0-5]|(2[0-4]|1[0-9]|[1-9])?[0-9])(\.(25[0-5]|(2[0-4]|1[0-9]|[1-9])?[0-9])){3})|(([0-9a-fA-F]{1,4}:){1,6}|:):[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){7}:` | ||||||
|  | 	ipAddr   = `(` + ipv4Addr + `|` + ipv6Addr + `)` | ||||||
|  | 	port     = `(:[0-9]*)?` | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | // AnyScheme can be passed to StrictMatchingScheme to match any possibly valid | ||||||
|  | // scheme, and not just the known ones. | ||||||
|  | var AnyScheme = `([a-zA-Z][a-zA-Z.\-+]*://|` + anyOf(SchemesNoAuthority...) + `:)` | ||||||
|  |  | ||||||
|  | // SchemesNoAuthority is a sorted list of some well-known url schemes that are | ||||||
|  | // followed by ":" instead of "://". | ||||||
|  | var SchemesNoAuthority = []string{ | ||||||
|  | 	`bitcoin`, // Bitcoin | ||||||
|  | 	`file`,    // Files | ||||||
|  | 	`magnet`,  // Torrent magnets | ||||||
|  | 	`mailto`,  // Mail | ||||||
|  | 	`sms`,     // SMS | ||||||
|  | 	`tel`,     // Telephone | ||||||
|  | 	`xmpp`,    // XMPP | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func anyOf(strs ...string) string { | ||||||
|  | 	var b bytes.Buffer | ||||||
|  | 	b.WriteByte('(') | ||||||
|  | 	for i, s := range strs { | ||||||
|  | 		if i != 0 { | ||||||
|  | 			b.WriteByte('|') | ||||||
|  | 		} | ||||||
|  | 		b.WriteString(regexp.QuoteMeta(s)) | ||||||
|  | 	} | ||||||
|  | 	b.WriteByte(')') | ||||||
|  | 	return b.String() | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func strictExp() string { | ||||||
|  | 	schemes := `(` + anyOf(Schemes...) + `://|` + anyOf(SchemesNoAuthority...) + `:)` | ||||||
|  | 	return `(?i)` + schemes + `(?-i)` + pathCont | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func relaxedExp() string { | ||||||
|  | 	site := domain + `(?i)` + anyOf(append(TLDs, PseudoTLDs...)...) + `(?-i)` | ||||||
|  | 	hostName := `(` + site + `|` + ipAddr + `)` | ||||||
|  | 	webURL := hostName + port + `(/|/` + pathCont + `?|\b|$)` | ||||||
|  | 	return strictExp() + `|` + webURL | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Strict produces a regexp that matches any URL with a scheme in either the | ||||||
|  | // Schemes or SchemesNoAuthority lists. | ||||||
|  | func Strict() *regexp.Regexp { | ||||||
|  | 	re := regexp.MustCompile(strictExp()) | ||||||
|  | 	re.Longest() | ||||||
|  | 	return re | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Relaxed produces a regexp that matches any URL matched by Strict, plus any | ||||||
|  | // URL with no scheme. | ||||||
|  | func Relaxed() *regexp.Regexp { | ||||||
|  | 	re := regexp.MustCompile(relaxedExp()) | ||||||
|  | 	re.Longest() | ||||||
|  | 	return re | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // StrictMatchingScheme produces a regexp similar to Strict, but requiring that | ||||||
|  | // the scheme match the given regular expression. See AnyScheme too. | ||||||
|  | func StrictMatchingScheme(exp string) (*regexp.Regexp, error) { | ||||||
|  | 	strictMatching := `(?i)(` + exp + `)(?-i)` + pathCont | ||||||
|  | 	re, err := regexp.Compile(strictMatching) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return nil, err | ||||||
|  | 	} | ||||||
|  | 	re.Longest() | ||||||
|  | 	return re, nil | ||||||
|  | } | ||||||
		Reference in New Issue
	
	Block a user