mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-10-31 21:28:11 +09:00 
			
		
		
		
	Vendor Update Go Libs (#13444)
* denisenkom/go-mssqldb untagged -> v0.9.0 * github.com/editorconfig/editorconfig-core-go v2.3.7 -> v2.3.8 * github.com/go-testfixtures/testfixtures v3.4.0 -> v3.4.1 * github.com/mholt/archiver v3.3.2 -> v3.5.0 * github.com/olivere/elastic v7.0.20 -> v7.0.21 * github.com/urfave/cli v1.22.4 -> v1.22.5 * github.com/xanzy/go-gitlab v0.38.1 -> v0.39.0 * github.com/yuin/goldmark-meta untagged -> v1.0.0 * github.com/ethantkoenig/rupture 0a76f03a811a -> c3b3b810dc77 * github.com/jaytaylor/html2text 8fb95d837f7d -> 3577fbdbcff7 * github.com/kballard/go-shellquote cd60e84ee657 -> 95032a82bc51 * github.com/msteinert/pam 02ccfbfaf0cc -> 913b8f8cdf8b * github.com/unknwon/paginater 7748a72e0141 -> 042474bd0eae * CI.restart() Co-authored-by: techknowlogick <techknowlogick@gitea.io>
This commit is contained in:
		
							
								
								
									
										12
									
								
								vendor/github.com/jaytaylor/html2text/.travis.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										12
									
								
								vendor/github.com/jaytaylor/html2text/.travis.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -1,12 +1,12 @@ | ||||
| language: go | ||||
| go: | ||||
|   # n.b. For golang release history, see https://golang.org/doc/devel/release.html | ||||
|   - tip | ||||
|   - 1.7 | ||||
|   - 1.6 | ||||
|   - 1.5 | ||||
|   - 1.4 | ||||
|   - 1.3 | ||||
|   - 1.2 | ||||
|   - "1.13.8" | ||||
|   - "1.12.17" | ||||
|   - "1.11.13" | ||||
|   - "1.10.8" | ||||
|   - "1.9.7" | ||||
| notifications: | ||||
|   email: | ||||
|     on_success: change | ||||
|   | ||||
							
								
								
									
										92
									
								
								vendor/github.com/jaytaylor/html2text/README.md
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										92
									
								
								vendor/github.com/jaytaylor/html2text/README.md
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -4,11 +4,15 @@ | ||||
| [Build Status](https://travis-ci.org/jaytaylor/html2text) | ||||
| [Go Report Card](https://goreportcard.com/report/github.com/jaytaylor/html2text) | ||||
|  | ||||
| ### Converts HTML into text | ||||
| ### Converts HTML into text of the markdown-flavored variety | ||||
|  | ||||
|  | ||||
| ## Introduction | ||||
|  | ||||
| Ensure your emails are readable by all! | ||||
|  | ||||
| Turns HTML into raw text, useful for sending fancy HTML emails with an equivalently nicely formatted TXT document as a fallback (e.g. for people who don't allow HTML emails or have other display issues). | ||||
|  | ||||
| html2text is a simple golang package for rendering HTML into plaintext. | ||||
|  | ||||
| There are still lots of improvements to be had, but FWIW this has worked fine for my [basic] HTML-2-text needs. | ||||
| @@ -19,7 +23,7 @@ It requires go 1.x or newer ;) | ||||
| ## Download the package | ||||
|  | ||||
| ```bash | ||||
| go get github.com/jaytaylor/html2text | ||||
| go get jaytaylor.com/html2text | ||||
| ``` | ||||
|  | ||||
| ## Example usage | ||||
| @@ -30,39 +34,51 @@ package main | ||||
| import ( | ||||
| 	"fmt" | ||||
|  | ||||
| 	"github.com/jaytaylor/html2text" | ||||
| 	"jaytaylor.com/html2text" | ||||
| ) | ||||
|  | ||||
| func main() { | ||||
| 	inputHtml := ` | ||||
|           <html> | ||||
|             <head> | ||||
|               <title>My Mega Service</title> | ||||
|               <link rel=\"stylesheet\" href=\"main.css\"> | ||||
|               <style type=\"text/css\">body { color: #fff; }</style> | ||||
|             </head> | ||||
|          | ||||
|             <body> | ||||
|               <div class="logo"> | ||||
|                 <a href="http://mymegaservice.com/"><img src="/logo-image.jpg" alt="Mega Service"/></a> | ||||
|               </div> | ||||
|          | ||||
|               <h1>Welcome to your new account on my service!</h1> | ||||
|          | ||||
|               <p> | ||||
|                   Here is some more information: | ||||
|          | ||||
|                   <ul> | ||||
|                       <li>Link 1: <a href="https://example.com">Example.com</a></li> | ||||
|                       <li>Link 2: <a href="https://example2.com">Example2.com</a></li> | ||||
|                       <li>Something else</li> | ||||
|                   </ul> | ||||
|               </p> | ||||
|             </body> | ||||
|           </html> | ||||
| 	` | ||||
| 	inputHTML := ` | ||||
| <html> | ||||
|   <head> | ||||
|     <title>My Mega Service</title> | ||||
|     <link rel="stylesheet" href="main.css"> | ||||
|     <style type="text/css">body { color: #fff; }</style> | ||||
|   </head> | ||||
|  | ||||
| 	text, err := html2text.FromString(inputHtml) | ||||
|   <body> | ||||
|     <div class="logo"> | ||||
|       <a href="http://jaytaylor.com/"><img src="/logo-image.jpg" alt="Mega Service"/></a> | ||||
|     </div> | ||||
|  | ||||
|     <h1>Welcome to your new account on my service!</h1> | ||||
|  | ||||
|     <p> | ||||
|       Here is some more information: | ||||
|  | ||||
|       <ul> | ||||
|         <li>Link 1: <a href="https://example.com">Example.com</a></li> | ||||
|         <li>Link 2: <a href="https://example2.com">Example2.com</a></li> | ||||
|         <li>Something else</li> | ||||
|       </ul> | ||||
|     </p> | ||||
|  | ||||
|     <table> | ||||
|       <thead> | ||||
|         <tr><th>Header 1</th><th>Header 2</th></tr> | ||||
|       </thead> | ||||
|       <tfoot> | ||||
|         <tr><td>Footer 1</td><td>Footer 2</td></tr> | ||||
|       </tfoot> | ||||
|       <tbody> | ||||
|         <tr><td>Row 1 Col 1</td><td>Row 1 Col 2</td></tr> | ||||
|         <tr><td>Row 2 Col 1</td><td>Row 2 Col 2</td></tr> | ||||
|       </tbody> | ||||
|     </table> | ||||
|   </body> | ||||
| </html>` | ||||
|  | ||||
| 	text, err := html2text.FromString(inputHTML, html2text.Options{PrettyTables: true}) | ||||
| 	if err != nil { | ||||
| 		panic(err) | ||||
| 	} | ||||
| @@ -72,7 +88,7 @@ func main() { | ||||
|  | ||||
| Output: | ||||
| ``` | ||||
| Mega Service ( http://mymegaservice.com/ ) | ||||
| Mega Service ( http://jaytaylor.com/ ) | ||||
|  | ||||
| ****************************************** | ||||
| Welcome to your new account on my service! | ||||
| @@ -83,6 +99,15 @@ Here is some more information: | ||||
| * Link 1: Example.com ( https://example.com ) | ||||
| * Link 2: Example2.com ( https://example2.com ) | ||||
| * Something else | ||||
|  | ||||
| +-------------+-------------+ | ||||
| |  HEADER 1   |  HEADER 2   | | ||||
| +-------------+-------------+ | ||||
| | Row 1 Col 1 | Row 1 Col 2 | | ||||
| | Row 2 Col 1 | Row 2 Col 2 | | ||||
| +-------------+-------------+ | ||||
| |  FOOTER 1   |  FOOTER 2   | | ||||
| +-------------+-------------+ | ||||
| ``` | ||||
|  | ||||
|  | ||||
| @@ -110,3 +135,6 @@ Email: jay at (my github username).com | ||||
|  | ||||
| Twitter: [@jtaylor](https://twitter.com/jtaylor) | ||||
|  | ||||
| # Alternatives | ||||
|  | ||||
| https://github.com/k3a/html2text - Lightweight | ||||
|   | ||||
							
								
								
									
										615
									
								
								vendor/github.com/jaytaylor/html2text/html2text.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										615
									
								
								vendor/github.com/jaytaylor/html2text/html2text.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -7,174 +7,408 @@ import ( | ||||
| 	"strings" | ||||
| 	"unicode" | ||||
|  | ||||
| 	"github.com/olekukonko/tablewriter" | ||||
| 	"github.com/ssor/bom" | ||||
| 	"golang.org/x/net/html" | ||||
| 	"golang.org/x/net/html/atom" | ||||
| ) | ||||
|  | ||||
| // Options provide toggles and overrides to control specific rendering behaviors. | ||||
| type Options struct { | ||||
| 	PrettyTables        bool                 // Turns on pretty ASCII rendering for table elements. | ||||
| 	PrettyTablesOptions *PrettyTablesOptions // Configures pretty ASCII rendering for table elements. | ||||
| 	OmitLinks           bool                 // Turns on omitting links | ||||
| } | ||||
|  | ||||
| // PrettyTablesOptions overrides tablewriter behaviors | ||||
| type PrettyTablesOptions struct { | ||||
| 	AutoFormatHeader     bool | ||||
| 	AutoWrapText         bool | ||||
| 	ReflowDuringAutoWrap bool | ||||
| 	ColWidth             int | ||||
| 	ColumnSeparator      string | ||||
| 	RowSeparator         string | ||||
| 	CenterSeparator      string | ||||
| 	HeaderAlignment      int | ||||
| 	FooterAlignment      int | ||||
| 	Alignment            int | ||||
| 	ColumnAlignment      []int | ||||
| 	NewLine              string | ||||
| 	HeaderLine           bool | ||||
| 	RowLine              bool | ||||
| 	AutoMergeCells       bool | ||||
| 	Borders              tablewriter.Border | ||||
| } | ||||
|  | ||||
| // NewPrettyTablesOptions creates PrettyTablesOptions with default settings | ||||
| func NewPrettyTablesOptions() *PrettyTablesOptions { | ||||
| 	return &PrettyTablesOptions{ | ||||
| 		AutoFormatHeader:     true, | ||||
| 		AutoWrapText:         true, | ||||
| 		ReflowDuringAutoWrap: true, | ||||
| 		ColWidth:             tablewriter.MAX_ROW_WIDTH, | ||||
| 		ColumnSeparator:      tablewriter.COLUMN, | ||||
| 		RowSeparator:         tablewriter.ROW, | ||||
| 		CenterSeparator:      tablewriter.CENTER, | ||||
| 		HeaderAlignment:      tablewriter.ALIGN_DEFAULT, | ||||
| 		FooterAlignment:      tablewriter.ALIGN_DEFAULT, | ||||
| 		Alignment:            tablewriter.ALIGN_DEFAULT, | ||||
| 		ColumnAlignment:      []int{}, | ||||
| 		NewLine:              tablewriter.NEWLINE, | ||||
| 		HeaderLine:           true, | ||||
| 		RowLine:              false, | ||||
| 		AutoMergeCells:       false, | ||||
| 		Borders:              tablewriter.Border{Left: true, Right: true, Bottom: true, Top: true}, | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // FromHTMLNode renders text output from a pre-parsed HTML document. | ||||
| func FromHTMLNode(doc *html.Node, o ...Options) (string, error) { | ||||
| 	var options Options | ||||
| 	if len(o) > 0 { | ||||
| 		options = o[0] | ||||
| 	} | ||||
|  | ||||
| 	ctx := textifyTraverseContext{ | ||||
| 		buf:     bytes.Buffer{}, | ||||
| 		options: options, | ||||
| 	} | ||||
| 	if err := ctx.traverse(doc); err != nil { | ||||
| 		return "", err | ||||
| 	} | ||||
|  | ||||
| 	text := strings.TrimSpace(newlineRe.ReplaceAllString( | ||||
| 		strings.Replace(ctx.buf.String(), "\n ", "\n", -1), "\n\n"), | ||||
| 	) | ||||
| 	return text, nil | ||||
| } | ||||
|  | ||||
| // FromReader renders text output after parsing HTML for the specified | ||||
| // io.Reader. | ||||
| func FromReader(reader io.Reader, options ...Options) (string, error) { | ||||
| 	newReader, err := bom.NewReaderWithoutBom(reader) | ||||
| 	if err != nil { | ||||
| 		return "", err | ||||
| 	} | ||||
| 	doc, err := html.Parse(newReader) | ||||
| 	if err != nil { | ||||
| 		return "", err | ||||
| 	} | ||||
| 	return FromHTMLNode(doc, options...) | ||||
| } | ||||
|  | ||||
| // FromString parses HTML from the input string, then renders the text form. | ||||
| func FromString(input string, options ...Options) (string, error) { | ||||
| 	bs := bom.CleanBom([]byte(input)) | ||||
| 	text, err := FromReader(bytes.NewReader(bs), options...) | ||||
| 	if err != nil { | ||||
| 		return "", err | ||||
| 	} | ||||
| 	return text, nil | ||||
| } | ||||
|  | ||||
// spacingRe matches one or more consecutive spaces, carriage returns,
// newlines or tabs.
var spacingRe = regexp.MustCompile(`[ \r\n\t]+`)

// newlineRe matches a run of two or more consecutive newlines.
var newlineRe = regexp.MustCompile(`\n\n+`)
|  | ||||
| type textifyTraverseCtx struct { | ||||
| 	Buf bytes.Buffer | ||||
| // textifyTraverseContext holds text-related context. | ||||
| type textifyTraverseContext struct { | ||||
| 	buf bytes.Buffer | ||||
|  | ||||
| 	prefix          string | ||||
| 	tableCtx        tableTraverseContext | ||||
| 	options         Options | ||||
| 	endsWithSpace   bool | ||||
| 	justClosedDiv   bool | ||||
| 	blockquoteLevel int | ||||
| 	lineLength      int | ||||
| 	endsWithSpace   bool | ||||
| 	endsWithNewline bool | ||||
| 	justClosedDiv   bool | ||||
| 	isPre           bool | ||||
| } | ||||
|  | ||||
| func (ctx *textifyTraverseCtx) traverse(node *html.Node) error { | ||||
| 	switch node.Type { | ||||
// tableTraverseContext holds table ASCII-form related context: the cell text
// collected while a table is traversed, before it is rendered.
type tableTraverseContext struct {
	header     []string   // text of each header cell
	body       [][]string // body rows, one slice of cell text per row
	footer     []string   // text of each footer cell
	tmpRow     int        // index into body of the row currently being filled
	isInFooter bool       // set while traversing inside a footer section
}

// init resets the context to its starting state: empty (non-nil) header, body
// and footer slices, row index zero, and not inside a footer.
func (t *tableTraverseContext) init() {
	*t = tableTraverseContext{
		header: []string{},
		body:   [][]string{},
		footer: []string{},
	}
}
|  | ||||
| func (ctx *textifyTraverseContext) handleElement(node *html.Node) error { | ||||
| 	ctx.justClosedDiv = false | ||||
|  | ||||
| 	switch node.DataAtom { | ||||
| 	case atom.Br: | ||||
| 		return ctx.emit("\n") | ||||
|  | ||||
| 	case atom.H1, atom.H2, atom.H3: | ||||
| 		subCtx := textifyTraverseContext{} | ||||
| 		if err := subCtx.traverseChildren(node); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
|  | ||||
| 		str := subCtx.buf.String() | ||||
| 		dividerLen := 0 | ||||
| 		for _, line := range strings.Split(str, "\n") { | ||||
| 			if lineLen := len([]rune(line)); lineLen-1 > dividerLen { | ||||
| 				dividerLen = lineLen - 1 | ||||
| 			} | ||||
| 		} | ||||
| 		var divider string | ||||
| 		if node.DataAtom == atom.H1 { | ||||
| 			divider = strings.Repeat("*", dividerLen) | ||||
| 		} else { | ||||
| 			divider = strings.Repeat("-", dividerLen) | ||||
| 		} | ||||
|  | ||||
| 		if node.DataAtom == atom.H3 { | ||||
| 			return ctx.emit("\n\n" + str + "\n" + divider + "\n\n") | ||||
| 		} | ||||
| 		return ctx.emit("\n\n" + divider + "\n" + str + "\n" + divider + "\n\n") | ||||
|  | ||||
| 	case atom.Blockquote: | ||||
| 		ctx.blockquoteLevel++ | ||||
| 		ctx.prefix = strings.Repeat(">", ctx.blockquoteLevel) + " " | ||||
| 		if err := ctx.emit("\n"); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 		if ctx.blockquoteLevel == 1 { | ||||
| 			if err := ctx.emit("\n"); err != nil { | ||||
| 				return err | ||||
| 			} | ||||
| 		} | ||||
| 		if err := ctx.traverseChildren(node); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 		ctx.blockquoteLevel-- | ||||
| 		ctx.prefix = strings.Repeat(">", ctx.blockquoteLevel) | ||||
| 		if ctx.blockquoteLevel > 0 { | ||||
| 			ctx.prefix += " " | ||||
| 		} | ||||
| 		return ctx.emit("\n\n") | ||||
|  | ||||
| 	case atom.Div: | ||||
| 		if ctx.lineLength > 0 { | ||||
| 			if err := ctx.emit("\n"); err != nil { | ||||
| 				return err | ||||
| 			} | ||||
| 		} | ||||
| 		if err := ctx.traverseChildren(node); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 		var err error | ||||
| 		if !ctx.justClosedDiv { | ||||
| 			err = ctx.emit("\n") | ||||
| 		} | ||||
| 		ctx.justClosedDiv = true | ||||
| 		return err | ||||
|  | ||||
| 	case atom.Li: | ||||
| 		if err := ctx.emit("* "); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
|  | ||||
| 		if err := ctx.traverseChildren(node); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
|  | ||||
| 		return ctx.emit("\n") | ||||
|  | ||||
| 	case atom.B, atom.Strong: | ||||
| 		subCtx := textifyTraverseContext{} | ||||
| 		subCtx.endsWithSpace = true | ||||
| 		if err := subCtx.traverseChildren(node); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 		str := subCtx.buf.String() | ||||
| 		return ctx.emit("*" + str + "*") | ||||
|  | ||||
| 	case atom.A: | ||||
| 		linkText := "" | ||||
| 		// For simple link element content with single text node only, peek at the link text. | ||||
| 		if node.FirstChild != nil && node.FirstChild.NextSibling == nil && node.FirstChild.Type == html.TextNode { | ||||
| 			linkText = node.FirstChild.Data | ||||
| 		} | ||||
|  | ||||
| 		// If image is the only child, take its alt text as the link text. | ||||
| 		if img := node.FirstChild; img != nil && node.LastChild == img && img.DataAtom == atom.Img { | ||||
| 			if altText := getAttrVal(img, "alt"); altText != "" { | ||||
| 				if err := ctx.emit(altText); err != nil { | ||||
| 					return err | ||||
| 				} | ||||
| 			} | ||||
| 		} else if err := ctx.traverseChildren(node); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
|  | ||||
| 		hrefLink := "" | ||||
| 		if attrVal := getAttrVal(node, "href"); attrVal != "" { | ||||
| 			attrVal = ctx.normalizeHrefLink(attrVal) | ||||
| 			// Don't print link href if it matches link element content or if the link is empty. | ||||
| 			if !ctx.options.OmitLinks && attrVal != "" && linkText != attrVal { | ||||
| 				hrefLink = "( " + attrVal + " )" | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		return ctx.emit(hrefLink) | ||||
|  | ||||
| 	case atom.P, atom.Ul: | ||||
| 		return ctx.paragraphHandler(node) | ||||
|  | ||||
| 	case atom.Table, atom.Tfoot, atom.Th, atom.Tr, atom.Td: | ||||
| 		if ctx.options.PrettyTables { | ||||
| 			return ctx.handleTableElement(node) | ||||
| 		} else if node.DataAtom == atom.Table { | ||||
| 			return ctx.paragraphHandler(node) | ||||
| 		} | ||||
| 		return ctx.traverseChildren(node) | ||||
|  | ||||
| 	case atom.Pre: | ||||
| 		ctx.isPre = true | ||||
| 		err := ctx.traverseChildren(node) | ||||
| 		ctx.isPre = false | ||||
| 		return err | ||||
|  | ||||
| 	case atom.Style, atom.Script, atom.Head: | ||||
| 		// Ignore the subtree. | ||||
| 		return nil | ||||
|  | ||||
| 	default: | ||||
| 		return ctx.traverseChildren(node) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // paragraphHandler renders node children surrounded by double newlines. | ||||
| func (ctx *textifyTraverseContext) paragraphHandler(node *html.Node) error { | ||||
| 	if err := ctx.emit("\n\n"); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := ctx.traverseChildren(node); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	return ctx.emit("\n\n") | ||||
| } | ||||
|  | ||||
| // handleTableElement is only to be invoked when options.PrettyTables is active. | ||||
| func (ctx *textifyTraverseContext) handleTableElement(node *html.Node) error { | ||||
| 	if !ctx.options.PrettyTables { | ||||
| 		panic("handleTableElement invoked when PrettyTables not active") | ||||
| 	} | ||||
|  | ||||
| 	switch node.DataAtom { | ||||
| 	case atom.Table: | ||||
| 		if err := ctx.emit("\n\n"); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
|  | ||||
| 		// Re-intialize all table context. | ||||
| 		ctx.tableCtx.init() | ||||
|  | ||||
| 		// Browse children, enriching context with table data. | ||||
| 		if err := ctx.traverseChildren(node); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
|  | ||||
| 		buf := &bytes.Buffer{} | ||||
| 		table := tablewriter.NewWriter(buf) | ||||
| 		if ctx.options.PrettyTablesOptions != nil { | ||||
| 			options := ctx.options.PrettyTablesOptions | ||||
| 			table.SetAutoFormatHeaders(options.AutoFormatHeader) | ||||
| 			table.SetAutoWrapText(options.AutoWrapText) | ||||
| 			table.SetReflowDuringAutoWrap(options.ReflowDuringAutoWrap) | ||||
| 			table.SetColWidth(options.ColWidth) | ||||
| 			table.SetColumnSeparator(options.ColumnSeparator) | ||||
| 			table.SetRowSeparator(options.RowSeparator) | ||||
| 			table.SetCenterSeparator(options.CenterSeparator) | ||||
| 			table.SetHeaderAlignment(options.HeaderAlignment) | ||||
| 			table.SetFooterAlignment(options.FooterAlignment) | ||||
| 			table.SetAlignment(options.Alignment) | ||||
| 			table.SetColumnAlignment(options.ColumnAlignment) | ||||
| 			table.SetNewLine(options.NewLine) | ||||
| 			table.SetHeaderLine(options.HeaderLine) | ||||
| 			table.SetRowLine(options.RowLine) | ||||
| 			table.SetAutoMergeCells(options.AutoMergeCells) | ||||
| 			table.SetBorders(options.Borders) | ||||
| 		} | ||||
| 		table.SetHeader(ctx.tableCtx.header) | ||||
| 		table.SetFooter(ctx.tableCtx.footer) | ||||
| 		table.AppendBulk(ctx.tableCtx.body) | ||||
|  | ||||
| 		// Render the table using ASCII. | ||||
| 		table.Render() | ||||
| 		if err := ctx.emit(buf.String()); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
|  | ||||
| 		return ctx.emit("\n\n") | ||||
|  | ||||
| 	case atom.Tfoot: | ||||
| 		ctx.tableCtx.isInFooter = true | ||||
| 		if err := ctx.traverseChildren(node); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 		ctx.tableCtx.isInFooter = false | ||||
|  | ||||
| 	case atom.Tr: | ||||
| 		ctx.tableCtx.body = append(ctx.tableCtx.body, []string{}) | ||||
| 		if err := ctx.traverseChildren(node); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 		ctx.tableCtx.tmpRow++ | ||||
|  | ||||
| 	case atom.Th: | ||||
| 		res, err := ctx.renderEachChild(node) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
|  | ||||
| 		ctx.tableCtx.header = append(ctx.tableCtx.header, res) | ||||
|  | ||||
| 	case atom.Td: | ||||
| 		res, err := ctx.renderEachChild(node) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
|  | ||||
| 		if ctx.tableCtx.isInFooter { | ||||
| 			ctx.tableCtx.footer = append(ctx.tableCtx.footer, res) | ||||
| 		} else { | ||||
| 			ctx.tableCtx.body[ctx.tableCtx.tmpRow] = append(ctx.tableCtx.body[ctx.tableCtx.tmpRow], res) | ||||
| 		} | ||||
|  | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (ctx *textifyTraverseContext) traverse(node *html.Node) error { | ||||
| 	switch node.Type { | ||||
| 	default: | ||||
| 		return ctx.traverseChildren(node) | ||||
|  | ||||
| 	case html.TextNode: | ||||
| 		data := strings.Trim(spacingRe.ReplaceAllString(node.Data, " "), " ") | ||||
| 		var data string | ||||
| 		if ctx.isPre { | ||||
| 			data = node.Data | ||||
| 		} else { | ||||
| 			data = strings.TrimSpace(spacingRe.ReplaceAllString(node.Data, " ")) | ||||
| 		} | ||||
| 		return ctx.emit(data) | ||||
|  | ||||
| 	case html.ElementNode: | ||||
|  | ||||
| 		ctx.justClosedDiv = false | ||||
| 		switch node.DataAtom { | ||||
| 		case atom.Br: | ||||
| 			return ctx.emit("\n") | ||||
|  | ||||
| 		case atom.H1, atom.H2, atom.H3: | ||||
| 			subCtx := textifyTraverseCtx{} | ||||
| 			if err := subCtx.traverseChildren(node); err != nil { | ||||
| 				return err | ||||
| 			} | ||||
|  | ||||
| 			str := subCtx.Buf.String() | ||||
| 			dividerLen := 0 | ||||
| 			for _, line := range strings.Split(str, "\n") { | ||||
| 				if lineLen := len([]rune(line)); lineLen-1 > dividerLen { | ||||
| 					dividerLen = lineLen - 1 | ||||
| 				} | ||||
| 			} | ||||
| 			divider := "" | ||||
| 			if node.DataAtom == atom.H1 { | ||||
| 				divider = strings.Repeat("*", dividerLen) | ||||
| 			} else { | ||||
| 				divider = strings.Repeat("-", dividerLen) | ||||
| 			} | ||||
|  | ||||
| 			if node.DataAtom == atom.H3 { | ||||
| 				return ctx.emit("\n\n" + str + "\n" + divider + "\n\n") | ||||
| 			} | ||||
| 			return ctx.emit("\n\n" + divider + "\n" + str + "\n" + divider + "\n\n") | ||||
|  | ||||
| 		case atom.Blockquote: | ||||
| 			ctx.blockquoteLevel++ | ||||
| 			ctx.prefix = strings.Repeat(">", ctx.blockquoteLevel) + " " | ||||
| 			if err := ctx.emit("\n"); err != nil { | ||||
| 				return err | ||||
| 			} | ||||
| 			if ctx.blockquoteLevel == 1 { | ||||
| 				if err := ctx.emit("\n"); err != nil { | ||||
| 					return err | ||||
| 				} | ||||
| 			} | ||||
| 			if err := ctx.traverseChildren(node); err != nil { | ||||
| 				return err | ||||
| 			} | ||||
| 			ctx.blockquoteLevel-- | ||||
| 			ctx.prefix = strings.Repeat(">", ctx.blockquoteLevel) | ||||
| 			if ctx.blockquoteLevel > 0 { | ||||
| 				ctx.prefix += " " | ||||
| 			} | ||||
| 			return ctx.emit("\n\n") | ||||
|  | ||||
| 		case atom.Div: | ||||
| 			if ctx.lineLength > 0 { | ||||
| 				if err := ctx.emit("\n"); err != nil { | ||||
| 					return err | ||||
| 				} | ||||
| 			} | ||||
| 			if err := ctx.traverseChildren(node); err != nil { | ||||
| 				return err | ||||
| 			} | ||||
| 			var err error | ||||
| 			if ctx.justClosedDiv == false { | ||||
| 				err = ctx.emit("\n") | ||||
| 			} | ||||
| 			ctx.justClosedDiv = true | ||||
| 			return err | ||||
|  | ||||
| 		case atom.Li: | ||||
| 			if err := ctx.emit("* "); err != nil { | ||||
| 				return err | ||||
| 			} | ||||
|  | ||||
| 			if err := ctx.traverseChildren(node); err != nil { | ||||
| 				return err | ||||
| 			} | ||||
|  | ||||
| 			return ctx.emit("\n") | ||||
|  | ||||
| 		case atom.B, atom.Strong: | ||||
| 			subCtx := textifyTraverseCtx{} | ||||
| 			subCtx.endsWithSpace = true | ||||
| 			if err := subCtx.traverseChildren(node); err != nil { | ||||
| 				return err | ||||
| 			} | ||||
| 			str := subCtx.Buf.String() | ||||
| 			return ctx.emit("*" + str + "*") | ||||
|  | ||||
| 		case atom.A: | ||||
| 			// If image is the only child, take its alt text as the link text | ||||
| 			if img := node.FirstChild; img != nil && node.LastChild == img && img.DataAtom == atom.Img { | ||||
| 				if altText := getAttrVal(img, "alt"); altText != "" { | ||||
| 					ctx.emit(altText) | ||||
| 				} | ||||
| 			} else if err := ctx.traverseChildren(node); err != nil { | ||||
| 				return err | ||||
| 			} | ||||
|  | ||||
| 			hrefLink := "" | ||||
| 			if attrVal := getAttrVal(node, "href"); attrVal != "" { | ||||
| 				attrVal = ctx.normalizeHrefLink(attrVal) | ||||
| 				if attrVal != "" { | ||||
| 					hrefLink = "( " + attrVal + " )" | ||||
| 				} | ||||
| 			} | ||||
|  | ||||
| 			return ctx.emit(hrefLink) | ||||
|  | ||||
| 		case atom.P, atom.Ul, atom.Table: | ||||
| 			if err := ctx.emit("\n\n"); err != nil { | ||||
| 				return err | ||||
| 			} | ||||
|  | ||||
| 			if err := ctx.traverseChildren(node); err != nil { | ||||
| 				return err | ||||
| 			} | ||||
|  | ||||
| 			return ctx.emit("\n\n") | ||||
|  | ||||
| 		case atom.Tr: | ||||
| 			if err := ctx.traverseChildren(node); err != nil { | ||||
| 				return err | ||||
| 			} | ||||
|  | ||||
| 			return ctx.emit("\n") | ||||
|  | ||||
| 		case atom.Style, atom.Script, atom.Head: | ||||
| 			// Ignore the subtree | ||||
| 			return nil | ||||
|  | ||||
| 		default: | ||||
| 			return ctx.traverseChildren(node) | ||||
| 		} | ||||
| 		return ctx.handleElement(node) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func (ctx *textifyTraverseCtx) traverseChildren(node *html.Node) error { | ||||
| func (ctx *textifyTraverseContext) traverseChildren(node *html.Node) error { | ||||
| 	for c := node.FirstChild; c != nil; c = c.NextSibling { | ||||
| 		if err := ctx.traverse(c); err != nil { | ||||
| 			return err | ||||
| @@ -184,31 +418,33 @@ func (ctx *textifyTraverseCtx) traverseChildren(node *html.Node) error { | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (ctx *textifyTraverseCtx) emit(data string) error { | ||||
| 	if len(data) == 0 { | ||||
| func (ctx *textifyTraverseContext) emit(data string) error { | ||||
| 	if data == "" { | ||||
| 		return nil | ||||
| 	} | ||||
| 	lines := ctx.breakLongLines(data) | ||||
| 	var err error | ||||
| 	var ( | ||||
| 		lines = ctx.breakLongLines(data) | ||||
| 		err   error | ||||
| 	) | ||||
| 	for _, line := range lines { | ||||
| 		runes := []rune(line) | ||||
| 		startsWithSpace := unicode.IsSpace(runes[0]) | ||||
| 		if !startsWithSpace && !ctx.endsWithSpace { | ||||
| 			ctx.Buf.WriteByte(' ') | ||||
| 		if !startsWithSpace && !ctx.endsWithSpace && !strings.HasPrefix(data, ".") { | ||||
| 			if err = ctx.buf.WriteByte(' '); err != nil { | ||||
| 				return err | ||||
| 			} | ||||
| 			ctx.lineLength++ | ||||
| 		} | ||||
| 		ctx.endsWithSpace = unicode.IsSpace(runes[len(runes)-1]) | ||||
| 		for _, c := range line { | ||||
| 			_, err = ctx.Buf.WriteString(string(c)) | ||||
| 			if err != nil { | ||||
| 			if _, err = ctx.buf.WriteString(string(c)); err != nil { | ||||
| 				return err | ||||
| 			} | ||||
| 			ctx.lineLength++ | ||||
| 			if c == '\n' { | ||||
| 				ctx.lineLength = 0 | ||||
| 				if ctx.prefix != "" { | ||||
| 					_, err = ctx.Buf.WriteString(ctx.prefix) | ||||
| 					if err != nil { | ||||
| 					if _, err = ctx.buf.WriteString(ctx.prefix); err != nil { | ||||
| 						return err | ||||
| 					} | ||||
| 				} | ||||
| @@ -218,27 +454,31 @@ func (ctx *textifyTraverseCtx) emit(data string) error { | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (ctx *textifyTraverseCtx) breakLongLines(data string) []string { | ||||
| 	// only break lines when we are in blockquotes | ||||
| const maxLineLen = 74 | ||||
|  | ||||
| func (ctx *textifyTraverseContext) breakLongLines(data string) []string { | ||||
| 	// Only break lines when in blockquotes. | ||||
| 	if ctx.blockquoteLevel == 0 { | ||||
| 		return []string{data} | ||||
| 	} | ||||
| 	var ret []string | ||||
| 	runes := []rune(data) | ||||
| 	l := len(runes) | ||||
| 	existing := ctx.lineLength | ||||
| 	if existing >= 74 { | ||||
| 	var ( | ||||
| 		ret      = []string{} | ||||
| 		runes    = []rune(data) | ||||
| 		l        = len(runes) | ||||
| 		existing = ctx.lineLength | ||||
| 	) | ||||
| 	if existing >= maxLineLen { | ||||
| 		ret = append(ret, "\n") | ||||
| 		existing = 0 | ||||
| 	} | ||||
| 	for l+existing > 74 { | ||||
| 		i := 74 - existing | ||||
| 	for l+existing > maxLineLen { | ||||
| 		i := maxLineLen - existing | ||||
| 		for i >= 0 && !unicode.IsSpace(runes[i]) { | ||||
| 			i-- | ||||
| 		} | ||||
| 		if i == -1 { | ||||
| 			// no spaces, so go the other way | ||||
| 			i = 74 - existing | ||||
| 			// No spaces, so go the other way. | ||||
| 			i = maxLineLen - existing | ||||
| 			for i < l && !unicode.IsSpace(runes[i]) { | ||||
| 				i++ | ||||
| 			} | ||||
| @@ -257,12 +497,33 @@ func (ctx *textifyTraverseCtx) breakLongLines(data string) []string { | ||||
| 	return ret | ||||
| } | ||||
|  | ||||
| func (ctx *textifyTraverseCtx) normalizeHrefLink(link string) string { | ||||
| func (ctx *textifyTraverseContext) normalizeHrefLink(link string) string { | ||||
| 	link = strings.TrimSpace(link) | ||||
| 	link = strings.TrimPrefix(link, "mailto:") | ||||
| 	return link | ||||
| } | ||||
|  | ||||
| // renderEachChild visits each direct child of a node and collects the sequence of | ||||
| // textuual representaitons separated by a single newline. | ||||
| func (ctx *textifyTraverseContext) renderEachChild(node *html.Node) (string, error) { | ||||
| 	buf := &bytes.Buffer{} | ||||
| 	for c := node.FirstChild; c != nil; c = c.NextSibling { | ||||
| 		s, err := FromHTMLNode(c, ctx.options) | ||||
| 		if err != nil { | ||||
| 			return "", err | ||||
| 		} | ||||
| 		if _, err = buf.WriteString(s); err != nil { | ||||
| 			return "", err | ||||
| 		} | ||||
| 		if c.NextSibling != nil { | ||||
| 			if err = buf.WriteByte('\n'); err != nil { | ||||
| 				return "", err | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	return buf.String(), nil | ||||
| } | ||||
|  | ||||
| func getAttrVal(node *html.Node, attrName string) string { | ||||
| 	for _, attr := range node.Attr { | ||||
| 		if attr.Key == attrName { | ||||
| @@ -272,29 +533,3 @@ func getAttrVal(node *html.Node, attrName string) string { | ||||
|  | ||||
| 	return "" | ||||
| } | ||||
|  | ||||
| func FromReader(reader io.Reader) (string, error) { | ||||
| 	doc, err := html.Parse(reader) | ||||
| 	if err != nil { | ||||
| 		return "", err | ||||
| 	} | ||||
|  | ||||
| 	ctx := textifyTraverseCtx{ | ||||
| 		Buf: bytes.Buffer{}, | ||||
| 	} | ||||
| 	if err = ctx.traverse(doc); err != nil { | ||||
| 		return "", err | ||||
| 	} | ||||
|  | ||||
| 	text := strings.TrimSpace(newlineRe.ReplaceAllString( | ||||
| 		strings.Replace(ctx.Buf.String(), "\n ", "\n", -1), "\n\n")) | ||||
| 	return text, nil | ||||
| } | ||||
|  | ||||
| func FromString(input string) (string, error) { | ||||
| 	text, err := FromReader(strings.NewReader(input)) | ||||
| 	if err != nil { | ||||
| 		return "", err | ||||
| 	} | ||||
| 	return text, nil | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user