Mirror of https://github.com/go-gitea/gitea.git (synced 2025-10-29 10:57:44 +09:00)
			
		
		
		
	Dump: add output format tar and output to stdout (#10376)
* Dump: Use mholt/archive/v3 to support tar including many compressions
  Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>
* Dump: Allow dump output to stdout
  Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>
* Dump: Fixed bug present since #6677 where SessionConfig.Provider is never "file"
  Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>
* Dump: never pack RepoRootPath, LFS.ContentPath and LogRootPath when they are below AppDataPath
  Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>
* Dump: also dump LFS (fixes #10058)
  Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>
* Dump: never dump CustomPath if CustomPath is a subdir of or equal to AppDataPath (fixes #10365)
  Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>
* Use log.Info instead of fmt.Fprintf
  Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>
* import ordering
* make fmt

Co-authored-by: zeripath <art27@cantab.net>
Co-authored-by: techknowlogick <techknowlogick@gitea.io>
Co-authored-by: Matti R <matti@mdranta.net>
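The first bullet names the archiver library that provides the new tar-based formats. As a rough illustration only (this is not Gitea's dump code; the import path github.com/mholt/archiver/v3 and the file names below are assumptions), the library's convenience helper picks the archive format from the destination file's extension:

package main

import (
	"log"

	"github.com/mholt/archiver/v3"
)

func main() {
	// The format is inferred from the destination's extension; writing to
	// "gitea-dump.zip" or "gitea-dump.tar.xz" instead selects another format.
	if err := archiver.Archive([]string{"data"}, "gitea-dump.tar.gz"); err != nil {
		log.Fatal(err)
	}
}
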
110 vendor/github.com/dsnet/compress/bzip2/bwt.go (generated, vendored, Normal file)
							| @@ -0,0 +1,110 @@ | ||||
| // Copyright 2015, Joe Tsai. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE.md file. | ||||
|  | ||||
| package bzip2 | ||||
|  | ||||
| import "github.com/dsnet/compress/bzip2/internal/sais" | ||||
|  | ||||
| // The Burrows-Wheeler Transform implementation used here is based on the | ||||
| // Suffix Array by Induced Sorting (SA-IS) methodology by Nong, Zhang, and Chan. | ||||
| // This implementation uses the sais algorithm originally written by Yuta Mori. | ||||
| // | ||||
| // The SA-IS algorithm runs in O(n) and outputs a Suffix Array. There is a | ||||
| // mathematical relationship between Suffix Arrays and the Burrows-Wheeler | ||||
| // Transform, such that a SA can be converted to a BWT in O(n) time. | ||||
| // | ||||
| // References: | ||||
| //	http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf | ||||
| //	https://github.com/cscott/compressjs/blob/master/lib/BWT.js | ||||
| //	https://www.quora.com/How-can-I-optimize-burrows-wheeler-transform-and-inverse-transform-to-work-in-O-n-time-O-n-space | ||||
| type burrowsWheelerTransform struct { | ||||
| 	buf  []byte | ||||
| 	sa   []int | ||||
| 	perm []uint32 | ||||
| } | ||||
|  | ||||
| func (bwt *burrowsWheelerTransform) Encode(buf []byte) (ptr int) { | ||||
| 	if len(buf) == 0 { | ||||
| 		return -1 | ||||
| 	} | ||||
|  | ||||
| 	// TODO(dsnet): Find a way to avoid the duplicate input string method. | ||||
| 	// We only need to do this because suffix arrays (by definition) only | ||||
| 	// operate on non-wrapped suffixes of a string. On the other hand, | ||||
| 	// the BWT specifically used in bzip2 operates on strings that wrap around | ||||
| 	// when being sorted. | ||||
|  | ||||
| 	// Step 1: Concatenate the input string to itself so that we can use the | ||||
| 	// suffix array algorithm for bzip2's variant of BWT. | ||||
| 	n := len(buf) | ||||
| 	bwt.buf = append(append(bwt.buf[:0], buf...), buf...) | ||||
| 	if cap(bwt.sa) < 2*n { | ||||
| 		bwt.sa = make([]int, 2*n) | ||||
| 	} | ||||
| 	t := bwt.buf[:2*n] | ||||
| 	sa := bwt.sa[:2*n] | ||||
|  | ||||
| 	// Step 2: Compute the suffix array (SA). The input string, t, will not be | ||||
| 	// modified, while the results will be written to the output, sa. | ||||
| 	sais.ComputeSA(t, sa) | ||||
|  | ||||
| 	// Step 3: Convert the SA to a BWT. Since ComputeSA does not mutate the | ||||
| 	// input, we have two copies of the input; in buf and buf2. Thus, we write | ||||
| 	// the transformation to buf, while using buf2. | ||||
| 	var j int | ||||
| 	buf2 := t[n:] | ||||
| 	for _, i := range sa { | ||||
| 		if i < n { | ||||
| 			if i == 0 { | ||||
| 				ptr = j | ||||
| 				i = n | ||||
| 			} | ||||
| 			buf[j] = buf2[i-1] | ||||
| 			j++ | ||||
| 		} | ||||
| 	} | ||||
| 	return ptr | ||||
| } | ||||
|  | ||||
| func (bwt *burrowsWheelerTransform) Decode(buf []byte, ptr int) { | ||||
| 	if len(buf) == 0 { | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	// Step 1: Compute cumm, where cumm[ch] reports the total number of | ||||
| 	// characters that precede the character ch in the alphabet. | ||||
| 	var cumm [256]int | ||||
| 	for _, v := range buf { | ||||
| 		cumm[v]++ | ||||
| 	} | ||||
| 	var sum int | ||||
| 	for i, v := range cumm { | ||||
| 		cumm[i] = sum | ||||
| 		sum += v | ||||
| 	} | ||||
|  | ||||
| 	// Step 2: Compute perm, where perm[ptr] contains a pointer to the next | ||||
| 	// byte in buf and the next pointer in perm itself. | ||||
| 	if cap(bwt.perm) < len(buf) { | ||||
| 		bwt.perm = make([]uint32, len(buf)) | ||||
| 	} | ||||
| 	perm := bwt.perm[:len(buf)] | ||||
| 	for i, b := range buf { | ||||
| 		perm[cumm[b]] = uint32(i) | ||||
| 		cumm[b]++ | ||||
| 	} | ||||
|  | ||||
| 	// Step 3: Follow each pointer in perm to the next byte, starting with the | ||||
| 	// origin pointer. | ||||
| 	if cap(bwt.buf) < len(buf) { | ||||
| 		bwt.buf = make([]byte, len(buf)) | ||||
| 	} | ||||
| 	buf2 := bwt.buf[:len(buf)] | ||||
| 	i := perm[ptr] | ||||
| 	for j := range buf2 { | ||||
| 		buf2[j] = buf[i] | ||||
| 		i = perm[i] | ||||
| 	} | ||||
| 	copy(buf, buf2) | ||||
| } | ||||
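
The file above derives bzip2's wrap-around BWT from a suffix array of the doubled input in O(n) time. As a standalone reference sketch (not part of the vendored code, and deliberately quadratic), the same transform can be written by sorting all rotations of the input directly; the last column of the sorted rotations is the BWT output and the row holding the unrotated input is the origin pointer:

package main

import (
	"fmt"
	"sort"
)

// naiveBWT sorts every rotation of buf and returns the last column of the
// sorted rotation matrix plus the index of the unrotated input (the origin
// pointer). It is O(n^2 log n); bwt.go computes the same result in O(n).
func naiveBWT(buf []byte) (out []byte, ptr int) {
	n := len(buf)
	if n == 0 {
		return nil, -1
	}
	rots := make([]int, n)
	for i := range rots {
		rots[i] = i
	}
	sort.Slice(rots, func(a, b int) bool {
		i, j := rots[a], rots[b]
		for k := 0; k < n; k++ {
			bi, bj := buf[(i+k)%n], buf[(j+k)%n]
			if bi != bj {
				return bi < bj
			}
		}
		return false
	})
	out = make([]byte, n)
	for row, i := range rots {
		if i == 0 {
			ptr = row // Row containing the original, unrotated string.
		}
		out[row] = buf[(i+n-1)%n] // Last byte of this rotation.
	}
	return out, ptr
}

func main() {
	out, ptr := naiveBWT([]byte("banana"))
	fmt.Printf("%q %d\n", out, ptr) // "nnbaaa" 3
}
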
							
								
								
									
110 vendor/github.com/dsnet/compress/bzip2/common.go (generated, vendored, Normal file)
							| @@ -0,0 +1,110 @@ | ||||
| // Copyright 2015, Joe Tsai. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE.md file. | ||||
|  | ||||
| // Package bzip2 implements the BZip2 compressed data format. | ||||
| // | ||||
| // Canonical C implementation: | ||||
| //	http://bzip.org | ||||
| // | ||||
| // Unofficial format specification: | ||||
| //	https://github.com/dsnet/compress/blob/master/doc/bzip2-format.pdf | ||||
| package bzip2 | ||||
|  | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"hash/crc32" | ||||
|  | ||||
| 	"github.com/dsnet/compress/internal" | ||||
| 	"github.com/dsnet/compress/internal/errors" | ||||
| ) | ||||
|  | ||||
| // There does not exist a formal specification of the BZip2 format. As such, | ||||
| // much of this work is derived by either reverse engineering the original C | ||||
| // source code or using secondary sources. | ||||
| // | ||||
| // A significant amount of fuzz testing is done to ensure that outputs from | ||||
| // this package are properly decoded by the C library. Furthermore, we test that | ||||
| // both this package and the C library agree about what inputs are invalid. | ||||
| // | ||||
| // Compression stack: | ||||
| //	Run-length encoding 1     (RLE1) | ||||
| //	Burrows-Wheeler transform (BWT) | ||||
| //	Move-to-front transform   (MTF) | ||||
| //	Run-length encoding 2     (RLE2) | ||||
| //	Prefix encoding           (PE) | ||||
| // | ||||
| // References: | ||||
| //	http://bzip.org/ | ||||
| //	https://en.wikipedia.org/wiki/Bzip2 | ||||
| //	https://code.google.com/p/jbzip2/ | ||||
|  | ||||
| const ( | ||||
| 	BestSpeed          = 1 | ||||
| 	BestCompression    = 9 | ||||
| 	DefaultCompression = 6 | ||||
| ) | ||||
|  | ||||
| const ( | ||||
| 	hdrMagic = 0x425a         // Hex of "BZ" | ||||
| 	blkMagic = 0x314159265359 // BCD of PI | ||||
| 	endMagic = 0x177245385090 // BCD of sqrt(PI) | ||||
|  | ||||
| 	blockSize = 100000 | ||||
| ) | ||||
|  | ||||
| func errorf(c int, f string, a ...interface{}) error { | ||||
| 	return errors.Error{Code: c, Pkg: "bzip2", Msg: fmt.Sprintf(f, a...)} | ||||
| } | ||||
|  | ||||
| func panicf(c int, f string, a ...interface{}) { | ||||
| 	errors.Panic(errorf(c, f, a...)) | ||||
| } | ||||
|  | ||||
| // errWrap converts a lower-level errors.Error to be one from this package. | ||||
| // The replaceCode passed in will be used to replace the code for any errors | ||||
| // with the errors.Invalid code. | ||||
| // | ||||
| // For the Reader, set this to errors.Corrupted. | ||||
| // For the Writer, set this to errors.Internal. | ||||
| func errWrap(err error, replaceCode int) error { | ||||
| 	if cerr, ok := err.(errors.Error); ok { | ||||
| 		if errors.IsInvalid(cerr) { | ||||
| 			cerr.Code = replaceCode | ||||
| 		} | ||||
| 		err = errorf(cerr.Code, "%s", cerr.Msg) | ||||
| 	} | ||||
| 	return err | ||||
| } | ||||
|  | ||||
| var errClosed = errorf(errors.Closed, "") | ||||
|  | ||||
| // crc computes the CRC-32 used by BZip2. | ||||
| // | ||||
| // The CRC-32 computation in bzip2 treats bytes as having bits in big-endian | ||||
| // order. That is, the MSB is read before the LSB. Thus, we can use the | ||||
| // standard library version of CRC-32 IEEE with some minor adjustments. | ||||
| // | ||||
| // The byte array is used as an intermediate buffer to swap the bits of every | ||||
| // byte of the input. | ||||
| type crc struct { | ||||
| 	val uint32 | ||||
| 	buf [256]byte | ||||
| } | ||||
|  | ||||
| // update computes the CRC-32 of appending buf to c. | ||||
| func (c *crc) update(buf []byte) { | ||||
| 	cval := internal.ReverseUint32(c.val) | ||||
| 	for len(buf) > 0 { | ||||
| 		n := len(buf) | ||||
| 		if n > len(c.buf) { | ||||
| 			n = len(c.buf) | ||||
| 		} | ||||
| 		for i, b := range buf[:n] { | ||||
| 			c.buf[i] = internal.ReverseLUT[b] | ||||
| 		} | ||||
| 		cval = crc32.Update(cval, crc32.IEEETable, c.buf[:n]) | ||||
| 		buf = buf[n:] | ||||
| 	} | ||||
| 	c.val = internal.ReverseUint32(cval) | ||||
| } | ||||
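
The crc comment above describes reusing the standard library's reflected IEEE CRC-32 by reversing bit order on the way in and out. A minimal standalone sketch of that same trick (a hypothetical helper, not the vendored type, using math/bits for the reversals):

package main

import (
	"fmt"
	"hash/crc32"
	"math/bits"
)

// bzip2CRC32 computes a big-endian-bit-order CRC-32 the same way crc.update
// above does: reverse the bits of every input byte, run the standard
// (LSB-first) IEEE CRC-32, then reverse the bits of the 32-bit result.
func bzip2CRC32(data []byte) uint32 {
	rev := make([]byte, len(data))
	for i, b := range data {
		rev[i] = bits.Reverse8(b)
	}
	return bits.Reverse32(crc32.ChecksumIEEE(rev))
}

func main() {
	fmt.Printf("%08x\n", bzip2CRC32([]byte("hello, world")))
}
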
							
								
								
									
13 vendor/github.com/dsnet/compress/bzip2/fuzz_off.go (generated, vendored, Normal file)
							| @@ -0,0 +1,13 @@ | ||||
| // Copyright 2016, Joe Tsai. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE.md file. | ||||
|  | ||||
| // +build !gofuzz | ||||
|  | ||||
| // This file exists to suppress fuzzing details from release builds. | ||||
|  | ||||
| package bzip2 | ||||
|  | ||||
| type fuzzReader struct{} | ||||
|  | ||||
| func (*fuzzReader) updateChecksum(int64, uint32) {} | ||||
							
								
								
									
77 vendor/github.com/dsnet/compress/bzip2/fuzz_on.go (generated, vendored, Normal file)
							| @@ -0,0 +1,77 @@ | ||||
| // Copyright 2016, Joe Tsai. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE.md file. | ||||
|  | ||||
| // +build gofuzz | ||||
|  | ||||
| // This file exists to export internal implementation details for fuzz testing. | ||||
|  | ||||
| package bzip2 | ||||
|  | ||||
| func ForwardBWT(buf []byte) (ptr int) { | ||||
| 	var bwt burrowsWheelerTransform | ||||
| 	return bwt.Encode(buf) | ||||
| } | ||||
|  | ||||
| func ReverseBWT(buf []byte, ptr int) { | ||||
| 	var bwt burrowsWheelerTransform | ||||
| 	bwt.Decode(buf, ptr) | ||||
| } | ||||
|  | ||||
| type fuzzReader struct { | ||||
| 	Checksums Checksums | ||||
| } | ||||
|  | ||||
| // updateChecksum updates Checksums. | ||||
| // | ||||
| // If a valid pos is provided, it appends the (pos, val) pair to the slice. | ||||
| // Otherwise, it will update the last record with the new value. | ||||
| func (fr *fuzzReader) updateChecksum(pos int64, val uint32) { | ||||
| 	if pos >= 0 { | ||||
| 		fr.Checksums = append(fr.Checksums, Checksum{pos, val}) | ||||
| 	} else { | ||||
| 		fr.Checksums[len(fr.Checksums)-1].Value = val | ||||
| 	} | ||||
| } | ||||
|  | ||||
| type Checksum struct { | ||||
| 	Offset int64  // Bit offset of the checksum | ||||
| 	Value  uint32 // Checksum value | ||||
| } | ||||
|  | ||||
| type Checksums []Checksum | ||||
|  | ||||
| // Apply overwrites all checksum fields in d with the ones in cs. | ||||
| func (cs Checksums) Apply(d []byte) []byte { | ||||
| 	d = append([]byte(nil), d...) | ||||
| 	for _, c := range cs { | ||||
| 		setU32(d, c.Offset, c.Value) | ||||
| 	} | ||||
| 	return d | ||||
| } | ||||
|  | ||||
| func setU32(d []byte, pos int64, val uint32) { | ||||
| 	for i := uint(0); i < 32; i++ { | ||||
| 		bpos := uint64(pos) + uint64(i) | ||||
| 		d[bpos/8] &= ^byte(1 << (7 - bpos%8)) | ||||
| 		d[bpos/8] |= byte(val>>(31-i)) << (7 - bpos%8) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // Verify checks that all checksum fields in d matches those in cs. | ||||
| func (cs Checksums) Verify(d []byte) bool { | ||||
| 	for _, c := range cs { | ||||
| 		if getU32(d, c.Offset) != c.Value { | ||||
| 			return false | ||||
| 		} | ||||
| 	} | ||||
| 	return true | ||||
| } | ||||
|  | ||||
| func getU32(d []byte, pos int64) (val uint32) { | ||||
| 	for i := uint(0); i < 32; i++ { | ||||
| 		bpos := uint64(pos) + uint64(i) | ||||
| 		val |= (uint32(d[bpos/8] >> (7 - bpos%8))) << (31 - i) | ||||
| 	} | ||||
| 	return val | ||||
| } | ||||
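
setU32 and getU32 address the buffer by bit offset, most-significant bit first within each byte, so a checksum field can be patched even when it is not byte-aligned. A small standalone sketch of the same addressing (hypothetical helper names, with the single-bit masking made explicit): writing 0xDEADBEEF at bit offset 4 into a zeroed 5-byte buffer yields 0x0d 0xea 0xdb 0xee 0xf0.

package main

import "fmt"

// putU32 writes the 32 bits of val into d starting at bit offset pos,
// most-significant bit first, mirroring setU32 above.
func putU32(d []byte, pos int64, val uint32) {
	for i := uint(0); i < 32; i++ {
		bpos := uint64(pos) + uint64(i)
		bit := byte(val>>(31-i)) & 1
		d[bpos/8] &^= 1 << (7 - bpos%8) // Clear the target bit.
		d[bpos/8] |= bit << (7 - bpos%8) // Set it from val.
	}
}

// readU32 is the inverse, mirroring getU32 above.
func readU32(d []byte, pos int64) (val uint32) {
	for i := uint(0); i < 32; i++ {
		bpos := uint64(pos) + uint64(i)
		bit := uint32(d[bpos/8]>>(7-bpos%8)) & 1
		val |= bit << (31 - i)
	}
	return val
}

func main() {
	buf := make([]byte, 5)
	putU32(buf, 4, 0xDEADBEEF)
	fmt.Printf("% x -> %08x\n", buf, readU32(buf, 4))
}
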
							
								
								
									
28 vendor/github.com/dsnet/compress/bzip2/internal/sais/common.go (generated, vendored, Normal file)
							| @@ -0,0 +1,28 @@ | ||||
| // Copyright 2015, Joe Tsai. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE.md file. | ||||
|  | ||||
| // Package sais implements a linear time suffix array algorithm. | ||||
| package sais | ||||
|  | ||||
| //go:generate go run sais_gen.go byte sais_byte.go | ||||
| //go:generate go run sais_gen.go int sais_int.go | ||||
|  | ||||
| // This package ports the C sais implementation by Yuta Mori. The ports are | ||||
| // located in sais_byte.go and sais_int.go, which are identical to each other | ||||
| // except for the types. Since Go does not support generics, we use generators to | ||||
| // create the two files. | ||||
| // | ||||
| // References: | ||||
| //	https://sites.google.com/site/yuta256/sais | ||||
| //	https://www.researchgate.net/publication/221313676_Linear_Time_Suffix_Array_Construction_Using_D-Critical_Substrings | ||||
| //	https://www.researchgate.net/publication/224176324_Two_Efficient_Algorithms_for_Linear_Time_Suffix_Array_Construction | ||||
|  | ||||
| // ComputeSA computes the suffix array of t and places the result in sa. | ||||
| // Both t and sa must be the same length. | ||||
| func ComputeSA(t []byte, sa []int) { | ||||
| 	if len(sa) != len(t) { | ||||
| 		panic("mismatching sizes") | ||||
| 	} | ||||
| 	computeSA_byte(t, sa, 0, len(t), 256) | ||||
| } | ||||
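
ComputeSA is the only exported entry point of the sais package. As a usage illustration (the package sits under internal/, so this only compiles from within the dsnet/compress module itself), the suffix array of "banana" lists the start offsets of its suffixes in lexicographic order:

package main

import (
	"fmt"

	"github.com/dsnet/compress/bzip2/internal/sais"
)

func main() {
	t := []byte("banana")
	sa := make([]int, len(t)) // Must be the same length as t.
	sais.ComputeSA(t, sa)
	// [5 3 1 0 4 2]: "a" < "ana" < "anana" < "banana" < "na" < "nana"
	fmt.Println(sa)
}
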
							
								
								
									
661 vendor/github.com/dsnet/compress/bzip2/internal/sais/sais_byte.go (generated, vendored, Normal file)
							| @@ -0,0 +1,661 @@ | ||||
| // Copyright 2015, Joe Tsai. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE.md file. | ||||
|  | ||||
| // Code generated by sais_gen.go. DO NOT EDIT. | ||||
|  | ||||
| // ==================================================== | ||||
| // Copyright (c) 2008-2010 Yuta Mori All Rights Reserved. | ||||
| // | ||||
| // Permission is hereby granted, free of charge, to any person | ||||
| // obtaining a copy of this software and associated documentation | ||||
| // files (the "Software"), to deal in the Software without | ||||
| // restriction, including without limitation the rights to use, | ||||
| // copy, modify, merge, publish, distribute, sublicense, and/or sell | ||||
| // copies of the Software, and to permit persons to whom the | ||||
| // Software is furnished to do so, subject to the following | ||||
| // conditions: | ||||
| // | ||||
| // The above copyright notice and this permission notice shall be | ||||
| // included in all copies or substantial portions of the Software. | ||||
| // | ||||
| // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||||
| // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | ||||
| // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||||
| // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | ||||
| // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | ||||
| // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||||
| // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||||
| // OTHER DEALINGS IN THE SOFTWARE. | ||||
| // ==================================================== | ||||
|  | ||||
| package sais | ||||
|  | ||||
| func getCounts_byte(T []byte, C []int, n, k int) { | ||||
| 	var i int | ||||
| 	for i = 0; i < k; i++ { | ||||
| 		C[i] = 0 | ||||
| 	} | ||||
| 	for i = 0; i < n; i++ { | ||||
| 		C[T[i]]++ | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func getBuckets_byte(C, B []int, k int, end bool) { | ||||
| 	var i, sum int | ||||
| 	if end { | ||||
| 		for i = 0; i < k; i++ { | ||||
| 			sum += C[i] | ||||
| 			B[i] = sum | ||||
| 		} | ||||
| 	} else { | ||||
| 		for i = 0; i < k; i++ { | ||||
| 			sum += C[i] | ||||
| 			B[i] = sum - C[i] | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func sortLMS1_byte(T []byte, SA, C, B []int, n, k int) { | ||||
| 	var b, i, j int | ||||
| 	var c0, c1 int | ||||
|  | ||||
| 	// Compute SAl. | ||||
| 	if &C[0] == &B[0] { | ||||
| 		getCounts_byte(T, C, n, k) | ||||
| 	} | ||||
| 	getBuckets_byte(C, B, k, false) // Find starts of buckets | ||||
| 	j = n - 1 | ||||
| 	c1 = int(T[j]) | ||||
| 	b = B[c1] | ||||
| 	j-- | ||||
| 	if int(T[j]) < c1 { | ||||
| 		SA[b] = ^j | ||||
| 	} else { | ||||
| 		SA[b] = j | ||||
| 	} | ||||
| 	b++ | ||||
| 	for i = 0; i < n; i++ { | ||||
| 		if j = SA[i]; j > 0 { | ||||
| 			if c0 = int(T[j]); c0 != c1 { | ||||
| 				B[c1] = b | ||||
| 				c1 = c0 | ||||
| 				b = B[c1] | ||||
| 			} | ||||
| 			j-- | ||||
| 			if int(T[j]) < c1 { | ||||
| 				SA[b] = ^j | ||||
| 			} else { | ||||
| 				SA[b] = j | ||||
| 			} | ||||
| 			b++ | ||||
| 			SA[i] = 0 | ||||
| 		} else if j < 0 { | ||||
| 			SA[i] = ^j | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Compute SAs. | ||||
| 	if &C[0] == &B[0] { | ||||
| 		getCounts_byte(T, C, n, k) | ||||
| 	} | ||||
| 	getBuckets_byte(C, B, k, true) // Find ends of buckets | ||||
| 	c1 = 0 | ||||
| 	b = B[c1] | ||||
| 	for i = n - 1; i >= 0; i-- { | ||||
| 		if j = SA[i]; j > 0 { | ||||
| 			if c0 = int(T[j]); c0 != c1 { | ||||
| 				B[c1] = b | ||||
| 				c1 = c0 | ||||
| 				b = B[c1] | ||||
| 			} | ||||
| 			j-- | ||||
| 			b-- | ||||
| 			if int(T[j]) > c1 { | ||||
| 				SA[b] = ^(j + 1) | ||||
| 			} else { | ||||
| 				SA[b] = j | ||||
| 			} | ||||
| 			SA[i] = 0 | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func postProcLMS1_byte(T []byte, SA []int, n, m int) int { | ||||
| 	var i, j, p, q, plen, qlen, name int | ||||
| 	var c0, c1 int | ||||
| 	var diff bool | ||||
|  | ||||
| 	// Compact all the sorted substrings into the first m items of SA. | ||||
| 	// 2*m must be not larger than n (provable). | ||||
| 	for i = 0; SA[i] < 0; i++ { | ||||
| 		SA[i] = ^SA[i] | ||||
| 	} | ||||
| 	if i < m { | ||||
| 		for j, i = i, i+1; ; i++ { | ||||
| 			if p = SA[i]; p < 0 { | ||||
| 				SA[j] = ^p | ||||
| 				j++ | ||||
| 				SA[i] = 0 | ||||
| 				if j == m { | ||||
| 					break | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Store the length of all substrings. | ||||
| 	i = n - 1 | ||||
| 	j = n - 1 | ||||
| 	c0 = int(T[n-1]) | ||||
| 	for { | ||||
| 		c1 = c0 | ||||
| 		if i--; i < 0 { | ||||
| 			break | ||||
| 		} | ||||
| 		if c0 = int(T[i]); c0 < c1 { | ||||
| 			break | ||||
| 		} | ||||
| 	} | ||||
| 	for i >= 0 { | ||||
| 		for { | ||||
| 			c1 = c0 | ||||
| 			if i--; i < 0 { | ||||
| 				break | ||||
| 			} | ||||
| 			if c0 = int(T[i]); c0 > c1 { | ||||
| 				break | ||||
| 			} | ||||
| 		} | ||||
| 		if i >= 0 { | ||||
| 			SA[m+((i+1)>>1)] = j - i | ||||
| 			j = i + 1 | ||||
| 			for { | ||||
| 				c1 = c0 | ||||
| 				if i--; i < 0 { | ||||
| 					break | ||||
| 				} | ||||
| 				if c0 = int(T[i]); c0 < c1 { | ||||
| 					break | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Find the lexicographic names of all substrings. | ||||
| 	name = 0 | ||||
| 	qlen = 0 | ||||
| 	for i, q = 0, n; i < m; i++ { | ||||
| 		p = SA[i] | ||||
| 		plen = SA[m+(p>>1)] | ||||
| 		diff = true | ||||
| 		if (plen == qlen) && ((q + plen) < n) { | ||||
| 			for j = 0; (j < plen) && (T[p+j] == T[q+j]); j++ { | ||||
| 			} | ||||
| 			if j == plen { | ||||
| 				diff = false | ||||
| 			} | ||||
| 		} | ||||
| 		if diff { | ||||
| 			name++ | ||||
| 			q = p | ||||
| 			qlen = plen | ||||
| 		} | ||||
| 		SA[m+(p>>1)] = name | ||||
| 	} | ||||
| 	return name | ||||
| } | ||||
|  | ||||
| func sortLMS2_byte(T []byte, SA, C, B, D []int, n, k int) { | ||||
| 	var b, i, j, t, d int | ||||
| 	var c0, c1 int | ||||
|  | ||||
| 	// Compute SAl. | ||||
| 	getBuckets_byte(C, B, k, false) // Find starts of buckets | ||||
| 	j = n - 1 | ||||
| 	c1 = int(T[j]) | ||||
| 	b = B[c1] | ||||
| 	j-- | ||||
| 	if int(T[j]) < c1 { | ||||
| 		t = 1 | ||||
| 	} else { | ||||
| 		t = 0 | ||||
| 	} | ||||
| 	j += n | ||||
| 	if t&1 > 0 { | ||||
| 		SA[b] = ^j | ||||
| 	} else { | ||||
| 		SA[b] = j | ||||
| 	} | ||||
| 	b++ | ||||
| 	for i, d = 0, 0; i < n; i++ { | ||||
| 		if j = SA[i]; j > 0 { | ||||
| 			if n <= j { | ||||
| 				d += 1 | ||||
| 				j -= n | ||||
| 			} | ||||
| 			if c0 = int(T[j]); c0 != c1 { | ||||
| 				B[c1] = b | ||||
| 				c1 = c0 | ||||
| 				b = B[c1] | ||||
| 			} | ||||
| 			j-- | ||||
| 			t = int(c0) << 1 | ||||
| 			if int(T[j]) < c1 { | ||||
| 				t |= 1 | ||||
| 			} | ||||
| 			if D[t] != d { | ||||
| 				j += n | ||||
| 				D[t] = d | ||||
| 			} | ||||
| 			if t&1 > 0 { | ||||
| 				SA[b] = ^j | ||||
| 			} else { | ||||
| 				SA[b] = j | ||||
| 			} | ||||
| 			b++ | ||||
| 			SA[i] = 0 | ||||
| 		} else if j < 0 { | ||||
| 			SA[i] = ^j | ||||
| 		} | ||||
| 	} | ||||
| 	for i = n - 1; 0 <= i; i-- { | ||||
| 		if SA[i] > 0 { | ||||
| 			if SA[i] < n { | ||||
| 				SA[i] += n | ||||
| 				for j = i - 1; SA[j] < n; j-- { | ||||
| 				} | ||||
| 				SA[j] -= n | ||||
| 				i = j | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Compute SAs. | ||||
| 	getBuckets_byte(C, B, k, true) // Find ends of buckets | ||||
| 	c1 = 0 | ||||
| 	b = B[c1] | ||||
| 	for i, d = n-1, d+1; i >= 0; i-- { | ||||
| 		if j = SA[i]; j > 0 { | ||||
| 			if n <= j { | ||||
| 				d += 1 | ||||
| 				j -= n | ||||
| 			} | ||||
| 			if c0 = int(T[j]); c0 != c1 { | ||||
| 				B[c1] = b | ||||
| 				c1 = c0 | ||||
| 				b = B[c1] | ||||
| 			} | ||||
| 			j-- | ||||
| 			t = int(c0) << 1 | ||||
| 			if int(T[j]) > c1 { | ||||
| 				t |= 1 | ||||
| 			} | ||||
| 			if D[t] != d { | ||||
| 				j += n | ||||
| 				D[t] = d | ||||
| 			} | ||||
| 			b-- | ||||
| 			if t&1 > 0 { | ||||
| 				SA[b] = ^(j + 1) | ||||
| 			} else { | ||||
| 				SA[b] = j | ||||
| 			} | ||||
| 			SA[i] = 0 | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func postProcLMS2_byte(SA []int, n, m int) int { | ||||
| 	var i, j, d, name int | ||||
|  | ||||
| 	// Compact all the sorted LMS substrings into the first m items of SA. | ||||
| 	name = 0 | ||||
| 	for i = 0; SA[i] < 0; i++ { | ||||
| 		j = ^SA[i] | ||||
| 		if n <= j { | ||||
| 			name += 1 | ||||
| 		} | ||||
| 		SA[i] = j | ||||
| 	} | ||||
| 	if i < m { | ||||
| 		for d, i = i, i+1; ; i++ { | ||||
| 			if j = SA[i]; j < 0 { | ||||
| 				j = ^j | ||||
| 				if n <= j { | ||||
| 					name += 1 | ||||
| 				} | ||||
| 				SA[d] = j | ||||
| 				d++ | ||||
| 				SA[i] = 0 | ||||
| 				if d == m { | ||||
| 					break | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	if name < m { | ||||
| 		// Store the lexicographic names. | ||||
| 		for i, d = m-1, name+1; 0 <= i; i-- { | ||||
| 			if j = SA[i]; n <= j { | ||||
| 				j -= n | ||||
| 				d-- | ||||
| 			} | ||||
| 			SA[m+(j>>1)] = d | ||||
| 		} | ||||
| 	} else { | ||||
| 		// Unset flags. | ||||
| 		for i = 0; i < m; i++ { | ||||
| 			if j = SA[i]; n <= j { | ||||
| 				j -= n | ||||
| 				SA[i] = j | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	return name | ||||
| } | ||||
|  | ||||
| func induceSA_byte(T []byte, SA, C, B []int, n, k int) { | ||||
| 	var b, i, j int | ||||
| 	var c0, c1 int | ||||
|  | ||||
| 	// Compute SAl. | ||||
| 	if &C[0] == &B[0] { | ||||
| 		getCounts_byte(T, C, n, k) | ||||
| 	} | ||||
| 	getBuckets_byte(C, B, k, false) // Find starts of buckets | ||||
| 	j = n - 1 | ||||
| 	c1 = int(T[j]) | ||||
| 	b = B[c1] | ||||
| 	if j > 0 && int(T[j-1]) < c1 { | ||||
| 		SA[b] = ^j | ||||
| 	} else { | ||||
| 		SA[b] = j | ||||
| 	} | ||||
| 	b++ | ||||
| 	for i = 0; i < n; i++ { | ||||
| 		j = SA[i] | ||||
| 		SA[i] = ^j | ||||
| 		if j > 0 { | ||||
| 			j-- | ||||
| 			if c0 = int(T[j]); c0 != c1 { | ||||
| 				B[c1] = b | ||||
| 				c1 = c0 | ||||
| 				b = B[c1] | ||||
| 			} | ||||
| 			if j > 0 && int(T[j-1]) < c1 { | ||||
| 				SA[b] = ^j | ||||
| 			} else { | ||||
| 				SA[b] = j | ||||
| 			} | ||||
| 			b++ | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Compute SAs. | ||||
| 	if &C[0] == &B[0] { | ||||
| 		getCounts_byte(T, C, n, k) | ||||
| 	} | ||||
| 	getBuckets_byte(C, B, k, true) // Find ends of buckets | ||||
| 	c1 = 0 | ||||
| 	b = B[c1] | ||||
| 	for i = n - 1; i >= 0; i-- { | ||||
| 		if j = SA[i]; j > 0 { | ||||
| 			j-- | ||||
| 			if c0 = int(T[j]); c0 != c1 { | ||||
| 				B[c1] = b | ||||
| 				c1 = c0 | ||||
| 				b = B[c1] | ||||
| 			} | ||||
| 			b-- | ||||
| 			if (j == 0) || (int(T[j-1]) > c1) { | ||||
| 				SA[b] = ^j | ||||
| 			} else { | ||||
| 				SA[b] = j | ||||
| 			} | ||||
| 		} else { | ||||
| 			SA[i] = ^j | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func computeSA_byte(T []byte, SA []int, fs, n, k int) { | ||||
| 	const ( | ||||
| 		minBucketSize = 512 | ||||
| 		sortLMS2Limit = 0x3fffffff | ||||
| 	) | ||||
|  | ||||
| 	var C, B, D, RA []int | ||||
| 	var bo int // Offset of B relative to SA | ||||
| 	var b, i, j, m, p, q, name, newfs int | ||||
| 	var c0, c1 int | ||||
| 	var flags uint | ||||
|  | ||||
| 	if k <= minBucketSize { | ||||
| 		C = make([]int, k) | ||||
| 		if k <= fs { | ||||
| 			bo = n + fs - k | ||||
| 			B = SA[bo:] | ||||
| 			flags = 1 | ||||
| 		} else { | ||||
| 			B = make([]int, k) | ||||
| 			flags = 3 | ||||
| 		} | ||||
| 	} else if k <= fs { | ||||
| 		C = SA[n+fs-k:] | ||||
| 		if k <= fs-k { | ||||
| 			bo = n + fs - 2*k | ||||
| 			B = SA[bo:] | ||||
| 			flags = 0 | ||||
| 		} else if k <= 4*minBucketSize { | ||||
| 			B = make([]int, k) | ||||
| 			flags = 2 | ||||
| 		} else { | ||||
| 			B = C | ||||
| 			flags = 8 | ||||
| 		} | ||||
| 	} else { | ||||
| 		C = make([]int, k) | ||||
| 		B = C | ||||
| 		flags = 4 | 8 | ||||
| 	} | ||||
| 	if n <= sortLMS2Limit && 2 <= (n/k) { | ||||
| 		if flags&1 > 0 { | ||||
| 			if 2*k <= fs-k { | ||||
| 				flags |= 32 | ||||
| 			} else { | ||||
| 				flags |= 16 | ||||
| 			} | ||||
| 		} else if flags == 0 && 2*k <= (fs-2*k) { | ||||
| 			flags |= 32 | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Stage 1: Reduce the problem by at least 1/2. | ||||
| 	// Sort all the LMS-substrings. | ||||
| 	getCounts_byte(T, C, n, k) | ||||
| 	getBuckets_byte(C, B, k, true) // Find ends of buckets | ||||
| 	for i = 0; i < n; i++ { | ||||
| 		SA[i] = 0 | ||||
| 	} | ||||
| 	b = -1 | ||||
| 	i = n - 1 | ||||
| 	j = n | ||||
| 	m = 0 | ||||
| 	c0 = int(T[n-1]) | ||||
| 	for { | ||||
| 		c1 = c0 | ||||
| 		if i--; i < 0 { | ||||
| 			break | ||||
| 		} | ||||
| 		if c0 = int(T[i]); c0 < c1 { | ||||
| 			break | ||||
| 		} | ||||
| 	} | ||||
| 	for i >= 0 { | ||||
| 		for { | ||||
| 			c1 = c0 | ||||
| 			if i--; i < 0 { | ||||
| 				break | ||||
| 			} | ||||
| 			if c0 = int(T[i]); c0 > c1 { | ||||
| 				break | ||||
| 			} | ||||
| 		} | ||||
| 		if i >= 0 { | ||||
| 			if b >= 0 { | ||||
| 				SA[b] = j | ||||
| 			} | ||||
| 			B[c1]-- | ||||
| 			b = B[c1] | ||||
| 			j = i | ||||
| 			m++ | ||||
| 			for { | ||||
| 				c1 = c0 | ||||
| 				if i--; i < 0 { | ||||
| 					break | ||||
| 				} | ||||
| 				if c0 = int(T[i]); c0 < c1 { | ||||
| 					break | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	if m > 1 { | ||||
| 		if flags&(16|32) > 0 { | ||||
| 			if flags&16 > 0 { | ||||
| 				D = make([]int, 2*k) | ||||
| 			} else { | ||||
| 				D = SA[bo-2*k:] | ||||
| 			} | ||||
| 			B[T[j+1]]++ | ||||
| 			for i, j = 0, 0; i < k; i++ { | ||||
| 				j += C[i] | ||||
| 				if B[i] != j { | ||||
| 					SA[B[i]] += n | ||||
| 				} | ||||
| 				D[i] = 0 | ||||
| 				D[i+k] = 0 | ||||
| 			} | ||||
| 			sortLMS2_byte(T, SA, C, B, D, n, k) | ||||
| 			name = postProcLMS2_byte(SA, n, m) | ||||
| 		} else { | ||||
| 			sortLMS1_byte(T, SA, C, B, n, k) | ||||
| 			name = postProcLMS1_byte(T, SA, n, m) | ||||
| 		} | ||||
| 	} else if m == 1 { | ||||
| 		SA[b] = j + 1 | ||||
| 		name = 1 | ||||
| 	} else { | ||||
| 		name = 0 | ||||
| 	} | ||||
|  | ||||
| 	// Stage 2: Solve the reduced problem. | ||||
| 	// Recurse if names are not yet unique. | ||||
| 	if name < m { | ||||
| 		newfs = n + fs - 2*m | ||||
| 		if flags&(1|4|8) == 0 { | ||||
| 			if k+name <= newfs { | ||||
| 				newfs -= k | ||||
| 			} else { | ||||
| 				flags |= 8 | ||||
| 			} | ||||
| 		} | ||||
| 		RA = SA[m+newfs:] | ||||
| 		for i, j = m+(n>>1)-1, m-1; m <= i; i-- { | ||||
| 			if SA[i] != 0 { | ||||
| 				RA[j] = SA[i] - 1 | ||||
| 				j-- | ||||
| 			} | ||||
| 		} | ||||
| 		computeSA_int(RA, SA, newfs, m, name) | ||||
|  | ||||
| 		i = n - 1 | ||||
| 		j = m - 1 | ||||
| 		c0 = int(T[n-1]) | ||||
| 		for { | ||||
| 			c1 = c0 | ||||
| 			if i--; i < 0 { | ||||
| 				break | ||||
| 			} | ||||
| 			if c0 = int(T[i]); c0 < c1 { | ||||
| 				break | ||||
| 			} | ||||
| 		} | ||||
| 		for i >= 0 { | ||||
| 			for { | ||||
| 				c1 = c0 | ||||
| 				if i--; i < 0 { | ||||
| 					break | ||||
| 				} | ||||
| 				if c0 = int(T[i]); c0 > c1 { | ||||
| 					break | ||||
| 				} | ||||
| 			} | ||||
| 			if i >= 0 { | ||||
| 				RA[j] = i + 1 | ||||
| 				j-- | ||||
| 				for { | ||||
| 					c1 = c0 | ||||
| 					if i--; i < 0 { | ||||
| 						break | ||||
| 					} | ||||
| 					if c0 = int(T[i]); c0 < c1 { | ||||
| 						break | ||||
| 					} | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 		for i = 0; i < m; i++ { | ||||
| 			SA[i] = RA[SA[i]] | ||||
| 		} | ||||
| 		if flags&4 > 0 { | ||||
| 			B = make([]int, k) | ||||
| 			C = B | ||||
| 		} | ||||
| 		if flags&2 > 0 { | ||||
| 			B = make([]int, k) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Stage 3: Induce the result for the original problem. | ||||
| 	if flags&8 > 0 { | ||||
| 		getCounts_byte(T, C, n, k) | ||||
| 	} | ||||
| 	// Put all left-most S characters into their buckets. | ||||
| 	if m > 1 { | ||||
| 		getBuckets_byte(C, B, k, true) // Find ends of buckets | ||||
| 		i = m - 1 | ||||
| 		j = n | ||||
| 		p = SA[m-1] | ||||
| 		c1 = int(T[p]) | ||||
| 		for { | ||||
| 			c0 = c1 | ||||
| 			q = B[c0] | ||||
| 			for q < j { | ||||
| 				j-- | ||||
| 				SA[j] = 0 | ||||
| 			} | ||||
| 			for { | ||||
| 				j-- | ||||
| 				SA[j] = p | ||||
| 				if i--; i < 0 { | ||||
| 					break | ||||
| 				} | ||||
| 				p = SA[i] | ||||
| 				if c1 = int(T[p]); c1 != c0 { | ||||
| 					break | ||||
| 				} | ||||
| 			} | ||||
| 			if i < 0 { | ||||
| 				break | ||||
| 			} | ||||
| 		} | ||||
| 		for j > 0 { | ||||
| 			j-- | ||||
| 			SA[j] = 0 | ||||
| 		} | ||||
| 	} | ||||
| 	induceSA_byte(T, SA, C, B, n, k) | ||||
| } | ||||
							
								
								
									
661 vendor/github.com/dsnet/compress/bzip2/internal/sais/sais_int.go (generated, vendored, Normal file)
							| @@ -0,0 +1,661 @@ | ||||
| // Copyright 2015, Joe Tsai. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE.md file. | ||||
|  | ||||
| // Code generated by sais_gen.go. DO NOT EDIT. | ||||
|  | ||||
| // ==================================================== | ||||
| // Copyright (c) 2008-2010 Yuta Mori All Rights Reserved. | ||||
| // | ||||
| // Permission is hereby granted, free of charge, to any person | ||||
| // obtaining a copy of this software and associated documentation | ||||
| // files (the "Software"), to deal in the Software without | ||||
| // restriction, including without limitation the rights to use, | ||||
| // copy, modify, merge, publish, distribute, sublicense, and/or sell | ||||
| // copies of the Software, and to permit persons to whom the | ||||
| // Software is furnished to do so, subject to the following | ||||
| // conditions: | ||||
| // | ||||
| // The above copyright notice and this permission notice shall be | ||||
| // included in all copies or substantial portions of the Software. | ||||
| // | ||||
| // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||||
| // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | ||||
| // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||||
| // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | ||||
| // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | ||||
| // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||||
| // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||||
| // OTHER DEALINGS IN THE SOFTWARE. | ||||
| // ==================================================== | ||||
|  | ||||
| package sais | ||||
|  | ||||
| func getCounts_int(T []int, C []int, n, k int) { | ||||
| 	var i int | ||||
| 	for i = 0; i < k; i++ { | ||||
| 		C[i] = 0 | ||||
| 	} | ||||
| 	for i = 0; i < n; i++ { | ||||
| 		C[T[i]]++ | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func getBuckets_int(C, B []int, k int, end bool) { | ||||
| 	var i, sum int | ||||
| 	if end { | ||||
| 		for i = 0; i < k; i++ { | ||||
| 			sum += C[i] | ||||
| 			B[i] = sum | ||||
| 		} | ||||
| 	} else { | ||||
| 		for i = 0; i < k; i++ { | ||||
| 			sum += C[i] | ||||
| 			B[i] = sum - C[i] | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func sortLMS1_int(T []int, SA, C, B []int, n, k int) { | ||||
| 	var b, i, j int | ||||
| 	var c0, c1 int | ||||
|  | ||||
| 	// Compute SAl. | ||||
| 	if &C[0] == &B[0] { | ||||
| 		getCounts_int(T, C, n, k) | ||||
| 	} | ||||
| 	getBuckets_int(C, B, k, false) // Find starts of buckets | ||||
| 	j = n - 1 | ||||
| 	c1 = int(T[j]) | ||||
| 	b = B[c1] | ||||
| 	j-- | ||||
| 	if int(T[j]) < c1 { | ||||
| 		SA[b] = ^j | ||||
| 	} else { | ||||
| 		SA[b] = j | ||||
| 	} | ||||
| 	b++ | ||||
| 	for i = 0; i < n; i++ { | ||||
| 		if j = SA[i]; j > 0 { | ||||
| 			if c0 = int(T[j]); c0 != c1 { | ||||
| 				B[c1] = b | ||||
| 				c1 = c0 | ||||
| 				b = B[c1] | ||||
| 			} | ||||
| 			j-- | ||||
| 			if int(T[j]) < c1 { | ||||
| 				SA[b] = ^j | ||||
| 			} else { | ||||
| 				SA[b] = j | ||||
| 			} | ||||
| 			b++ | ||||
| 			SA[i] = 0 | ||||
| 		} else if j < 0 { | ||||
| 			SA[i] = ^j | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Compute SAs. | ||||
| 	if &C[0] == &B[0] { | ||||
| 		getCounts_int(T, C, n, k) | ||||
| 	} | ||||
| 	getBuckets_int(C, B, k, true) // Find ends of buckets | ||||
| 	c1 = 0 | ||||
| 	b = B[c1] | ||||
| 	for i = n - 1; i >= 0; i-- { | ||||
| 		if j = SA[i]; j > 0 { | ||||
| 			if c0 = int(T[j]); c0 != c1 { | ||||
| 				B[c1] = b | ||||
| 				c1 = c0 | ||||
| 				b = B[c1] | ||||
| 			} | ||||
| 			j-- | ||||
| 			b-- | ||||
| 			if int(T[j]) > c1 { | ||||
| 				SA[b] = ^(j + 1) | ||||
| 			} else { | ||||
| 				SA[b] = j | ||||
| 			} | ||||
| 			SA[i] = 0 | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func postProcLMS1_int(T []int, SA []int, n, m int) int { | ||||
| 	var i, j, p, q, plen, qlen, name int | ||||
| 	var c0, c1 int | ||||
| 	var diff bool | ||||
|  | ||||
| 	// Compact all the sorted substrings into the first m items of SA. | ||||
| 	// 2*m must be not larger than n (provable). | ||||
| 	for i = 0; SA[i] < 0; i++ { | ||||
| 		SA[i] = ^SA[i] | ||||
| 	} | ||||
| 	if i < m { | ||||
| 		for j, i = i, i+1; ; i++ { | ||||
| 			if p = SA[i]; p < 0 { | ||||
| 				SA[j] = ^p | ||||
| 				j++ | ||||
| 				SA[i] = 0 | ||||
| 				if j == m { | ||||
| 					break | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Store the length of all substrings. | ||||
| 	i = n - 1 | ||||
| 	j = n - 1 | ||||
| 	c0 = int(T[n-1]) | ||||
| 	for { | ||||
| 		c1 = c0 | ||||
| 		if i--; i < 0 { | ||||
| 			break | ||||
| 		} | ||||
| 		if c0 = int(T[i]); c0 < c1 { | ||||
| 			break | ||||
| 		} | ||||
| 	} | ||||
| 	for i >= 0 { | ||||
| 		for { | ||||
| 			c1 = c0 | ||||
| 			if i--; i < 0 { | ||||
| 				break | ||||
| 			} | ||||
| 			if c0 = int(T[i]); c0 > c1 { | ||||
| 				break | ||||
| 			} | ||||
| 		} | ||||
| 		if i >= 0 { | ||||
| 			SA[m+((i+1)>>1)] = j - i | ||||
| 			j = i + 1 | ||||
| 			for { | ||||
| 				c1 = c0 | ||||
| 				if i--; i < 0 { | ||||
| 					break | ||||
| 				} | ||||
| 				if c0 = int(T[i]); c0 < c1 { | ||||
| 					break | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Find the lexicographic names of all substrings. | ||||
| 	name = 0 | ||||
| 	qlen = 0 | ||||
| 	for i, q = 0, n; i < m; i++ { | ||||
| 		p = SA[i] | ||||
| 		plen = SA[m+(p>>1)] | ||||
| 		diff = true | ||||
| 		if (plen == qlen) && ((q + plen) < n) { | ||||
| 			for j = 0; (j < plen) && (T[p+j] == T[q+j]); j++ { | ||||
| 			} | ||||
| 			if j == plen { | ||||
| 				diff = false | ||||
| 			} | ||||
| 		} | ||||
| 		if diff { | ||||
| 			name++ | ||||
| 			q = p | ||||
| 			qlen = plen | ||||
| 		} | ||||
| 		SA[m+(p>>1)] = name | ||||
| 	} | ||||
| 	return name | ||||
| } | ||||
|  | ||||
| func sortLMS2_int(T []int, SA, C, B, D []int, n, k int) { | ||||
| 	var b, i, j, t, d int | ||||
| 	var c0, c1 int | ||||
|  | ||||
| 	// Compute SAl. | ||||
| 	getBuckets_int(C, B, k, false) // Find starts of buckets | ||||
| 	j = n - 1 | ||||
| 	c1 = int(T[j]) | ||||
| 	b = B[c1] | ||||
| 	j-- | ||||
| 	if int(T[j]) < c1 { | ||||
| 		t = 1 | ||||
| 	} else { | ||||
| 		t = 0 | ||||
| 	} | ||||
| 	j += n | ||||
| 	if t&1 > 0 { | ||||
| 		SA[b] = ^j | ||||
| 	} else { | ||||
| 		SA[b] = j | ||||
| 	} | ||||
| 	b++ | ||||
| 	for i, d = 0, 0; i < n; i++ { | ||||
| 		if j = SA[i]; j > 0 { | ||||
| 			if n <= j { | ||||
| 				d += 1 | ||||
| 				j -= n | ||||
| 			} | ||||
| 			if c0 = int(T[j]); c0 != c1 { | ||||
| 				B[c1] = b | ||||
| 				c1 = c0 | ||||
| 				b = B[c1] | ||||
| 			} | ||||
| 			j-- | ||||
| 			t = int(c0) << 1 | ||||
| 			if int(T[j]) < c1 { | ||||
| 				t |= 1 | ||||
| 			} | ||||
| 			if D[t] != d { | ||||
| 				j += n | ||||
| 				D[t] = d | ||||
| 			} | ||||
| 			if t&1 > 0 { | ||||
| 				SA[b] = ^j | ||||
| 			} else { | ||||
| 				SA[b] = j | ||||
| 			} | ||||
| 			b++ | ||||
| 			SA[i] = 0 | ||||
| 		} else if j < 0 { | ||||
| 			SA[i] = ^j | ||||
| 		} | ||||
| 	} | ||||
| 	for i = n - 1; 0 <= i; i-- { | ||||
| 		if SA[i] > 0 { | ||||
| 			if SA[i] < n { | ||||
| 				SA[i] += n | ||||
| 				for j = i - 1; SA[j] < n; j-- { | ||||
| 				} | ||||
| 				SA[j] -= n | ||||
| 				i = j | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Compute SAs. | ||||
| 	getBuckets_int(C, B, k, true) // Find ends of buckets | ||||
| 	c1 = 0 | ||||
| 	b = B[c1] | ||||
| 	for i, d = n-1, d+1; i >= 0; i-- { | ||||
| 		if j = SA[i]; j > 0 { | ||||
| 			if n <= j { | ||||
| 				d += 1 | ||||
| 				j -= n | ||||
| 			} | ||||
| 			if c0 = int(T[j]); c0 != c1 { | ||||
| 				B[c1] = b | ||||
| 				c1 = c0 | ||||
| 				b = B[c1] | ||||
| 			} | ||||
| 			j-- | ||||
| 			t = int(c0) << 1 | ||||
| 			if int(T[j]) > c1 { | ||||
| 				t |= 1 | ||||
| 			} | ||||
| 			if D[t] != d { | ||||
| 				j += n | ||||
| 				D[t] = d | ||||
| 			} | ||||
| 			b-- | ||||
| 			if t&1 > 0 { | ||||
| 				SA[b] = ^(j + 1) | ||||
| 			} else { | ||||
| 				SA[b] = j | ||||
| 			} | ||||
| 			SA[i] = 0 | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func postProcLMS2_int(SA []int, n, m int) int { | ||||
| 	var i, j, d, name int | ||||
|  | ||||
| 	// Compact all the sorted LMS substrings into the first m items of SA. | ||||
| 	name = 0 | ||||
| 	for i = 0; SA[i] < 0; i++ { | ||||
| 		j = ^SA[i] | ||||
| 		if n <= j { | ||||
| 			name += 1 | ||||
| 		} | ||||
| 		SA[i] = j | ||||
| 	} | ||||
| 	if i < m { | ||||
| 		for d, i = i, i+1; ; i++ { | ||||
| 			if j = SA[i]; j < 0 { | ||||
| 				j = ^j | ||||
| 				if n <= j { | ||||
| 					name += 1 | ||||
| 				} | ||||
| 				SA[d] = j | ||||
| 				d++ | ||||
| 				SA[i] = 0 | ||||
| 				if d == m { | ||||
| 					break | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	if name < m { | ||||
| 		// Store the lexicographic names. | ||||
| 		for i, d = m-1, name+1; 0 <= i; i-- { | ||||
| 			if j = SA[i]; n <= j { | ||||
| 				j -= n | ||||
| 				d-- | ||||
| 			} | ||||
| 			SA[m+(j>>1)] = d | ||||
| 		} | ||||
| 	} else { | ||||
| 		// Unset flags. | ||||
| 		for i = 0; i < m; i++ { | ||||
| 			if j = SA[i]; n <= j { | ||||
| 				j -= n | ||||
| 				SA[i] = j | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	return name | ||||
| } | ||||
|  | ||||
| func induceSA_int(T []int, SA, C, B []int, n, k int) { | ||||
| 	var b, i, j int | ||||
| 	var c0, c1 int | ||||
|  | ||||
| 	// Compute SAl. | ||||
| 	if &C[0] == &B[0] { | ||||
| 		getCounts_int(T, C, n, k) | ||||
| 	} | ||||
| 	getBuckets_int(C, B, k, false) // Find starts of buckets | ||||
| 	j = n - 1 | ||||
| 	c1 = int(T[j]) | ||||
| 	b = B[c1] | ||||
| 	if j > 0 && int(T[j-1]) < c1 { | ||||
| 		SA[b] = ^j | ||||
| 	} else { | ||||
| 		SA[b] = j | ||||
| 	} | ||||
| 	b++ | ||||
| 	for i = 0; i < n; i++ { | ||||
| 		j = SA[i] | ||||
| 		SA[i] = ^j | ||||
| 		if j > 0 { | ||||
| 			j-- | ||||
| 			if c0 = int(T[j]); c0 != c1 { | ||||
| 				B[c1] = b | ||||
| 				c1 = c0 | ||||
| 				b = B[c1] | ||||
| 			} | ||||
| 			if j > 0 && int(T[j-1]) < c1 { | ||||
| 				SA[b] = ^j | ||||
| 			} else { | ||||
| 				SA[b] = j | ||||
| 			} | ||||
| 			b++ | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Compute SAs. | ||||
| 	if &C[0] == &B[0] { | ||||
| 		getCounts_int(T, C, n, k) | ||||
| 	} | ||||
| 	getBuckets_int(C, B, k, true) // Find ends of buckets | ||||
| 	c1 = 0 | ||||
| 	b = B[c1] | ||||
| 	for i = n - 1; i >= 0; i-- { | ||||
| 		if j = SA[i]; j > 0 { | ||||
| 			j-- | ||||
| 			if c0 = int(T[j]); c0 != c1 { | ||||
| 				B[c1] = b | ||||
| 				c1 = c0 | ||||
| 				b = B[c1] | ||||
| 			} | ||||
| 			b-- | ||||
| 			if (j == 0) || (int(T[j-1]) > c1) { | ||||
| 				SA[b] = ^j | ||||
| 			} else { | ||||
| 				SA[b] = j | ||||
| 			} | ||||
| 		} else { | ||||
| 			SA[i] = ^j | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func computeSA_int(T []int, SA []int, fs, n, k int) { | ||||
| 	const ( | ||||
| 		minBucketSize = 512 | ||||
| 		sortLMS2Limit = 0x3fffffff | ||||
| 	) | ||||
|  | ||||
| 	var C, B, D, RA []int | ||||
| 	var bo int // Offset of B relative to SA | ||||
| 	var b, i, j, m, p, q, name, newfs int | ||||
| 	var c0, c1 int | ||||
| 	var flags uint | ||||
|  | ||||
| 	if k <= minBucketSize { | ||||
| 		C = make([]int, k) | ||||
| 		if k <= fs { | ||||
| 			bo = n + fs - k | ||||
| 			B = SA[bo:] | ||||
| 			flags = 1 | ||||
| 		} else { | ||||
| 			B = make([]int, k) | ||||
| 			flags = 3 | ||||
| 		} | ||||
| 	} else if k <= fs { | ||||
| 		C = SA[n+fs-k:] | ||||
| 		if k <= fs-k { | ||||
| 			bo = n + fs - 2*k | ||||
| 			B = SA[bo:] | ||||
| 			flags = 0 | ||||
| 		} else if k <= 4*minBucketSize { | ||||
| 			B = make([]int, k) | ||||
| 			flags = 2 | ||||
| 		} else { | ||||
| 			B = C | ||||
| 			flags = 8 | ||||
| 		} | ||||
| 	} else { | ||||
| 		C = make([]int, k) | ||||
| 		B = C | ||||
| 		flags = 4 | 8 | ||||
| 	} | ||||
| 	if n <= sortLMS2Limit && 2 <= (n/k) { | ||||
| 		if flags&1 > 0 { | ||||
| 			if 2*k <= fs-k { | ||||
| 				flags |= 32 | ||||
| 			} else { | ||||
| 				flags |= 16 | ||||
| 			} | ||||
| 		} else if flags == 0 && 2*k <= (fs-2*k) { | ||||
| 			flags |= 32 | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Stage 1: Reduce the problem by at least 1/2. | ||||
| 	// Sort all the LMS-substrings. | ||||
| 	getCounts_int(T, C, n, k) | ||||
| 	getBuckets_int(C, B, k, true) // Find ends of buckets | ||||
| 	for i = 0; i < n; i++ { | ||||
| 		SA[i] = 0 | ||||
| 	} | ||||
| 	b = -1 | ||||
| 	i = n - 1 | ||||
| 	j = n | ||||
| 	m = 0 | ||||
| 	c0 = int(T[n-1]) | ||||
| 	for { | ||||
| 		c1 = c0 | ||||
| 		if i--; i < 0 { | ||||
| 			break | ||||
| 		} | ||||
| 		if c0 = int(T[i]); c0 < c1 { | ||||
| 			break | ||||
| 		} | ||||
| 	} | ||||
| 	for i >= 0 { | ||||
| 		for { | ||||
| 			c1 = c0 | ||||
| 			if i--; i < 0 { | ||||
| 				break | ||||
| 			} | ||||
| 			if c0 = int(T[i]); c0 > c1 { | ||||
| 				break | ||||
| 			} | ||||
| 		} | ||||
| 		if i >= 0 { | ||||
| 			if b >= 0 { | ||||
| 				SA[b] = j | ||||
| 			} | ||||
| 			B[c1]-- | ||||
| 			b = B[c1] | ||||
| 			j = i | ||||
| 			m++ | ||||
| 			for { | ||||
| 				c1 = c0 | ||||
| 				if i--; i < 0 { | ||||
| 					break | ||||
| 				} | ||||
| 				if c0 = int(T[i]); c0 < c1 { | ||||
| 					break | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	if m > 1 { | ||||
| 		if flags&(16|32) > 0 { | ||||
| 			if flags&16 > 0 { | ||||
| 				D = make([]int, 2*k) | ||||
| 			} else { | ||||
| 				D = SA[bo-2*k:] | ||||
| 			} | ||||
| 			B[T[j+1]]++ | ||||
| 			for i, j = 0, 0; i < k; i++ { | ||||
| 				j += C[i] | ||||
| 				if B[i] != j { | ||||
| 					SA[B[i]] += n | ||||
| 				} | ||||
| 				D[i] = 0 | ||||
| 				D[i+k] = 0 | ||||
| 			} | ||||
| 			sortLMS2_int(T, SA, C, B, D, n, k) | ||||
| 			name = postProcLMS2_int(SA, n, m) | ||||
| 		} else { | ||||
| 			sortLMS1_int(T, SA, C, B, n, k) | ||||
| 			name = postProcLMS1_int(T, SA, n, m) | ||||
| 		} | ||||
| 	} else if m == 1 { | ||||
| 		SA[b] = j + 1 | ||||
| 		name = 1 | ||||
| 	} else { | ||||
| 		name = 0 | ||||
| 	} | ||||
|  | ||||
| 	// Stage 2: Solve the reduced problem. | ||||
| 	// Recurse if names are not yet unique. | ||||
| 	if name < m { | ||||
| 		newfs = n + fs - 2*m | ||||
| 		if flags&(1|4|8) == 0 { | ||||
| 			if k+name <= newfs { | ||||
| 				newfs -= k | ||||
| 			} else { | ||||
| 				flags |= 8 | ||||
| 			} | ||||
| 		} | ||||
| 		RA = SA[m+newfs:] | ||||
| 		for i, j = m+(n>>1)-1, m-1; m <= i; i-- { | ||||
| 			if SA[i] != 0 { | ||||
| 				RA[j] = SA[i] - 1 | ||||
| 				j-- | ||||
| 			} | ||||
| 		} | ||||
| 		computeSA_int(RA, SA, newfs, m, name) | ||||
|  | ||||
| 		i = n - 1 | ||||
| 		j = m - 1 | ||||
| 		c0 = int(T[n-1]) | ||||
| 		for { | ||||
| 			c1 = c0 | ||||
| 			if i--; i < 0 { | ||||
| 				break | ||||
| 			} | ||||
| 			if c0 = int(T[i]); c0 < c1 { | ||||
| 				break | ||||
| 			} | ||||
| 		} | ||||
| 		for i >= 0 { | ||||
| 			for { | ||||
| 				c1 = c0 | ||||
| 				if i--; i < 0 { | ||||
| 					break | ||||
| 				} | ||||
| 				if c0 = int(T[i]); c0 > c1 { | ||||
| 					break | ||||
| 				} | ||||
| 			} | ||||
| 			if i >= 0 { | ||||
| 				RA[j] = i + 1 | ||||
| 				j-- | ||||
| 				for { | ||||
| 					c1 = c0 | ||||
| 					if i--; i < 0 { | ||||
| 						break | ||||
| 					} | ||||
| 					if c0 = int(T[i]); c0 < c1 { | ||||
| 						break | ||||
| 					} | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 		for i = 0; i < m; i++ { | ||||
| 			SA[i] = RA[SA[i]] | ||||
| 		} | ||||
| 		if flags&4 > 0 { | ||||
| 			B = make([]int, k) | ||||
| 			C = B | ||||
| 		} | ||||
| 		if flags&2 > 0 { | ||||
| 			B = make([]int, k) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Stage 3: Induce the result for the original problem. | ||||
| 	if flags&8 > 0 { | ||||
| 		getCounts_int(T, C, n, k) | ||||
| 	} | ||||
| 	// Put all left-most S characters into their buckets. | ||||
| 	if m > 1 { | ||||
| 		getBuckets_int(C, B, k, true) // Find ends of buckets | ||||
| 		i = m - 1 | ||||
| 		j = n | ||||
| 		p = SA[m-1] | ||||
| 		c1 = int(T[p]) | ||||
| 		for { | ||||
| 			c0 = c1 | ||||
| 			q = B[c0] | ||||
| 			for q < j { | ||||
| 				j-- | ||||
| 				SA[j] = 0 | ||||
| 			} | ||||
| 			for { | ||||
| 				j-- | ||||
| 				SA[j] = p | ||||
| 				if i--; i < 0 { | ||||
| 					break | ||||
| 				} | ||||
| 				p = SA[i] | ||||
| 				if c1 = int(T[p]); c1 != c0 { | ||||
| 					break | ||||
| 				} | ||||
| 			} | ||||
| 			if i < 0 { | ||||
| 				break | ||||
| 			} | ||||
| 		} | ||||
| 		for j > 0 { | ||||
| 			j-- | ||||
| 			SA[j] = 0 | ||||
| 		} | ||||
| 	} | ||||
| 	induceSA_int(T, SA, C, B, n, k) | ||||
| } | ||||
							
								
								
									
131 vendor/github.com/dsnet/compress/bzip2/mtf_rle2.go (generated, vendored, Normal file)
							| @@ -0,0 +1,131 @@ | ||||
| // Copyright 2015, Joe Tsai. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE.md file. | ||||
|  | ||||
| package bzip2 | ||||
|  | ||||
| import "github.com/dsnet/compress/internal/errors" | ||||
|  | ||||
| // moveToFront implements both the MTF and RLE stages of bzip2 at the same time. | ||||
| // Any runs of zeros in the encoded output will be replaced by a sequence of | ||||
| // RUNA and RUNB symbols that encode the length of the run. | ||||
| // | ||||
| // The RLE encoding used can actually be encoded to and decoded from using | ||||
| // normal two's complement arithmetic. The methodology for doing so is below. | ||||
| // | ||||
| // Assuming the following: | ||||
| //	num: The value being encoded by RLE encoding. | ||||
| //	run: A sequence of RUNA and RUNB symbols represented as a binary integer, | ||||
| //	where RUNA is the 0 bit, RUNB is the 1 bit, and least-significant RUN | ||||
| //	symbols are at the least-significant bit positions. | ||||
| //	cnt: The number of RUNA and RUNB symbols. | ||||
| // | ||||
| // Then the RLE encoding used by bzip2 has this mathematical property: | ||||
| //	num+1 == (1<<cnt) | run | ||||
| type moveToFront struct { | ||||
| 	dictBuf [256]uint8 | ||||
| 	dictLen int | ||||
|  | ||||
| 	vals    []byte | ||||
| 	syms    []uint16 | ||||
| 	blkSize int | ||||
| } | ||||
|  | ||||
| func (mtf *moveToFront) Init(dict []uint8, blkSize int) { | ||||
| 	if len(dict) > len(mtf.dictBuf) { | ||||
| 		panicf(errors.Internal, "alphabet too large") | ||||
| 	} | ||||
| 	copy(mtf.dictBuf[:], dict) | ||||
| 	mtf.dictLen = len(dict) | ||||
| 	mtf.blkSize = blkSize | ||||
| } | ||||
|  | ||||
| func (mtf *moveToFront) Encode(vals []byte) (syms []uint16) { | ||||
| 	dict := mtf.dictBuf[:mtf.dictLen] | ||||
| 	syms = mtf.syms[:0] | ||||
|  | ||||
| 	if len(vals) > mtf.blkSize { | ||||
| 		panicf(errors.Internal, "exceeded block size") | ||||
| 	} | ||||
|  | ||||
| 	var lastNum uint32 | ||||
| 	for _, val := range vals { | ||||
| 		// Normal move-to-front transform. | ||||
| 		var idx uint8 // Reverse lookup idx in dict | ||||
| 		for di, dv := range dict { | ||||
| 			if dv == val { | ||||
| 				idx = uint8(di) | ||||
| 				break | ||||
| 			} | ||||
| 		} | ||||
| 		copy(dict[1:], dict[:idx]) | ||||
| 		dict[0] = val | ||||
|  | ||||
| 		// Run-length encoding augmentation. | ||||
| 		if idx == 0 { | ||||
| 			lastNum++ | ||||
| 			continue | ||||
| 		} | ||||
| 		if lastNum > 0 { | ||||
| 			for rc := lastNum + 1; rc != 1; rc >>= 1 { | ||||
| 				syms = append(syms, uint16(rc&1)) | ||||
| 			} | ||||
| 			lastNum = 0 | ||||
| 		} | ||||
| 		syms = append(syms, uint16(idx)+1) | ||||
| 	} | ||||
| 	if lastNum > 0 { | ||||
| 		for rc := lastNum + 1; rc != 1; rc >>= 1 { | ||||
| 			syms = append(syms, uint16(rc&1)) | ||||
| 		} | ||||
| 	} | ||||
| 	mtf.syms = syms | ||||
| 	return syms | ||||
| } | ||||
|  | ||||
| func (mtf *moveToFront) Decode(syms []uint16) (vals []byte) { | ||||
| 	dict := mtf.dictBuf[:mtf.dictLen] | ||||
| 	vals = mtf.vals[:0] | ||||
|  | ||||
| 	var lastCnt uint | ||||
| 	var lastRun uint32 | ||||
| 	for _, sym := range syms { | ||||
| 		// Run-length encoding augmentation. | ||||
| 		if sym < 2 { | ||||
| 			lastRun |= uint32(sym) << lastCnt | ||||
| 			lastCnt++ | ||||
| 			continue | ||||
| 		} | ||||
| 		if lastCnt > 0 { | ||||
| 			cnt := int((1<<lastCnt)|lastRun) - 1 | ||||
| 			if len(vals)+cnt > mtf.blkSize || lastCnt > 24 { | ||||
| 				panicf(errors.Corrupted, "run-length decoding exceeded block size") | ||||
| 			} | ||||
| 			for i := cnt; i > 0; i-- { | ||||
| 				vals = append(vals, dict[0]) | ||||
| 			} | ||||
| 			lastCnt, lastRun = 0, 0 | ||||
| 		} | ||||
|  | ||||
| 		// Normal move-to-front transform. | ||||
| 		val := dict[sym-1] // Forward lookup val in dict | ||||
| 		copy(dict[1:], dict[:sym-1]) | ||||
| 		dict[0] = val | ||||
|  | ||||
| 		if len(vals) >= mtf.blkSize { | ||||
| 			panicf(errors.Corrupted, "run-length decoding exceeded block size") | ||||
| 		} | ||||
| 		vals = append(vals, val) | ||||
| 	} | ||||
| 	if lastCnt > 0 { | ||||
| 		cnt := int((1<<lastCnt)|lastRun) - 1 | ||||
| 		if len(vals)+cnt > mtf.blkSize || lastCnt > 24 { | ||||
| 			panicf(errors.Corrupted, "run-length decoding exceeded block size") | ||||
| 		} | ||||
| 		for i := cnt; i > 0; i-- { | ||||
| 			vals = append(vals, dict[0]) | ||||
| 		} | ||||
| 	} | ||||
| 	mtf.vals = vals | ||||
| 	return vals | ||||
| } | ||||
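
The comment at the top of this file gives the arithmetic identity behind the RUNA/RUNB run lengths: num+1 == (1<<cnt) | run. A standalone sketch of just that encoding and its inverse (hypothetical helper names, mirroring the loops in Encode and Decode above):

package main

import "fmt"

// encodeRun returns the RUNA/RUNB symbols (0 or 1) for a run of num zeros,
// least-significant symbol first: the bits of num+1 with the leading 1 dropped.
func encodeRun(num uint32) []uint16 {
	var syms []uint16
	for rc := num + 1; rc != 1; rc >>= 1 {
		syms = append(syms, uint16(rc&1))
	}
	return syms
}

// decodeRun reassembles num from the symbols using num+1 == (1<<cnt) | run.
func decodeRun(syms []uint16) uint32 {
	var run uint32
	var cnt uint
	for _, s := range syms {
		run |= uint32(s) << cnt
		cnt++
	}
	return (1<<cnt | run) - 1
}

func main() {
	for _, num := range []uint32{1, 2, 3, 4, 7, 100} {
		syms := encodeRun(num)
		fmt.Println(num, syms, decodeRun(syms) == num)
	}
}
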
							
								
								
									
374 vendor/github.com/dsnet/compress/bzip2/prefix.go generated vendored Normal file
									
								
							| @@ -0,0 +1,374 @@ | ||||
| // Copyright 2015, Joe Tsai. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE.md file. | ||||
|  | ||||
| package bzip2 | ||||
|  | ||||
| import ( | ||||
| 	"io" | ||||
|  | ||||
| 	"github.com/dsnet/compress/internal" | ||||
| 	"github.com/dsnet/compress/internal/errors" | ||||
| 	"github.com/dsnet/compress/internal/prefix" | ||||
| ) | ||||
|  | ||||
| const ( | ||||
| 	minNumTrees = 2 | ||||
| 	maxNumTrees = 6 | ||||
|  | ||||
| 	maxPrefixBits = 20      // Maximum bit-width of a prefix code | ||||
| 	maxNumSyms    = 256 + 2 // Maximum number of symbols in the alphabet | ||||
| 	numBlockSyms  = 50      // Number of symbols coded per tree-selector group | ||||
| ) | ||||
|  | ||||
| // encSel and decSel are used to handle the prefix encoding for tree selectors. | ||||
| // The prefix encoding is as follows: | ||||
| // | ||||
| //	Code         TreeIdx | ||||
| //	0        <=> 0 | ||||
| //	10       <=> 1 | ||||
| //	110      <=> 2 | ||||
| //	1110     <=> 3 | ||||
| //	11110    <=> 4 | ||||
| //	111110   <=> 5 | ||||
| //	111111   <=> 6	Invalid tree index, so should fail | ||||
| // | ||||
| var encSel, decSel = func() (e prefix.Encoder, d prefix.Decoder) { | ||||
| 	var selCodes [maxNumTrees + 1]prefix.PrefixCode | ||||
| 	for i := range selCodes { | ||||
| 		selCodes[i] = prefix.PrefixCode{Sym: uint32(i), Len: uint32(i + 1)} | ||||
| 	} | ||||
| 	selCodes[maxNumTrees] = prefix.PrefixCode{Sym: maxNumTrees, Len: maxNumTrees} | ||||
| 	prefix.GeneratePrefixes(selCodes[:]) | ||||
| 	e.Init(selCodes[:]) | ||||
| 	d.Init(selCodes[:]) | ||||
| 	return | ||||
| }() | ||||
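The selector code initialized above is a unary-style code: tree index i is written as i one-bits followed by a zero, and the all-ones pattern is assigned to index maxNumTrees so that it decodes to an out-of-range selector and fails later. A standalone sketch (not the vendored API) that encodes and decodes such a selector stream as a string of '0'/'1' characters:

package main

import (
	"fmt"
	"strings"
)

// encodeSel emits the code for tree selector index i: i one-bits, then a zero.
func encodeSel(i int) string { return strings.Repeat("1", i) + "0" }

// decodeSel consumes one selector code from the front of bits and returns the
// decoded tree index plus the remaining bits.
func decodeSel(bits string) (idx int, rest string) {
	for bits[0] == '1' {
		idx++
		bits = bits[1:]
	}
	return idx, bits[1:] // drop the terminating zero bit
}

func main() {
	stream := encodeSel(0) + encodeSel(3) + encodeSel(1) // "0" + "1110" + "10"
	for stream != "" {
		var idx int
		idx, stream = decodeSel(stream)
		fmt.Println(idx) // prints 0, 3, 1
	}
}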
|  | ||||
| type prefixReader struct{ prefix.Reader } | ||||
|  | ||||
| func (pr *prefixReader) Init(r io.Reader) { | ||||
| 	pr.Reader.Init(r, true) | ||||
| } | ||||
|  | ||||
| func (pr *prefixReader) ReadBitsBE64(nb uint) uint64 { | ||||
| 	if nb <= 32 { | ||||
| 		v := uint32(pr.ReadBits(nb)) | ||||
| 		return uint64(internal.ReverseUint32N(v, nb)) | ||||
| 	} | ||||
| 	v0 := internal.ReverseUint32(uint32(pr.ReadBits(32))) | ||||
| 	v1 := internal.ReverseUint32(uint32(pr.ReadBits(nb - 32))) | ||||
| 	v := uint64(v0)<<32 | uint64(v1) | ||||
| 	return v >> (64 - nb) | ||||
| } | ||||
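ReadBitsBE64 above turns a bit reversal into a big-endian read: it fetches nb bits and reverses their order within nb bits (internal.ReverseUint32N). A standalone sketch of the same trick using math/bits, assuming the underlying reader hands back bits with the first-read bit in the least-significant position (which is what the reversal implies); the toy readLE closure stands in for prefix.Reader and only nb <= 32 is handled:

package main

import (
	"fmt"
	"math/bits"
)

// readBitsBE reads nb bits from an LSB-first bit source and returns them in
// big-endian order: the first bit read becomes the most significant bit.
func readBitsBE(readLE func(nb uint) uint32, nb uint) uint64 {
	v := readLE(nb)
	return uint64(bits.Reverse32(v) >> (32 - nb))
}

func main() {
	// Simulate a bit stream whose bits, in read order, are 1, 0, 1, 1.
	stream := []uint32{1, 0, 1, 1}
	pos := 0
	readLE := func(nb uint) uint32 {
		var v uint32
		for i := uint(0); i < nb; i++ {
			v |= stream[pos] << i
			pos++
		}
		return v
	}
	fmt.Printf("%04b\n", readBitsBE(readLE, 4)) // 1011: stream order, MSB first
}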
|  | ||||
| func (pr *prefixReader) ReadPrefixCodes(codes []prefix.PrefixCodes, trees []prefix.Decoder) { | ||||
| 	for i, pc := range codes { | ||||
| 		clen := int(pr.ReadBitsBE64(5)) | ||||
| 		sum := 1 << maxPrefixBits | ||||
| 		for sym := range pc { | ||||
| 			for { | ||||
| 				if clen < 1 || clen > maxPrefixBits { | ||||
| 					panicf(errors.Corrupted, "invalid prefix bit-length: %d", clen) | ||||
| 				} | ||||
|  | ||||
| 				b, ok := pr.TryReadBits(1) | ||||
| 				if !ok { | ||||
| 					b = pr.ReadBits(1) | ||||
| 				} | ||||
| 				if b == 0 { | ||||
| 					break | ||||
| 				} | ||||
|  | ||||
| 				b, ok = pr.TryReadBits(1) | ||||
| 				if !ok { | ||||
| 					b = pr.ReadBits(1) | ||||
| 				} | ||||
| 				clen -= int(b*2) - 1 // +1 or -1 | ||||
| 			} | ||||
| 			pc[sym] = prefix.PrefixCode{Sym: uint32(sym), Len: uint32(clen)} | ||||
| 			sum -= (1 << maxPrefixBits) >> uint(clen) | ||||
| 		} | ||||
|  | ||||
| 		if sum == 0 { | ||||
| 			// Fast path, but only handles complete trees. | ||||
| 			if err := prefix.GeneratePrefixes(pc); err != nil { | ||||
| 				errors.Panic(err) // Using complete trees; should never fail | ||||
| 			} | ||||
| 		} else { | ||||
| 			// Slow path, but handles anything. | ||||
| 			pc = handleDegenerateCodes(pc) // Never fails, but may fail later | ||||
| 			codes[i] = pc | ||||
| 		} | ||||
| 		trees[i].Init(pc) | ||||
| 	} | ||||
| } | ||||
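ReadPrefixCodes above parses bzip2's delta-coded bit lengths: a 5-bit starting length, then for each symbol zero or more adjustment pairs ("10" raises the running length by one, "11" lowers it) followed by a single "0" bit that emits it; WritePrefixCodes below produces the same stream. A standalone sketch (illustrative names, with the bits shown as a '0'/'1' string rather than packed bits) that encodes a list of code lengths and decodes it back:

package main

import "fmt"

// encodeLens delta-codes prefix code lengths the way WritePrefixCodes does,
// but as a readable bit string: "10" bumps the running length up by one,
// "11" bumps it down by one, and "0" emits it for the current symbol.
func encodeLens(lens []int) (start int, bits string) {
	start = lens[0]
	clen := start
	for _, l := range lens {
		for clen > l {
			bits += "11"
			clen--
		}
		for clen < l {
			bits += "10"
			clen++
		}
		bits += "0"
	}
	return start, bits
}

// decodeLens reverses encodeLens, mirroring the loop in ReadPrefixCodes.
func decodeLens(start int, bits string, n int) []int {
	lens := make([]int, 0, n)
	clen := start
	for len(lens) < n {
		switch {
		case bits[0] == '0': // emit the current length
			lens = append(lens, clen)
			bits = bits[1:]
		case bits[1] == '1': // "11": decrement
			clen--
			bits = bits[2:]
		default: // "10": increment
			clen++
			bits = bits[2:]
		}
	}
	return lens
}

func main() {
	lens := []int{3, 3, 4, 2, 5}
	start, bits := encodeLens(lens)
	fmt.Println(start, bits)
	fmt.Println(decodeLens(start, bits, len(lens))) // [3 3 4 2 5]
}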
|  | ||||
| type prefixWriter struct{ prefix.Writer } | ||||
|  | ||||
| func (pw *prefixWriter) Init(w io.Writer) { | ||||
| 	pw.Writer.Init(w, true) | ||||
| } | ||||
|  | ||||
| func (pw *prefixWriter) WriteBitsBE64(v uint64, nb uint) { | ||||
| 	if nb <= 32 { | ||||
| 		v := internal.ReverseUint32N(uint32(v), nb) | ||||
| 		pw.WriteBits(uint(v), nb) | ||||
| 		return | ||||
| 	} | ||||
| 	v <<= (64 - nb) | ||||
| 	v0 := internal.ReverseUint32(uint32(v >> 32)) | ||||
| 	v1 := internal.ReverseUint32(uint32(v)) | ||||
| 	pw.WriteBits(uint(v0), 32) | ||||
| 	pw.WriteBits(uint(v1), nb-32) | ||||
| 	return | ||||
| } | ||||
|  | ||||
| func (pw *prefixWriter) WritePrefixCodes(codes []prefix.PrefixCodes, trees []prefix.Encoder) { | ||||
| 	for i, pc := range codes { | ||||
| 		if err := prefix.GeneratePrefixes(pc); err != nil { | ||||
| 			errors.Panic(err) // Using complete trees; should never fail | ||||
| 		} | ||||
| 		trees[i].Init(pc) | ||||
|  | ||||
| 		clen := int(pc[0].Len) | ||||
| 		pw.WriteBitsBE64(uint64(clen), 5) | ||||
| 		for _, c := range pc { | ||||
| 			for int(c.Len) < clen { | ||||
| 				pw.WriteBits(3, 2) // 11 | ||||
| 				clen-- | ||||
| 			} | ||||
| 			for int(c.Len) > clen { | ||||
| 				pw.WriteBits(1, 2) // 10 | ||||
| 				clen++ | ||||
| 			} | ||||
| 			pw.WriteBits(0, 1) | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // handleDegenerateCodes converts a degenerate tree into a canonical tree. | ||||
| // | ||||
| // For example, when the input is an under-subscribed tree: | ||||
| //	input:  []PrefixCode{ | ||||
| //		{Sym: 0, Len: 3}, | ||||
| //		{Sym: 1, Len: 4}, | ||||
| //		{Sym: 2, Len: 3}, | ||||
| //	} | ||||
| //	output: []PrefixCode{ | ||||
| //		{Sym:   0, Len: 3, Val:  0}, //  000 | ||||
| //		{Sym:   1, Len: 4, Val:  2}, // 0010 | ||||
| //		{Sym:   2, Len: 3, Val:  4}, //  100 | ||||
| //		{Sym: 258, Len: 4, Val: 10}, // 1010 | ||||
| //		{Sym: 259, Len: 3, Val:  6}, //  110 | ||||
| //		{Sym: 260, Len: 1, Val:  1}, //    1 | ||||
| //	} | ||||
| // | ||||
| // For example, when the input is an over-subscribed tree: | ||||
| //	input:  []PrefixCode{ | ||||
| //		{Sym: 0, Len: 1}, | ||||
| //		{Sym: 1, Len: 3}, | ||||
| //		{Sym: 2, Len: 4}, | ||||
| //		{Sym: 3, Len: 3}, | ||||
| //		{Sym: 4, Len: 2}, | ||||
| //	} | ||||
| //	output: []PrefixCode{ | ||||
| //		{Sym: 0, Len: 1, Val: 0}, //   0 | ||||
| //		{Sym: 1, Len: 3, Val: 3}, // 011 | ||||
| //		{Sym: 3, Len: 3, Val: 7}, // 111 | ||||
| //		{Sym: 4, Len: 2, Val: 1}, //  01 | ||||
| //	} | ||||
| func handleDegenerateCodes(codes prefix.PrefixCodes) prefix.PrefixCodes { | ||||
| 	// Since there is no formal definition for the BZip2 format, there is no | ||||
| 	// specification that says that the code lengths must form a complete | ||||
| 	// prefix tree (i.e., it is neither over-subscribed nor under-subscribed). | ||||
| 	// Thus, the original C implementation becomes the reference for how prefix | ||||
| 	// decoding is done in these edge cases. Unfortunately, the C version does | ||||
| 	// not error when an invalid tree is used, but rather allows decoding to | ||||
| 	// continue and only errors if some bit pattern happens to cause an error. | ||||
| 	// Thus, it is possible for an invalid tree to end up decoding an input | ||||
| 	// "properly" so long as invalid bit patterns are not present. In order to | ||||
| 	// replicate this non-specified behavior, we use a ported version of the | ||||
| 	// C code to generate the codes as a valid canonical tree by substituting | ||||
| 	// invalid nodes with invalid symbols. | ||||
| 	// | ||||
| 	// ==================================================== | ||||
| 	// This program, "bzip2", the associated library "libbzip2", and all | ||||
| 	// documentation, are copyright (C) 1996-2010 Julian R Seward.  All | ||||
| 	// rights reserved. | ||||
| 	// | ||||
| 	// Redistribution and use in source and binary forms, with or without | ||||
| 	// modification, are permitted provided that the following conditions | ||||
| 	// are met: | ||||
| 	// | ||||
| 	// 1. Redistributions of source code must retain the above copyright | ||||
| 	//    notice, this list of conditions and the following disclaimer. | ||||
| 	// | ||||
| 	// 2. The origin of this software must not be misrepresented; you must | ||||
| 	//    not claim that you wrote the original software.  If you use this | ||||
| 	//    software in a product, an acknowledgment in the product | ||||
| 	//    documentation would be appreciated but is not required. | ||||
| 	// | ||||
| 	// 3. Altered source versions must be plainly marked as such, and must | ||||
| 	//    not be misrepresented as being the original software. | ||||
| 	// | ||||
| 	// 4. The name of the author may not be used to endorse or promote | ||||
| 	//    products derived from this software without specific prior written | ||||
| 	//    permission. | ||||
| 	// | ||||
| 	// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS | ||||
| 	// OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||||
| 	// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||||
| 	// ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY | ||||
| 	// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||||
| 	// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE | ||||
| 	// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||||
| 	// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | ||||
| 	// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||||
| 	// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||||
| 	// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| 	// | ||||
| 	// Julian Seward, jseward@bzip.org | ||||
| 	// bzip2/libbzip2 version 1.0.6 of 6 September 2010 | ||||
| 	// ==================================================== | ||||
| 	var ( | ||||
| 		limits [maxPrefixBits + 2]int32 | ||||
| 		bases  [maxPrefixBits + 2]int32 | ||||
| 		perms  [maxNumSyms]int32 | ||||
|  | ||||
| 		minLen = uint32(maxPrefixBits) | ||||
| 		maxLen = uint32(0) | ||||
| 	) | ||||
|  | ||||
| 	const ( | ||||
| 		statusOkay = iota | ||||
| 		statusInvalid | ||||
| 		statusNeedBits | ||||
| 		statusMaxBits | ||||
| 	) | ||||
|  | ||||
| 	// createTables is the BZ2_hbCreateDecodeTables function from the C code. | ||||
| 	createTables := func(codes []prefix.PrefixCode) { | ||||
| 		for _, c := range codes { | ||||
| 			if c.Len > maxLen { | ||||
| 				maxLen = c.Len | ||||
| 			} | ||||
| 			if c.Len < minLen { | ||||
| 				minLen = c.Len | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		var pp int | ||||
| 		for i := minLen; i <= maxLen; i++ { | ||||
| 			for j, c := range codes { | ||||
| 				if c.Len == i { | ||||
| 					perms[pp] = int32(j) | ||||
| 					pp++ | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		var vec int32 | ||||
| 		for _, c := range codes { | ||||
| 			bases[c.Len+1]++ | ||||
| 		} | ||||
| 		for i := 1; i < len(bases); i++ { | ||||
| 			bases[i] += bases[i-1] | ||||
| 		} | ||||
| 		for i := minLen; i <= maxLen; i++ { | ||||
| 			vec += bases[i+1] - bases[i] | ||||
| 			limits[i] = vec - 1 | ||||
| 			vec <<= 1 | ||||
| 		} | ||||
| 		for i := minLen + 1; i <= maxLen; i++ { | ||||
| 			bases[i] = ((limits[i-1] + 1) << 1) - bases[i] | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// getSymbol is the GET_MTF_VAL macro from the C code. | ||||
| 	getSymbol := func(c prefix.PrefixCode) (uint32, int) { | ||||
| 		v := internal.ReverseUint32(c.Val) | ||||
| 		n := c.Len | ||||
|  | ||||
| 		zn := minLen | ||||
| 		if zn > n { | ||||
| 			return 0, statusNeedBits | ||||
| 		} | ||||
| 		zvec := int32(v >> (32 - zn)) | ||||
| 		v <<= zn | ||||
| 		for { | ||||
| 			if zn > maxLen { | ||||
| 				return 0, statusMaxBits | ||||
| 			} | ||||
| 			if zvec <= limits[zn] { | ||||
| 				break | ||||
| 			} | ||||
| 			zn++ | ||||
| 			if zn > n { | ||||
| 				return 0, statusNeedBits | ||||
| 			} | ||||
| 			zvec = (zvec << 1) | int32(v>>31) | ||||
| 			v <<= 1 | ||||
| 		} | ||||
| 		if zvec-bases[zn] < 0 || zvec-bases[zn] >= maxNumSyms { | ||||
| 			return 0, statusInvalid | ||||
| 		} | ||||
| 		return uint32(perms[zvec-bases[zn]]), statusOkay | ||||
| 	} | ||||
|  | ||||
| 	// Step 1: Create the prefix trees using the C algorithm. | ||||
| 	createTables(codes) | ||||
|  | ||||
| 	// Step 2: Starting with the shortest bit pattern, explore the whole tree. | ||||
| 	// If tree is under-subscribed, the worst-case runtime is O(1<<maxLen). | ||||
| 	// If tree is over-subscribed, the worst-case runtime is O(maxNumSyms). | ||||
| 	var pcodesArr [2 * maxNumSyms]prefix.PrefixCode | ||||
| 	pcodes := pcodesArr[:maxNumSyms] | ||||
| 	var exploreCode func(prefix.PrefixCode) bool | ||||
| 	exploreCode = func(c prefix.PrefixCode) (term bool) { | ||||
| 		sym, status := getSymbol(c) | ||||
| 		switch status { | ||||
| 		case statusOkay: | ||||
| 			// This code is valid, so insert it. | ||||
| 			c.Sym = sym | ||||
| 			pcodes[sym] = c | ||||
| 			term = true | ||||
| 		case statusInvalid: | ||||
| 			// This code is invalid, so insert an invalid symbol. | ||||
| 			c.Sym = uint32(len(pcodes)) | ||||
| 			pcodes = append(pcodes, c) | ||||
| 			term = true | ||||
| 		case statusNeedBits: | ||||
| 			// This code is too short, so explore both children. | ||||
| 			c.Len++ | ||||
| 			c0, c1 := c, c | ||||
| 			c1.Val |= 1 << (c.Len - 1) | ||||
|  | ||||
| 			b0 := exploreCode(c0) | ||||
| 			b1 := exploreCode(c1) | ||||
| 			switch { | ||||
| 			case !b0 && b1: | ||||
| 				c0.Sym = uint32(len(pcodes)) | ||||
| 				pcodes = append(pcodes, c0) | ||||
| 			case !b1 && b0: | ||||
| 				c1.Sym = uint32(len(pcodes)) | ||||
| 				pcodes = append(pcodes, c1) | ||||
| 			} | ||||
| 			term = b0 || b1 | ||||
| 		case statusMaxBits: | ||||
| 			// This code is too long, so report it upstream. | ||||
| 			term = false | ||||
| 		} | ||||
| 		return term // Did this code terminate? | ||||
| 	} | ||||
| 	exploreCode(prefix.PrefixCode{}) | ||||
|  | ||||
| 	// Step 3: Copy new sparse codes to old output codes. | ||||
| 	codes = codes[:0] | ||||
| 	for _, c := range pcodes { | ||||
| 		if c.Len > 0 { | ||||
| 			codes = append(codes, c) | ||||
| 		} | ||||
| 	} | ||||
| 	return codes | ||||
| } | ||||
							
								
								
									
274 vendor/github.com/dsnet/compress/bzip2/reader.go generated vendored Normal file
									
								
							| @@ -0,0 +1,274 @@ | ||||
| // Copyright 2015, Joe Tsai. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE.md file. | ||||
|  | ||||
| package bzip2 | ||||
|  | ||||
| import ( | ||||
| 	"io" | ||||
|  | ||||
| 	"github.com/dsnet/compress/internal" | ||||
| 	"github.com/dsnet/compress/internal/errors" | ||||
| 	"github.com/dsnet/compress/internal/prefix" | ||||
| ) | ||||
|  | ||||
| type Reader struct { | ||||
| 	InputOffset  int64 // Total number of bytes read from underlying io.Reader | ||||
| 	OutputOffset int64 // Total number of bytes emitted from Read | ||||
|  | ||||
| 	rd       prefixReader | ||||
| 	err      error | ||||
| 	level    int    // The current compression level | ||||
| 	rdHdrFtr int    // Number of times we read the stream header and footer | ||||
| 	blkCRC   uint32 // CRC-32 IEEE of each block (as stored) | ||||
| 	endCRC   uint32 // Checksum of all blocks using bzip2's custom method | ||||
|  | ||||
| 	crc crc | ||||
| 	mtf moveToFront | ||||
| 	bwt burrowsWheelerTransform | ||||
| 	rle runLengthEncoding | ||||
|  | ||||
| 	// These fields are allocated with Reader and re-used later. | ||||
| 	treeSels []uint8 | ||||
| 	codes2D  [maxNumTrees][maxNumSyms]prefix.PrefixCode | ||||
| 	codes1D  [maxNumTrees]prefix.PrefixCodes | ||||
| 	trees1D  [maxNumTrees]prefix.Decoder | ||||
| 	syms     []uint16 | ||||
|  | ||||
| 	fuzzReader // Exported functionality when fuzz testing | ||||
| } | ||||
|  | ||||
| type ReaderConfig struct { | ||||
| 	_ struct{} // Blank field to prevent unkeyed struct literals | ||||
| } | ||||
|  | ||||
| func NewReader(r io.Reader, conf *ReaderConfig) (*Reader, error) { | ||||
| 	zr := new(Reader) | ||||
| 	zr.Reset(r) | ||||
| 	return zr, nil | ||||
| } | ||||
|  | ||||
| func (zr *Reader) Reset(r io.Reader) error { | ||||
| 	*zr = Reader{ | ||||
| 		rd: zr.rd, | ||||
|  | ||||
| 		mtf: zr.mtf, | ||||
| 		bwt: zr.bwt, | ||||
| 		rle: zr.rle, | ||||
|  | ||||
| 		treeSels: zr.treeSels, | ||||
| 		trees1D:  zr.trees1D, | ||||
| 		syms:     zr.syms, | ||||
| 	} | ||||
| 	zr.rd.Init(r) | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (zr *Reader) Read(buf []byte) (int, error) { | ||||
| 	for { | ||||
| 		cnt, err := zr.rle.Read(buf) | ||||
| 		if err != rleDone && zr.err == nil { | ||||
| 			zr.err = err | ||||
| 		} | ||||
| 		if cnt > 0 { | ||||
| 			zr.crc.update(buf[:cnt]) | ||||
| 			zr.OutputOffset += int64(cnt) | ||||
| 			return cnt, nil | ||||
| 		} | ||||
| 		if zr.err != nil || len(buf) == 0 { | ||||
| 			return 0, zr.err | ||||
| 		} | ||||
|  | ||||
| 		// Read the next chunk. | ||||
| 		zr.rd.Offset = zr.InputOffset | ||||
| 		func() { | ||||
| 			defer errors.Recover(&zr.err) | ||||
| 			if zr.rdHdrFtr%2 == 0 { | ||||
| 				// Check if we are already at EOF. | ||||
| 				if err := zr.rd.PullBits(1); err != nil { | ||||
| 					if err == io.ErrUnexpectedEOF && zr.rdHdrFtr > 0 { | ||||
| 						err = io.EOF // EOF is okay if we read at least one stream | ||||
| 					} | ||||
| 					errors.Panic(err) | ||||
| 				} | ||||
|  | ||||
| 				// Read stream header. | ||||
| 				if zr.rd.ReadBitsBE64(16) != hdrMagic { | ||||
| 					panicf(errors.Corrupted, "invalid stream magic") | ||||
| 				} | ||||
| 				if ver := zr.rd.ReadBitsBE64(8); ver != 'h' { | ||||
| 					if ver == '0' { | ||||
| 						panicf(errors.Deprecated, "bzip1 format is not supported") | ||||
| 					} | ||||
| 					panicf(errors.Corrupted, "invalid version: %q", ver) | ||||
| 				} | ||||
| 				lvl := int(zr.rd.ReadBitsBE64(8)) - '0' | ||||
| 				if lvl < BestSpeed || lvl > BestCompression { | ||||
| 					panicf(errors.Corrupted, "invalid block size: %d", lvl*blockSize) | ||||
| 				} | ||||
| 				zr.level = lvl | ||||
| 				zr.rdHdrFtr++ | ||||
| 			} else { | ||||
| 				// Check and update the CRC. | ||||
| 				if internal.GoFuzz { | ||||
| 					zr.updateChecksum(-1, zr.crc.val) // Update with value | ||||
| 					zr.blkCRC = zr.crc.val            // Suppress CRC failures | ||||
| 				} | ||||
| 				if zr.blkCRC != zr.crc.val { | ||||
| 					panicf(errors.Corrupted, "mismatching block checksum") | ||||
| 				} | ||||
| 				zr.endCRC = (zr.endCRC<<1 | zr.endCRC>>31) ^ zr.blkCRC | ||||
| 			} | ||||
| 			buf := zr.decodeBlock() | ||||
| 			zr.rle.Init(buf) | ||||
| 		}() | ||||
| 		if zr.InputOffset, err = zr.rd.Flush(); zr.err == nil { | ||||
| 			zr.err = err | ||||
| 		} | ||||
| 		if zr.err != nil { | ||||
| 			zr.err = errWrap(zr.err, errors.Corrupted) | ||||
| 			return 0, zr.err | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func (zr *Reader) Close() error { | ||||
| 	if zr.err == io.EOF || zr.err == errClosed { | ||||
| 		zr.rle.Init(nil) // Make sure future reads fail | ||||
| 		zr.err = errClosed | ||||
| 		return nil | ||||
| 	} | ||||
| 	return zr.err // Return the persistent error | ||||
| } | ||||
|  | ||||
| func (zr *Reader) decodeBlock() []byte { | ||||
| 	if magic := zr.rd.ReadBitsBE64(48); magic != blkMagic { | ||||
| 		if magic == endMagic { | ||||
| 			endCRC := uint32(zr.rd.ReadBitsBE64(32)) | ||||
| 			if internal.GoFuzz { | ||||
| 				zr.updateChecksum(zr.rd.BitsRead()-32, zr.endCRC) | ||||
| 				endCRC = zr.endCRC // Suppress CRC failures | ||||
| 			} | ||||
| 			if zr.endCRC != endCRC { | ||||
| 				panicf(errors.Corrupted, "mismatching stream checksum") | ||||
| 			} | ||||
| 			zr.endCRC = 0 | ||||
| 			zr.rd.ReadPads() | ||||
| 			zr.rdHdrFtr++ | ||||
| 			return nil | ||||
| 		} | ||||
| 		panicf(errors.Corrupted, "invalid block or footer magic") | ||||
| 	} | ||||
|  | ||||
| 	zr.crc.val = 0 | ||||
| 	zr.blkCRC = uint32(zr.rd.ReadBitsBE64(32)) | ||||
| 	if internal.GoFuzz { | ||||
| 		zr.updateChecksum(zr.rd.BitsRead()-32, 0) // Record offset only | ||||
| 	} | ||||
| 	if zr.rd.ReadBitsBE64(1) != 0 { | ||||
| 		panicf(errors.Deprecated, "block randomization is not supported") | ||||
| 	} | ||||
|  | ||||
| 	// Read BWT related fields. | ||||
| 	ptr := int(zr.rd.ReadBitsBE64(24)) // BWT origin pointer | ||||
|  | ||||
| 	// Read MTF related fields. | ||||
| 	var dictArr [256]uint8 | ||||
| 	dict := dictArr[:0] | ||||
| 	bmapHi := uint16(zr.rd.ReadBits(16)) | ||||
| 	for i := 0; i < 256; i, bmapHi = i+16, bmapHi>>1 { | ||||
| 		if bmapHi&1 > 0 { | ||||
| 			bmapLo := uint16(zr.rd.ReadBits(16)) | ||||
| 			for j := 0; j < 16; j, bmapLo = j+1, bmapLo>>1 { | ||||
| 				if bmapLo&1 > 0 { | ||||
| 					dict = append(dict, uint8(i+j)) | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Step 1: Prefix encoding. | ||||
| 	syms := zr.decodePrefix(len(dict)) | ||||
|  | ||||
| 	// Step 2: Move-to-front transform and run-length encoding. | ||||
| 	zr.mtf.Init(dict, zr.level*blockSize) | ||||
| 	buf := zr.mtf.Decode(syms) | ||||
|  | ||||
| 	// Step 3: Burrows-Wheeler transformation. | ||||
| 	if ptr >= len(buf) { | ||||
| 		panicf(errors.Corrupted, "origin pointer (0x%06x) exceeds block size: %d", ptr, len(buf)) | ||||
| 	} | ||||
| 	zr.bwt.Decode(buf, ptr) | ||||
|  | ||||
| 	return buf | ||||
| } | ||||
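decodeBlock rebuilds the MTF dictionary from a two-level bitmap: a 16-bit high map with one bit per group of 16 byte values, followed by one 16-bit low map for each group that is present (encodeBlock in writer.go constructs the same maps). A standalone sketch (not the vendored API) that packs the set of bytes occurring in a buffer and expands it back into a sorted dictionary:

package main

import "fmt"

// buildBitmap packs the set of byte values present in data into bzip2's
// two-level bitmap: bit i of hi says "some value in [16*i, 16*i+15] occurs",
// and lo[i] has one bit per value within that group.
func buildBitmap(data []byte) (hi uint16, lo [16]uint16) {
	for _, c := range data {
		hi |= 1 << (c >> 4)
		lo[c>>4] |= 1 << (c & 0xf)
	}
	return hi, lo
}

// expandBitmap recovers the sorted list of present byte values, as the
// reader does when it rebuilds the MTF dictionary.
func expandBitmap(hi uint16, lo [16]uint16) (dict []byte) {
	for i := 0; i < 16; i++ {
		if hi&(1<<i) == 0 {
			continue
		}
		for j := 0; j < 16; j++ {
			if lo[i]&(1<<j) != 0 {
				dict = append(dict, byte(i*16+j))
			}
		}
	}
	return dict
}

func main() {
	hi, lo := buildBitmap([]byte("banana"))
	fmt.Println(expandBitmap(hi, lo)) // [97 98 110] = 'a', 'b', 'n'
}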
|  | ||||
| func (zr *Reader) decodePrefix(numSyms int) (syms []uint16) { | ||||
| 	numSyms += 2 // Remove 0 symbol, add RUNA, RUNB, and EOF symbols | ||||
| 	if numSyms < 3 { | ||||
| 		panicf(errors.Corrupted, "not enough prefix symbols: %d", numSyms) | ||||
| 	} | ||||
|  | ||||
| 	// Read information about the trees and tree selectors. | ||||
| 	var mtf internal.MoveToFront | ||||
| 	numTrees := int(zr.rd.ReadBitsBE64(3)) | ||||
| 	if numTrees < minNumTrees || numTrees > maxNumTrees { | ||||
| 		panicf(errors.Corrupted, "invalid number of prefix trees: %d", numTrees) | ||||
| 	} | ||||
| 	numSels := int(zr.rd.ReadBitsBE64(15)) | ||||
| 	if cap(zr.treeSels) < numSels { | ||||
| 		zr.treeSels = make([]uint8, numSels) | ||||
| 	} | ||||
| 	treeSels := zr.treeSels[:numSels] | ||||
| 	for i := range treeSels { | ||||
| 		sym, ok := zr.rd.TryReadSymbol(&decSel) | ||||
| 		if !ok { | ||||
| 			sym = zr.rd.ReadSymbol(&decSel) | ||||
| 		} | ||||
| 		if int(sym) >= numTrees { | ||||
| 			panicf(errors.Corrupted, "invalid prefix tree selector: %d", sym) | ||||
| 		} | ||||
| 		treeSels[i] = uint8(sym) | ||||
| 	} | ||||
| 	mtf.Decode(treeSels) | ||||
| 	zr.treeSels = treeSels | ||||
|  | ||||
| 	// Initialize prefix codes. | ||||
| 	for i := range zr.codes2D[:numTrees] { | ||||
| 		zr.codes1D[i] = zr.codes2D[i][:numSyms] | ||||
| 	} | ||||
| 	zr.rd.ReadPrefixCodes(zr.codes1D[:numTrees], zr.trees1D[:numTrees]) | ||||
|  | ||||
| 	// Read prefix encoded symbols of compressed data. | ||||
| 	var tree *prefix.Decoder | ||||
| 	var blkLen, selIdx int | ||||
| 	syms = zr.syms[:0] | ||||
| 	for { | ||||
| 		if blkLen == 0 { | ||||
| 			blkLen = numBlockSyms | ||||
| 			if selIdx >= len(treeSels) { | ||||
| 				panicf(errors.Corrupted, "not enough prefix tree selectors") | ||||
| 			} | ||||
| 			tree = &zr.trees1D[treeSels[selIdx]] | ||||
| 			selIdx++ | ||||
| 		} | ||||
| 		blkLen-- | ||||
| 		sym, ok := zr.rd.TryReadSymbol(tree) | ||||
| 		if !ok { | ||||
| 			sym = zr.rd.ReadSymbol(tree) | ||||
| 		} | ||||
|  | ||||
| 		if int(sym) == numSyms-1 { | ||||
| 			break // EOF marker | ||||
| 		} | ||||
| 		if int(sym) >= numSyms { | ||||
| 			panicf(errors.Corrupted, "invalid prefix symbol: %d", sym) | ||||
| 		} | ||||
| 		if len(syms) >= zr.level*blockSize { | ||||
| 			panicf(errors.Corrupted, "number of prefix symbols exceeds block size") | ||||
| 		} | ||||
| 		syms = append(syms, uint16(sym)) | ||||
| 	} | ||||
| 	zr.syms = syms | ||||
| 	return syms | ||||
| } | ||||
							
								
								
									
101 vendor/github.com/dsnet/compress/bzip2/rle1.go generated vendored Normal file
									
								
							| @@ -0,0 +1,101 @@ | ||||
| // Copyright 2015, Joe Tsai. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE.md file. | ||||
|  | ||||
| package bzip2 | ||||
|  | ||||
| import "github.com/dsnet/compress/internal/errors" | ||||
|  | ||||
| // rleDone is a special "error" to indicate that the RLE stage is done. | ||||
| var rleDone = errorf(errors.Unknown, "RLE1 stage is completed") | ||||
|  | ||||
| // runLengthEncoding implements the first RLE stage of bzip2. Every sequence | ||||
| // of 4..255 duplicated bytes is replaced by only the first 4 bytes, and a | ||||
| // single byte representing the repeat length. Similar to the C bzip2 | ||||
| // implementation, the encoder will always terminate repeat sequences with a | ||||
| // count (even if it is the end of the buffer), and it will also never produce | ||||
| // run lengths of 256..259. The decoder can handle the latter case. | ||||
| // | ||||
| // For example, if the input was: | ||||
| //	input:  "AAAAAAABBBBCCCD" | ||||
| // | ||||
| // Then the output will be: | ||||
| //	output: "AAAA\x03BBBB\x00CCCD" | ||||
| type runLengthEncoding struct { | ||||
| 	buf     []byte | ||||
| 	idx     int | ||||
| 	lastVal byte | ||||
| 	lastCnt int | ||||
| } | ||||
|  | ||||
| func (rle *runLengthEncoding) Init(buf []byte) { | ||||
| 	*rle = runLengthEncoding{buf: buf} | ||||
| } | ||||
|  | ||||
| func (rle *runLengthEncoding) Write(buf []byte) (int, error) { | ||||
| 	for i, b := range buf { | ||||
| 		if rle.lastVal != b { | ||||
| 			rle.lastCnt = 0 | ||||
| 		} | ||||
| 		rle.lastCnt++ | ||||
| 		switch { | ||||
| 		case rle.lastCnt < 4: | ||||
| 			if rle.idx >= len(rle.buf) { | ||||
| 				return i, rleDone | ||||
| 			} | ||||
| 			rle.buf[rle.idx] = b | ||||
| 			rle.idx++ | ||||
| 		case rle.lastCnt == 4: | ||||
| 			if rle.idx+1 >= len(rle.buf) { | ||||
| 				return i, rleDone | ||||
| 			} | ||||
| 			rle.buf[rle.idx] = b | ||||
| 			rle.idx++ | ||||
| 			rle.buf[rle.idx] = 0 | ||||
| 			rle.idx++ | ||||
| 		case rle.lastCnt < 256: | ||||
| 			rle.buf[rle.idx-1]++ | ||||
| 		default: | ||||
| 			if rle.idx >= len(rle.buf) { | ||||
| 				return i, rleDone | ||||
| 			} | ||||
| 			rle.lastCnt = 1 | ||||
| 			rle.buf[rle.idx] = b | ||||
| 			rle.idx++ | ||||
| 		} | ||||
| 		rle.lastVal = b | ||||
| 	} | ||||
| 	return len(buf), nil | ||||
| } | ||||
|  | ||||
| func (rle *runLengthEncoding) Read(buf []byte) (int, error) { | ||||
| 	for i := range buf { | ||||
| 		switch { | ||||
| 		case rle.lastCnt == -4: | ||||
| 			if rle.idx >= len(rle.buf) { | ||||
| 				return i, errorf(errors.Corrupted, "missing terminating run-length repeater") | ||||
| 			} | ||||
| 			rle.lastCnt = int(rle.buf[rle.idx]) | ||||
| 			rle.idx++ | ||||
| 			if rle.lastCnt > 0 { | ||||
| 				break // Break the switch | ||||
| 			} | ||||
| 			fallthrough // Count was zero, continue the work | ||||
| 		case rle.lastCnt <= 0: | ||||
| 			if rle.idx >= len(rle.buf) { | ||||
| 				return i, rleDone | ||||
| 			} | ||||
| 			b := rle.buf[rle.idx] | ||||
| 			rle.idx++ | ||||
| 			if b != rle.lastVal { | ||||
| 				rle.lastCnt = 0 | ||||
| 				rle.lastVal = b | ||||
| 			} | ||||
| 		} | ||||
| 		buf[i] = rle.lastVal | ||||
| 		rle.lastCnt-- | ||||
| 	} | ||||
| 	return len(buf), nil | ||||
| } | ||||
|  | ||||
| func (rle *runLengthEncoding) Bytes() []byte { return rle.buf[:rle.idx] } | ||||
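A standalone sketch of the forward RLE1 transform described in the comment above (an illustrative helper that writes into a fresh slice rather than the fixed block buffer the vendored Write method fills; runs are capped at 255 bytes, so the extra-count byte stays in 0..251). It reproduces the documented example:

package main

import "fmt"

// rle1 applies bzip2's first RLE stage: a run of 4 or more identical bytes is
// emitted as the first 4 bytes plus one byte holding the extra repeat count;
// shorter runs are copied verbatim.
func rle1(in []byte) (out []byte) {
	for i := 0; i < len(in); {
		j := i
		for j < len(in) && in[j] == in[i] && j-i < 255 {
			j++
		}
		run := j - i
		if run < 4 {
			out = append(out, in[i:j]...)
		} else {
			out = append(out, in[i], in[i], in[i], in[i], byte(run-4))
		}
		i = j
	}
	return out
}

func main() {
	fmt.Printf("%q\n", rle1([]byte("AAAAAAABBBBCCCD"))) // "AAAA\x03BBBB\x00CCCD"
}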
							
								
								
									
307 vendor/github.com/dsnet/compress/bzip2/writer.go generated vendored Normal file
									
								
							| @@ -0,0 +1,307 @@ | ||||
| // Copyright 2015, Joe Tsai. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE.md file. | ||||
|  | ||||
| package bzip2 | ||||
|  | ||||
| import ( | ||||
| 	"io" | ||||
|  | ||||
| 	"github.com/dsnet/compress/internal" | ||||
| 	"github.com/dsnet/compress/internal/errors" | ||||
| 	"github.com/dsnet/compress/internal/prefix" | ||||
| ) | ||||
|  | ||||
| type Writer struct { | ||||
| 	InputOffset  int64 // Total number of bytes issued to Write | ||||
| 	OutputOffset int64 // Total number of bytes written to underlying io.Writer | ||||
|  | ||||
| 	wr     prefixWriter | ||||
| 	err    error | ||||
| 	level  int    // The current compression level | ||||
| 	wrHdr  bool   // Have we written the stream header? | ||||
| 	blkCRC uint32 // CRC-32 IEEE of each block | ||||
| 	endCRC uint32 // Checksum of all blocks using bzip2's custom method | ||||
|  | ||||
| 	crc crc | ||||
| 	rle runLengthEncoding | ||||
| 	bwt burrowsWheelerTransform | ||||
| 	mtf moveToFront | ||||
|  | ||||
| 	// These fields are allocated with Writer and re-used later. | ||||
| 	buf         []byte | ||||
| 	treeSels    []uint8 | ||||
| 	treeSelsMTF []uint8 | ||||
| 	codes2D     [maxNumTrees][maxNumSyms]prefix.PrefixCode | ||||
| 	codes1D     [maxNumTrees]prefix.PrefixCodes | ||||
| 	trees1D     [maxNumTrees]prefix.Encoder | ||||
| } | ||||
|  | ||||
| type WriterConfig struct { | ||||
| 	Level int | ||||
|  | ||||
| 	_ struct{} // Blank field to prevent unkeyed struct literals | ||||
| } | ||||
|  | ||||
| func NewWriter(w io.Writer, conf *WriterConfig) (*Writer, error) { | ||||
| 	var lvl int | ||||
| 	if conf != nil { | ||||
| 		lvl = conf.Level | ||||
| 	} | ||||
| 	if lvl == 0 { | ||||
| 		lvl = DefaultCompression | ||||
| 	} | ||||
| 	if lvl < BestSpeed || lvl > BestCompression { | ||||
| 		return nil, errorf(errors.Invalid, "compression level: %d", lvl) | ||||
| 	} | ||||
| 	zw := new(Writer) | ||||
| 	zw.level = lvl | ||||
| 	zw.Reset(w) | ||||
| 	return zw, nil | ||||
| } | ||||
|  | ||||
| func (zw *Writer) Reset(w io.Writer) error { | ||||
| 	*zw = Writer{ | ||||
| 		wr:    zw.wr, | ||||
| 		level: zw.level, | ||||
|  | ||||
| 		rle: zw.rle, | ||||
| 		bwt: zw.bwt, | ||||
| 		mtf: zw.mtf, | ||||
|  | ||||
| 		buf:         zw.buf, | ||||
| 		treeSels:    zw.treeSels, | ||||
| 		treeSelsMTF: zw.treeSelsMTF, | ||||
| 		trees1D:     zw.trees1D, | ||||
| 	} | ||||
| 	zw.wr.Init(w) | ||||
| 	if len(zw.buf) != zw.level*blockSize { | ||||
| 		zw.buf = make([]byte, zw.level*blockSize) | ||||
| 	} | ||||
| 	zw.rle.Init(zw.buf) | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (zw *Writer) Write(buf []byte) (int, error) { | ||||
| 	if zw.err != nil { | ||||
| 		return 0, zw.err | ||||
| 	} | ||||
|  | ||||
| 	cnt := len(buf) | ||||
| 	for { | ||||
| 		wrCnt, err := zw.rle.Write(buf) | ||||
| 		if err != rleDone && zw.err == nil { | ||||
| 			zw.err = err | ||||
| 		} | ||||
| 		zw.crc.update(buf[:wrCnt]) | ||||
| 		buf = buf[wrCnt:] | ||||
| 		if len(buf) == 0 { | ||||
| 			zw.InputOffset += int64(cnt) | ||||
| 			return cnt, nil | ||||
| 		} | ||||
| 		if zw.err = zw.flush(); zw.err != nil { | ||||
| 			return 0, zw.err | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func (zw *Writer) flush() error { | ||||
| 	vals := zw.rle.Bytes() | ||||
| 	if len(vals) == 0 { | ||||
| 		return nil | ||||
| 	} | ||||
| 	zw.wr.Offset = zw.OutputOffset | ||||
| 	func() { | ||||
| 		defer errors.Recover(&zw.err) | ||||
| 		if !zw.wrHdr { | ||||
| 			// Write stream header. | ||||
| 			zw.wr.WriteBitsBE64(hdrMagic, 16) | ||||
| 			zw.wr.WriteBitsBE64('h', 8) | ||||
| 			zw.wr.WriteBitsBE64(uint64('0'+zw.level), 8) | ||||
| 			zw.wrHdr = true | ||||
| 		} | ||||
| 		zw.encodeBlock(vals) | ||||
| 	}() | ||||
| 	var err error | ||||
| 	if zw.OutputOffset, err = zw.wr.Flush(); zw.err == nil { | ||||
| 		zw.err = err | ||||
| 	} | ||||
| 	if zw.err != nil { | ||||
| 		zw.err = errWrap(zw.err, errors.Internal) | ||||
| 		return zw.err | ||||
| 	} | ||||
| 	zw.endCRC = (zw.endCRC<<1 | zw.endCRC>>31) ^ zw.blkCRC | ||||
| 	zw.blkCRC = 0 | ||||
| 	zw.rle.Init(zw.buf) | ||||
| 	return nil | ||||
| } | ||||
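Both Reader.Read (on decode) and flush above (on encode) fold each block CRC into the stream checksum by rotating the accumulator left one bit and XORing in the block value. A standalone sketch of that combination, using made-up block CRCs:

package main

import "fmt"

// combineCRC folds a block CRC into the running stream CRC the way the
// reader and writer above do: rotate left by one bit, then XOR.
func combineCRC(streamCRC, blkCRC uint32) uint32 {
	return (streamCRC<<1 | streamCRC>>31) ^ blkCRC
}

func main() {
	var stream uint32
	for _, blk := range []uint32{0xDEADBEEF, 0x12345678} { // hypothetical block CRCs
		stream = combineCRC(stream, blk)
	}
	fmt.Printf("%08x\n", stream)
}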
|  | ||||
| func (zw *Writer) Close() error { | ||||
| 	if zw.err == errClosed { | ||||
| 		return nil | ||||
| 	} | ||||
|  | ||||
| 	// Flush RLE buffer if there is left-over data. | ||||
| 	if zw.err = zw.flush(); zw.err != nil { | ||||
| 		return zw.err | ||||
| 	} | ||||
|  | ||||
| 	// Write stream footer. | ||||
| 	zw.wr.Offset = zw.OutputOffset | ||||
| 	func() { | ||||
| 		defer errors.Recover(&zw.err) | ||||
| 		if !zw.wrHdr { | ||||
| 			// Write stream header. | ||||
| 			zw.wr.WriteBitsBE64(hdrMagic, 16) | ||||
| 			zw.wr.WriteBitsBE64('h', 8) | ||||
| 			zw.wr.WriteBitsBE64(uint64('0'+zw.level), 8) | ||||
| 			zw.wrHdr = true | ||||
| 		} | ||||
| 		zw.wr.WriteBitsBE64(endMagic, 48) | ||||
| 		zw.wr.WriteBitsBE64(uint64(zw.endCRC), 32) | ||||
| 		zw.wr.WritePads(0) | ||||
| 	}() | ||||
| 	var err error | ||||
| 	if zw.OutputOffset, err = zw.wr.Flush(); zw.err == nil { | ||||
| 		zw.err = err | ||||
| 	} | ||||
| 	if zw.err != nil { | ||||
| 		zw.err = errWrap(zw.err, errors.Internal) | ||||
| 		return zw.err | ||||
| 	} | ||||
|  | ||||
| 	zw.err = errClosed | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (zw *Writer) encodeBlock(buf []byte) { | ||||
| 	zw.blkCRC = zw.crc.val | ||||
| 	zw.wr.WriteBitsBE64(blkMagic, 48) | ||||
| 	zw.wr.WriteBitsBE64(uint64(zw.blkCRC), 32) | ||||
| 	zw.wr.WriteBitsBE64(0, 1) | ||||
| 	zw.crc.val = 0 | ||||
|  | ||||
| 	// Step 1: Burrows-Wheeler transformation. | ||||
| 	ptr := zw.bwt.Encode(buf) | ||||
| 	zw.wr.WriteBitsBE64(uint64(ptr), 24) | ||||
|  | ||||
| 	// Step 2: Move-to-front transform and run-length encoding. | ||||
| 	var dictMap [256]bool | ||||
| 	for _, c := range buf { | ||||
| 		dictMap[c] = true | ||||
| 	} | ||||
|  | ||||
| 	var dictArr [256]uint8 | ||||
| 	var bmapLo [16]uint16 | ||||
| 	dict := dictArr[:0] | ||||
| 	bmapHi := uint16(0) | ||||
| 	for i, b := range dictMap { | ||||
| 		if b { | ||||
| 			c := uint8(i) | ||||
| 			dict = append(dict, c) | ||||
| 			bmapHi |= 1 << (c >> 4) | ||||
| 			bmapLo[c>>4] |= 1 << (c & 0xf) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	zw.wr.WriteBits(uint(bmapHi), 16) | ||||
| 	for _, m := range bmapLo { | ||||
| 		if m > 0 { | ||||
| 			zw.wr.WriteBits(uint(m), 16) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	zw.mtf.Init(dict, len(buf)) | ||||
| 	syms := zw.mtf.Encode(buf) | ||||
|  | ||||
| 	// Step 3: Prefix encoding. | ||||
| 	zw.encodePrefix(syms, len(dict)) | ||||
| } | ||||
|  | ||||
| func (zw *Writer) encodePrefix(syms []uint16, numSyms int) { | ||||
| 	numSyms += 2 // Remove 0 symbol, add RUNA, RUNB, and EOB symbols | ||||
| 	if numSyms < 3 { | ||||
| 		panicf(errors.Internal, "unable to encode EOB marker") | ||||
| 	} | ||||
| 	syms = append(syms, uint16(numSyms-1)) // EOB marker | ||||
|  | ||||
| 	// Compute number of prefix trees needed. | ||||
| 	numTrees := maxNumTrees | ||||
| 	for i, lim := range []int{200, 600, 1200, 2400} { | ||||
| 		if len(syms) < lim { | ||||
| 			numTrees = minNumTrees + i | ||||
| 			break | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Compute number of block selectors. | ||||
| 	numSels := (len(syms) + numBlockSyms - 1) / numBlockSyms | ||||
| 	if cap(zw.treeSels) < numSels { | ||||
| 		zw.treeSels = make([]uint8, numSels) | ||||
| 	} | ||||
| 	treeSels := zw.treeSels[:numSels] | ||||
| 	for i := range treeSels { | ||||
| 		treeSels[i] = uint8(i % numTrees) | ||||
| 	} | ||||
|  | ||||
| 	// Initialize prefix codes. | ||||
| 	for i := range zw.codes2D[:numTrees] { | ||||
| 		pc := zw.codes2D[i][:numSyms] | ||||
| 		for j := range pc { | ||||
| 			pc[j] = prefix.PrefixCode{Sym: uint32(j)} | ||||
| 		} | ||||
| 		zw.codes1D[i] = pc | ||||
| 	} | ||||
|  | ||||
| 	// First cut at assigning prefix trees to each group. | ||||
| 	var codes prefix.PrefixCodes | ||||
| 	var blkLen, selIdx int | ||||
| 	for _, sym := range syms { | ||||
| 		if blkLen == 0 { | ||||
| 			blkLen = numBlockSyms | ||||
| 			codes = zw.codes2D[treeSels[selIdx]][:numSyms] | ||||
| 			selIdx++ | ||||
| 		} | ||||
| 		blkLen-- | ||||
| 		codes[sym].Cnt++ | ||||
| 	} | ||||
|  | ||||
| 	// TODO(dsnet): Use K-means to cluster groups to each prefix tree. | ||||
|  | ||||
| 	// Generate lengths and prefixes based on symbol frequencies. | ||||
| 	for i := range zw.trees1D[:numTrees] { | ||||
| 		pc := prefix.PrefixCodes(zw.codes2D[i][:numSyms]) | ||||
| 		pc.SortByCount() | ||||
| 		if err := prefix.GenerateLengths(pc, maxPrefixBits); err != nil { | ||||
| 			errors.Panic(err) | ||||
| 		} | ||||
| 		pc.SortBySymbol() | ||||
| 	} | ||||
|  | ||||
| 	// Write out information about the trees and tree selectors. | ||||
| 	var mtf internal.MoveToFront | ||||
| 	zw.wr.WriteBitsBE64(uint64(numTrees), 3) | ||||
| 	zw.wr.WriteBitsBE64(uint64(numSels), 15) | ||||
| 	zw.treeSelsMTF = append(zw.treeSelsMTF[:0], treeSels...) | ||||
| 	mtf.Encode(zw.treeSelsMTF) | ||||
| 	for _, sym := range zw.treeSelsMTF { | ||||
| 		zw.wr.WriteSymbol(uint(sym), &encSel) | ||||
| 	} | ||||
| 	zw.wr.WritePrefixCodes(zw.codes1D[:numTrees], zw.trees1D[:numTrees]) | ||||
|  | ||||
| 	// Write out prefix encoded symbols of compressed data. | ||||
| 	var tree *prefix.Encoder | ||||
| 	blkLen, selIdx = 0, 0 | ||||
| 	for _, sym := range syms { | ||||
| 		if blkLen == 0 { | ||||
| 			blkLen = numBlockSyms | ||||
| 			tree = &zw.trees1D[treeSels[selIdx]] | ||||
| 			selIdx++ | ||||
| 		} | ||||
| 		blkLen-- | ||||
| 		ok := zw.wr.TryWriteSymbol(uint(sym), tree) | ||||
| 		if !ok { | ||||
| 			zw.wr.WriteSymbol(uint(sym), tree) | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
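encodePrefix assigns one prefix tree to every group of numBlockSyms (50) symbols and writes one selector per group; decodePrefix in reader.go consumes them the same way. A standalone sketch (illustrative helpers, hypothetical selector values) of that bookkeeping: the selector count is the ceiling of len(syms)/50, and the tree used for the k-th symbol is the one named by selector k/50:

package main

import "fmt"

// numSelectors returns how many tree selectors a block with n prefix symbols
// needs: one per group of 50 symbols, rounding up (the same ceiling division
// as in encodePrefix above).
func numSelectors(n int) int {
	const groupSize = 50
	return (n + groupSize - 1) / groupSize
}

// treeForSymbol returns which selector (and hence which prefix tree) covers
// the k-th symbol, mirroring the blkLen/selIdx bookkeeping in the encode and
// decode loops.
func treeForSymbol(treeSels []uint8, k int) uint8 {
	return treeSels[k/50]
}

func main() {
	sels := []uint8{0, 1, 0} // hypothetical selectors for a 3-group block
	fmt.Println(numSelectors(120))        // 3
	fmt.Println(treeForSymbol(sels, 0))   // 0
	fmt.Println(treeForSymbol(sels, 75))  // 1
	fmt.Println(treeForSymbol(sels, 119)) // 0
}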