mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-10-29 10:57:44 +09:00 
			
		
		
		
	Dump: add output format tar and output to stdout (#10376)
* Dump: Use mholt/archive/v3 to support tar including many compressions Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: Allow dump output to stdout Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: Fixed bug present since #6677 where SessionConfig.Provider is never "file" Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: never pack RepoRootPath, LFS.ContentPath and LogRootPath when they are below AppDataPath Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: also dump LFS (fixes #10058) Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: never dump CustomPath if CustomPath is a subdir of or equal to AppDataPath (fixes #10365) Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Use log.Info instead of fmt.Fprintf Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * import ordering * make fmt Co-authored-by: zeripath <art27@cantab.net> Co-authored-by: techknowlogick <techknowlogick@gitea.io> Co-authored-by: Matti R <matti@mdranta.net>
This commit is contained in:
		
							
								
								
									
										684
									
								
								vendor/github.com/klauspost/compress/fse/compress.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										684
									
								
								vendor/github.com/klauspost/compress/fse/compress.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,684 @@ | ||||
| // Copyright 2018 Klaus Post. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
| // Based on work Copyright (c) 2013, Yann Collet, released under BSD License. | ||||
|  | ||||
| package fse | ||||
|  | ||||
| import ( | ||||
| 	"errors" | ||||
| 	"fmt" | ||||
| ) | ||||
|  | ||||
| // Compress the input bytes. Input must be < 2GB. | ||||
| // Provide a Scratch buffer to avoid memory allocations. | ||||
| // Note that the output is also kept in the scratch buffer. | ||||
| // If input is too hard to compress, ErrIncompressible is returned. | ||||
| // If input is a single byte value repeated ErrUseRLE is returned. | ||||
| func Compress(in []byte, s *Scratch) ([]byte, error) { | ||||
| 	if len(in) <= 1 { | ||||
| 		return nil, ErrIncompressible | ||||
| 	} | ||||
| 	if len(in) > (2<<30)-1 { | ||||
| 		return nil, errors.New("input too big, must be < 2GB") | ||||
| 	} | ||||
| 	s, err := s.prepare(in) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
| 	// Create histogram, if none was provided. | ||||
| 	maxCount := s.maxCount | ||||
| 	if maxCount == 0 { | ||||
| 		maxCount = s.countSimple(in) | ||||
| 	} | ||||
| 	// Reset for next run. | ||||
| 	s.clearCount = true | ||||
| 	s.maxCount = 0 | ||||
| 	if maxCount == len(in) { | ||||
| 		// One symbol, use RLE | ||||
| 		return nil, ErrUseRLE | ||||
| 	} | ||||
| 	if maxCount == 1 || maxCount < (len(in)>>7) { | ||||
| 		// Each symbol present maximum once or too well distributed. | ||||
| 		return nil, ErrIncompressible | ||||
| 	} | ||||
| 	s.optimalTableLog() | ||||
| 	err = s.normalizeCount() | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	err = s.writeCount() | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
| 	if false { | ||||
| 		err = s.validateNorm() | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	err = s.buildCTable() | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	err = s.compress(in) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	s.Out = s.bw.out | ||||
| 	// Check if we compressed. | ||||
| 	if len(s.Out) >= len(in) { | ||||
| 		return nil, ErrIncompressible | ||||
| 	} | ||||
| 	return s.Out, nil | ||||
| } | ||||
|  | ||||
| // cState contains the compression state of a stream. | ||||
| type cState struct { | ||||
| 	bw         *bitWriter | ||||
| 	stateTable []uint16 | ||||
| 	state      uint16 | ||||
| } | ||||
|  | ||||
| // init will initialize the compression state to the first symbol of the stream. | ||||
| func (c *cState) init(bw *bitWriter, ct *cTable, tableLog uint8, first symbolTransform) { | ||||
| 	c.bw = bw | ||||
| 	c.stateTable = ct.stateTable | ||||
|  | ||||
| 	nbBitsOut := (first.deltaNbBits + (1 << 15)) >> 16 | ||||
| 	im := int32((nbBitsOut << 16) - first.deltaNbBits) | ||||
| 	lu := (im >> nbBitsOut) + first.deltaFindState | ||||
| 	c.state = c.stateTable[lu] | ||||
| 	return | ||||
| } | ||||
|  | ||||
| // encode the output symbol provided and write it to the bitstream. | ||||
| func (c *cState) encode(symbolTT symbolTransform) { | ||||
| 	nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16 | ||||
| 	dstState := int32(c.state>>(nbBitsOut&15)) + symbolTT.deltaFindState | ||||
| 	c.bw.addBits16NC(c.state, uint8(nbBitsOut)) | ||||
| 	c.state = c.stateTable[dstState] | ||||
| } | ||||
|  | ||||
| // encode the output symbol provided and write it to the bitstream. | ||||
| func (c *cState) encodeZero(symbolTT symbolTransform) { | ||||
| 	nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16 | ||||
| 	dstState := int32(c.state>>(nbBitsOut&15)) + symbolTT.deltaFindState | ||||
| 	c.bw.addBits16ZeroNC(c.state, uint8(nbBitsOut)) | ||||
| 	c.state = c.stateTable[dstState] | ||||
| } | ||||
|  | ||||
| // flush will write the tablelog to the output and flush the remaining full bytes. | ||||
| func (c *cState) flush(tableLog uint8) { | ||||
| 	c.bw.flush32() | ||||
| 	c.bw.addBits16NC(c.state, tableLog) | ||||
| 	c.bw.flush() | ||||
| } | ||||
|  | ||||
| // compress is the main compression loop that will encode the input from the last byte to the first. | ||||
| func (s *Scratch) compress(src []byte) error { | ||||
| 	if len(src) <= 2 { | ||||
| 		return errors.New("compress: src too small") | ||||
| 	} | ||||
| 	tt := s.ct.symbolTT[:256] | ||||
| 	s.bw.reset(s.Out) | ||||
|  | ||||
| 	// Our two states each encodes every second byte. | ||||
| 	// Last byte encoded (first byte decoded) will always be encoded by c1. | ||||
| 	var c1, c2 cState | ||||
|  | ||||
| 	// Encode so remaining size is divisible by 4. | ||||
| 	ip := len(src) | ||||
| 	if ip&1 == 1 { | ||||
| 		c1.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-1]]) | ||||
| 		c2.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-2]]) | ||||
| 		c1.encodeZero(tt[src[ip-3]]) | ||||
| 		ip -= 3 | ||||
| 	} else { | ||||
| 		c2.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-1]]) | ||||
| 		c1.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-2]]) | ||||
| 		ip -= 2 | ||||
| 	} | ||||
| 	if ip&2 != 0 { | ||||
| 		c2.encodeZero(tt[src[ip-1]]) | ||||
| 		c1.encodeZero(tt[src[ip-2]]) | ||||
| 		ip -= 2 | ||||
| 	} | ||||
|  | ||||
| 	// Main compression loop. | ||||
| 	switch { | ||||
| 	case !s.zeroBits && s.actualTableLog <= 8: | ||||
| 		// We can encode 4 symbols without requiring a flush. | ||||
| 		// We do not need to check if any output is 0 bits. | ||||
| 		for ip >= 4 { | ||||
| 			s.bw.flush32() | ||||
| 			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1] | ||||
| 			c2.encode(tt[v0]) | ||||
| 			c1.encode(tt[v1]) | ||||
| 			c2.encode(tt[v2]) | ||||
| 			c1.encode(tt[v3]) | ||||
| 			ip -= 4 | ||||
| 		} | ||||
| 	case !s.zeroBits: | ||||
| 		// We do not need to check if any output is 0 bits. | ||||
| 		for ip >= 4 { | ||||
| 			s.bw.flush32() | ||||
| 			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1] | ||||
| 			c2.encode(tt[v0]) | ||||
| 			c1.encode(tt[v1]) | ||||
| 			s.bw.flush32() | ||||
| 			c2.encode(tt[v2]) | ||||
| 			c1.encode(tt[v3]) | ||||
| 			ip -= 4 | ||||
| 		} | ||||
| 	case s.actualTableLog <= 8: | ||||
| 		// We can encode 4 symbols without requiring a flush | ||||
| 		for ip >= 4 { | ||||
| 			s.bw.flush32() | ||||
| 			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1] | ||||
| 			c2.encodeZero(tt[v0]) | ||||
| 			c1.encodeZero(tt[v1]) | ||||
| 			c2.encodeZero(tt[v2]) | ||||
| 			c1.encodeZero(tt[v3]) | ||||
| 			ip -= 4 | ||||
| 		} | ||||
| 	default: | ||||
| 		for ip >= 4 { | ||||
| 			s.bw.flush32() | ||||
| 			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1] | ||||
| 			c2.encodeZero(tt[v0]) | ||||
| 			c1.encodeZero(tt[v1]) | ||||
| 			s.bw.flush32() | ||||
| 			c2.encodeZero(tt[v2]) | ||||
| 			c1.encodeZero(tt[v3]) | ||||
| 			ip -= 4 | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Flush final state. | ||||
| 	// Used to initialize state when decoding. | ||||
| 	c2.flush(s.actualTableLog) | ||||
| 	c1.flush(s.actualTableLog) | ||||
|  | ||||
| 	return s.bw.close() | ||||
| } | ||||
|  | ||||
| // writeCount will write the normalized histogram count to header. | ||||
| // This is read back by readNCount. | ||||
| func (s *Scratch) writeCount() error { | ||||
| 	var ( | ||||
| 		tableLog  = s.actualTableLog | ||||
| 		tableSize = 1 << tableLog | ||||
| 		previous0 bool | ||||
| 		charnum   uint16 | ||||
|  | ||||
| 		maxHeaderSize = ((int(s.symbolLen) * int(tableLog)) >> 3) + 3 | ||||
|  | ||||
| 		// Write Table Size | ||||
| 		bitStream = uint32(tableLog - minTablelog) | ||||
| 		bitCount  = uint(4) | ||||
| 		remaining = int16(tableSize + 1) /* +1 for extra accuracy */ | ||||
| 		threshold = int16(tableSize) | ||||
| 		nbBits    = uint(tableLog + 1) | ||||
| 	) | ||||
| 	if cap(s.Out) < maxHeaderSize { | ||||
| 		s.Out = make([]byte, 0, s.br.remain()+maxHeaderSize) | ||||
| 	} | ||||
| 	outP := uint(0) | ||||
| 	out := s.Out[:maxHeaderSize] | ||||
|  | ||||
| 	// stops at 1 | ||||
| 	for remaining > 1 { | ||||
| 		if previous0 { | ||||
| 			start := charnum | ||||
| 			for s.norm[charnum] == 0 { | ||||
| 				charnum++ | ||||
| 			} | ||||
| 			for charnum >= start+24 { | ||||
| 				start += 24 | ||||
| 				bitStream += uint32(0xFFFF) << bitCount | ||||
| 				out[outP] = byte(bitStream) | ||||
| 				out[outP+1] = byte(bitStream >> 8) | ||||
| 				outP += 2 | ||||
| 				bitStream >>= 16 | ||||
| 			} | ||||
| 			for charnum >= start+3 { | ||||
| 				start += 3 | ||||
| 				bitStream += 3 << bitCount | ||||
| 				bitCount += 2 | ||||
| 			} | ||||
| 			bitStream += uint32(charnum-start) << bitCount | ||||
| 			bitCount += 2 | ||||
| 			if bitCount > 16 { | ||||
| 				out[outP] = byte(bitStream) | ||||
| 				out[outP+1] = byte(bitStream >> 8) | ||||
| 				outP += 2 | ||||
| 				bitStream >>= 16 | ||||
| 				bitCount -= 16 | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		count := s.norm[charnum] | ||||
| 		charnum++ | ||||
| 		max := (2*threshold - 1) - remaining | ||||
| 		if count < 0 { | ||||
| 			remaining += count | ||||
| 		} else { | ||||
| 			remaining -= count | ||||
| 		} | ||||
| 		count++ // +1 for extra accuracy | ||||
| 		if count >= threshold { | ||||
| 			count += max // [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ | ||||
| 		} | ||||
| 		bitStream += uint32(count) << bitCount | ||||
| 		bitCount += nbBits | ||||
| 		if count < max { | ||||
| 			bitCount-- | ||||
| 		} | ||||
|  | ||||
| 		previous0 = count == 1 | ||||
| 		if remaining < 1 { | ||||
| 			return errors.New("internal error: remaining<1") | ||||
| 		} | ||||
| 		for remaining < threshold { | ||||
| 			nbBits-- | ||||
| 			threshold >>= 1 | ||||
| 		} | ||||
|  | ||||
| 		if bitCount > 16 { | ||||
| 			out[outP] = byte(bitStream) | ||||
| 			out[outP+1] = byte(bitStream >> 8) | ||||
| 			outP += 2 | ||||
| 			bitStream >>= 16 | ||||
| 			bitCount -= 16 | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	out[outP] = byte(bitStream) | ||||
| 	out[outP+1] = byte(bitStream >> 8) | ||||
| 	outP += (bitCount + 7) / 8 | ||||
|  | ||||
| 	if uint16(charnum) > s.symbolLen { | ||||
| 		return errors.New("internal error: charnum > s.symbolLen") | ||||
| 	} | ||||
| 	s.Out = out[:outP] | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // symbolTransform contains the state transform for a symbol. | ||||
| type symbolTransform struct { | ||||
| 	deltaFindState int32 | ||||
| 	deltaNbBits    uint32 | ||||
| } | ||||
|  | ||||
| // String prints values as a human readable string. | ||||
| func (s symbolTransform) String() string { | ||||
| 	return fmt.Sprintf("dnbits: %08x, fs:%d", s.deltaNbBits, s.deltaFindState) | ||||
| } | ||||
|  | ||||
| // cTable contains tables used for compression. | ||||
| type cTable struct { | ||||
| 	tableSymbol []byte | ||||
| 	stateTable  []uint16 | ||||
| 	symbolTT    []symbolTransform | ||||
| } | ||||
|  | ||||
| // allocCtable will allocate tables needed for compression. | ||||
| // If existing tables a re big enough, they are simply re-used. | ||||
| func (s *Scratch) allocCtable() { | ||||
| 	tableSize := 1 << s.actualTableLog | ||||
| 	// get tableSymbol that is big enough. | ||||
| 	if cap(s.ct.tableSymbol) < int(tableSize) { | ||||
| 		s.ct.tableSymbol = make([]byte, tableSize) | ||||
| 	} | ||||
| 	s.ct.tableSymbol = s.ct.tableSymbol[:tableSize] | ||||
|  | ||||
| 	ctSize := tableSize | ||||
| 	if cap(s.ct.stateTable) < ctSize { | ||||
| 		s.ct.stateTable = make([]uint16, ctSize) | ||||
| 	} | ||||
| 	s.ct.stateTable = s.ct.stateTable[:ctSize] | ||||
|  | ||||
| 	if cap(s.ct.symbolTT) < 256 { | ||||
| 		s.ct.symbolTT = make([]symbolTransform, 256) | ||||
| 	} | ||||
| 	s.ct.symbolTT = s.ct.symbolTT[:256] | ||||
| } | ||||
|  | ||||
| // buildCTable will populate the compression table so it is ready to be used. | ||||
| func (s *Scratch) buildCTable() error { | ||||
| 	tableSize := uint32(1 << s.actualTableLog) | ||||
| 	highThreshold := tableSize - 1 | ||||
| 	var cumul [maxSymbolValue + 2]int16 | ||||
|  | ||||
| 	s.allocCtable() | ||||
| 	tableSymbol := s.ct.tableSymbol[:tableSize] | ||||
| 	// symbol start positions | ||||
| 	{ | ||||
| 		cumul[0] = 0 | ||||
| 		for ui, v := range s.norm[:s.symbolLen-1] { | ||||
| 			u := byte(ui) // one less than reference | ||||
| 			if v == -1 { | ||||
| 				// Low proba symbol | ||||
| 				cumul[u+1] = cumul[u] + 1 | ||||
| 				tableSymbol[highThreshold] = u | ||||
| 				highThreshold-- | ||||
| 			} else { | ||||
| 				cumul[u+1] = cumul[u] + v | ||||
| 			} | ||||
| 		} | ||||
| 		// Encode last symbol separately to avoid overflowing u | ||||
| 		u := int(s.symbolLen - 1) | ||||
| 		v := s.norm[s.symbolLen-1] | ||||
| 		if v == -1 { | ||||
| 			// Low proba symbol | ||||
| 			cumul[u+1] = cumul[u] + 1 | ||||
| 			tableSymbol[highThreshold] = byte(u) | ||||
| 			highThreshold-- | ||||
| 		} else { | ||||
| 			cumul[u+1] = cumul[u] + v | ||||
| 		} | ||||
| 		if uint32(cumul[s.symbolLen]) != tableSize { | ||||
| 			return fmt.Errorf("internal error: expected cumul[s.symbolLen] (%d) == tableSize (%d)", cumul[s.symbolLen], tableSize) | ||||
| 		} | ||||
| 		cumul[s.symbolLen] = int16(tableSize) + 1 | ||||
| 	} | ||||
| 	// Spread symbols | ||||
| 	s.zeroBits = false | ||||
| 	{ | ||||
| 		step := tableStep(tableSize) | ||||
| 		tableMask := tableSize - 1 | ||||
| 		var position uint32 | ||||
| 		// if any symbol > largeLimit, we may have 0 bits output. | ||||
| 		largeLimit := int16(1 << (s.actualTableLog - 1)) | ||||
| 		for ui, v := range s.norm[:s.symbolLen] { | ||||
| 			symbol := byte(ui) | ||||
| 			if v > largeLimit { | ||||
| 				s.zeroBits = true | ||||
| 			} | ||||
| 			for nbOccurrences := int16(0); nbOccurrences < v; nbOccurrences++ { | ||||
| 				tableSymbol[position] = symbol | ||||
| 				position = (position + step) & tableMask | ||||
| 				for position > highThreshold { | ||||
| 					position = (position + step) & tableMask | ||||
| 				} /* Low proba area */ | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		// Check if we have gone through all positions | ||||
| 		if position != 0 { | ||||
| 			return errors.New("position!=0") | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Build table | ||||
| 	table := s.ct.stateTable | ||||
| 	{ | ||||
| 		tsi := int(tableSize) | ||||
| 		for u, v := range tableSymbol { | ||||
| 			// TableU16 : sorted by symbol order; gives next state value | ||||
| 			table[cumul[v]] = uint16(tsi + u) | ||||
| 			cumul[v]++ | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Build Symbol Transformation Table | ||||
| 	{ | ||||
| 		total := int16(0) | ||||
| 		symbolTT := s.ct.symbolTT[:s.symbolLen] | ||||
| 		tableLog := s.actualTableLog | ||||
| 		tl := (uint32(tableLog) << 16) - (1 << tableLog) | ||||
| 		for i, v := range s.norm[:s.symbolLen] { | ||||
| 			switch v { | ||||
| 			case 0: | ||||
| 			case -1, 1: | ||||
| 				symbolTT[i].deltaNbBits = tl | ||||
| 				symbolTT[i].deltaFindState = int32(total - 1) | ||||
| 				total++ | ||||
| 			default: | ||||
| 				maxBitsOut := uint32(tableLog) - highBits(uint32(v-1)) | ||||
| 				minStatePlus := uint32(v) << maxBitsOut | ||||
| 				symbolTT[i].deltaNbBits = (maxBitsOut << 16) - minStatePlus | ||||
| 				symbolTT[i].deltaFindState = int32(total - v) | ||||
| 				total += v | ||||
| 			} | ||||
| 		} | ||||
| 		if total != int16(tableSize) { | ||||
| 			return fmt.Errorf("total mismatch %d (got) != %d (want)", total, tableSize) | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // countSimple will create a simple histogram in s.count. | ||||
| // Returns the biggest count. | ||||
| // Does not update s.clearCount. | ||||
| func (s *Scratch) countSimple(in []byte) (max int) { | ||||
| 	for _, v := range in { | ||||
| 		s.count[v]++ | ||||
| 	} | ||||
| 	m := uint32(0) | ||||
| 	for i, v := range s.count[:] { | ||||
| 		if v > m { | ||||
| 			m = v | ||||
| 		} | ||||
| 		if v > 0 { | ||||
| 			s.symbolLen = uint16(i) + 1 | ||||
| 		} | ||||
| 	} | ||||
| 	return int(m) | ||||
| } | ||||
|  | ||||
| // minTableLog provides the minimum logSize to safely represent a distribution. | ||||
| func (s *Scratch) minTableLog() uint8 { | ||||
| 	minBitsSrc := highBits(uint32(s.br.remain()-1)) + 1 | ||||
| 	minBitsSymbols := highBits(uint32(s.symbolLen-1)) + 2 | ||||
| 	if minBitsSrc < minBitsSymbols { | ||||
| 		return uint8(minBitsSrc) | ||||
| 	} | ||||
| 	return uint8(minBitsSymbols) | ||||
| } | ||||
|  | ||||
| // optimalTableLog calculates and sets the optimal tableLog in s.actualTableLog | ||||
| func (s *Scratch) optimalTableLog() { | ||||
| 	tableLog := s.TableLog | ||||
| 	minBits := s.minTableLog() | ||||
| 	maxBitsSrc := uint8(highBits(uint32(s.br.remain()-1))) - 2 | ||||
| 	if maxBitsSrc < tableLog { | ||||
| 		// Accuracy can be reduced | ||||
| 		tableLog = maxBitsSrc | ||||
| 	} | ||||
| 	if minBits > tableLog { | ||||
| 		tableLog = minBits | ||||
| 	} | ||||
| 	// Need a minimum to safely represent all symbol values | ||||
| 	if tableLog < minTablelog { | ||||
| 		tableLog = minTablelog | ||||
| 	} | ||||
| 	if tableLog > maxTableLog { | ||||
| 		tableLog = maxTableLog | ||||
| 	} | ||||
| 	s.actualTableLog = tableLog | ||||
| } | ||||
|  | ||||
| var rtbTable = [...]uint32{0, 473195, 504333, 520860, 550000, 700000, 750000, 830000} | ||||
|  | ||||
| // normalizeCount will normalize the count of the symbols so | ||||
| // the total is equal to the table size. | ||||
| func (s *Scratch) normalizeCount() error { | ||||
| 	var ( | ||||
| 		tableLog          = s.actualTableLog | ||||
| 		scale             = 62 - uint64(tableLog) | ||||
| 		step              = (1 << 62) / uint64(s.br.remain()) | ||||
| 		vStep             = uint64(1) << (scale - 20) | ||||
| 		stillToDistribute = int16(1 << tableLog) | ||||
| 		largest           int | ||||
| 		largestP          int16 | ||||
| 		lowThreshold      = (uint32)(s.br.remain() >> tableLog) | ||||
| 	) | ||||
|  | ||||
| 	for i, cnt := range s.count[:s.symbolLen] { | ||||
| 		// already handled | ||||
| 		// if (count[s] == s.length) return 0;   /* rle special case */ | ||||
|  | ||||
| 		if cnt == 0 { | ||||
| 			s.norm[i] = 0 | ||||
| 			continue | ||||
| 		} | ||||
| 		if cnt <= lowThreshold { | ||||
| 			s.norm[i] = -1 | ||||
| 			stillToDistribute-- | ||||
| 		} else { | ||||
| 			proba := (int16)((uint64(cnt) * step) >> scale) | ||||
| 			if proba < 8 { | ||||
| 				restToBeat := vStep * uint64(rtbTable[proba]) | ||||
| 				v := uint64(cnt)*step - (uint64(proba) << scale) | ||||
| 				if v > restToBeat { | ||||
| 					proba++ | ||||
| 				} | ||||
| 			} | ||||
| 			if proba > largestP { | ||||
| 				largestP = proba | ||||
| 				largest = i | ||||
| 			} | ||||
| 			s.norm[i] = proba | ||||
| 			stillToDistribute -= proba | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	if -stillToDistribute >= (s.norm[largest] >> 1) { | ||||
| 		// corner case, need another normalization method | ||||
| 		return s.normalizeCount2() | ||||
| 	} | ||||
| 	s.norm[largest] += stillToDistribute | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // Secondary normalization method. | ||||
| // To be used when primary method fails. | ||||
| func (s *Scratch) normalizeCount2() error { | ||||
| 	const notYetAssigned = -2 | ||||
| 	var ( | ||||
| 		distributed  uint32 | ||||
| 		total        = uint32(s.br.remain()) | ||||
| 		tableLog     = s.actualTableLog | ||||
| 		lowThreshold = uint32(total >> tableLog) | ||||
| 		lowOne       = uint32((total * 3) >> (tableLog + 1)) | ||||
| 	) | ||||
| 	for i, cnt := range s.count[:s.symbolLen] { | ||||
| 		if cnt == 0 { | ||||
| 			s.norm[i] = 0 | ||||
| 			continue | ||||
| 		} | ||||
| 		if cnt <= lowThreshold { | ||||
| 			s.norm[i] = -1 | ||||
| 			distributed++ | ||||
| 			total -= cnt | ||||
| 			continue | ||||
| 		} | ||||
| 		if cnt <= lowOne { | ||||
| 			s.norm[i] = 1 | ||||
| 			distributed++ | ||||
| 			total -= cnt | ||||
| 			continue | ||||
| 		} | ||||
| 		s.norm[i] = notYetAssigned | ||||
| 	} | ||||
| 	toDistribute := (1 << tableLog) - distributed | ||||
|  | ||||
| 	if (total / toDistribute) > lowOne { | ||||
| 		// risk of rounding to zero | ||||
| 		lowOne = uint32((total * 3) / (toDistribute * 2)) | ||||
| 		for i, cnt := range s.count[:s.symbolLen] { | ||||
| 			if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) { | ||||
| 				s.norm[i] = 1 | ||||
| 				distributed++ | ||||
| 				total -= cnt | ||||
| 				continue | ||||
| 			} | ||||
| 		} | ||||
| 		toDistribute = (1 << tableLog) - distributed | ||||
| 	} | ||||
| 	if distributed == uint32(s.symbolLen)+1 { | ||||
| 		// all values are pretty poor; | ||||
| 		//   probably incompressible data (should have already been detected); | ||||
| 		//   find max, then give all remaining points to max | ||||
| 		var maxV int | ||||
| 		var maxC uint32 | ||||
| 		for i, cnt := range s.count[:s.symbolLen] { | ||||
| 			if cnt > maxC { | ||||
| 				maxV = i | ||||
| 				maxC = cnt | ||||
| 			} | ||||
| 		} | ||||
| 		s.norm[maxV] += int16(toDistribute) | ||||
| 		return nil | ||||
| 	} | ||||
|  | ||||
| 	if total == 0 { | ||||
| 		// all of the symbols were low enough for the lowOne or lowThreshold | ||||
| 		for i := uint32(0); toDistribute > 0; i = (i + 1) % (uint32(s.symbolLen)) { | ||||
| 			if s.norm[i] > 0 { | ||||
| 				toDistribute-- | ||||
| 				s.norm[i]++ | ||||
| 			} | ||||
| 		} | ||||
| 		return nil | ||||
| 	} | ||||
|  | ||||
| 	var ( | ||||
| 		vStepLog = 62 - uint64(tableLog) | ||||
| 		mid      = uint64((1 << (vStepLog - 1)) - 1) | ||||
| 		rStep    = (((1 << vStepLog) * uint64(toDistribute)) + mid) / uint64(total) // scale on remaining | ||||
| 		tmpTotal = mid | ||||
| 	) | ||||
| 	for i, cnt := range s.count[:s.symbolLen] { | ||||
| 		if s.norm[i] == notYetAssigned { | ||||
| 			var ( | ||||
| 				end    = tmpTotal + uint64(cnt)*rStep | ||||
| 				sStart = uint32(tmpTotal >> vStepLog) | ||||
| 				sEnd   = uint32(end >> vStepLog) | ||||
| 				weight = sEnd - sStart | ||||
| 			) | ||||
| 			if weight < 1 { | ||||
| 				return errors.New("weight < 1") | ||||
| 			} | ||||
| 			s.norm[i] = int16(weight) | ||||
| 			tmpTotal = end | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // validateNorm validates the normalized histogram table. | ||||
| func (s *Scratch) validateNorm() (err error) { | ||||
| 	var total int | ||||
| 	for _, v := range s.norm[:s.symbolLen] { | ||||
| 		if v >= 0 { | ||||
| 			total += int(v) | ||||
| 		} else { | ||||
| 			total -= int(v) | ||||
| 		} | ||||
| 	} | ||||
| 	defer func() { | ||||
| 		if err == nil { | ||||
| 			return | ||||
| 		} | ||||
| 		fmt.Printf("selected TableLog: %d, Symbol length: %d\n", s.actualTableLog, s.symbolLen) | ||||
| 		for i, v := range s.norm[:s.symbolLen] { | ||||
| 			fmt.Printf("%3d: %5d -> %4d \n", i, s.count[i], v) | ||||
| 		} | ||||
| 	}() | ||||
| 	if total != (1 << s.actualTableLog) { | ||||
| 		return fmt.Errorf("warning: Total == %d != %d", total, 1<<s.actualTableLog) | ||||
| 	} | ||||
| 	for i, v := range s.count[s.symbolLen:] { | ||||
| 		if v != 0 { | ||||
| 			return fmt.Errorf("warning: Found symbol out of range, %d after cut", i) | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
		Reference in New Issue
	
	Block a user