mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-10-31 21:28:11 +09:00 
			
		
		
		
	Update to last common bleve (#3986)
This commit is contained in:
		
				
					committed by
					
						 Lunny Xiao
						Lunny Xiao
					
				
			
			
				
	
			
			
			
						parent
						
							1b7cd3d0b0
						
					
				
				
					commit
					917b9641ec
				
			
							
								
								
									
										22
									
								
								vendor/github.com/Smerity/govarint/LICENSE
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								vendor/github.com/Smerity/govarint/LICENSE
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,22 @@ | ||||
| The MIT License (MIT) | ||||
|  | ||||
| Copyright (c) 2015 Stephen Merity | ||||
|  | ||||
| Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
| of this software and associated documentation files (the "Software"), to deal | ||||
| in the Software without restriction, including without limitation the rights | ||||
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||||
| copies of the Software, and to permit persons to whom the Software is | ||||
| furnished to do so, subject to the following conditions: | ||||
|  | ||||
| The above copyright notice and this permission notice shall be included in all | ||||
| copies or substantial portions of the Software. | ||||
|  | ||||
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||||
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||||
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||||
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||||
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||||
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
| SOFTWARE. | ||||
|  | ||||
							
								
								
									
										67
									
								
								vendor/github.com/Smerity/govarint/README.md
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										67
									
								
								vendor/github.com/Smerity/govarint/README.md
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,67 @@ | ||||
| # Govarint | ||||
|  | ||||
| This project aims to provide a simple API for the performant encoding and decoding of 32 and 64 bit integers using a variety of algorithms. | ||||
|  | ||||
| [](https://www.flickr.com/photos/tsevis/8648521649/) | ||||
|  | ||||
| ## Usage | ||||
|  | ||||
| Each integer encoding algorithm conforms to an encoding and decoding interface. | ||||
| The interfaces also specify the size of the unsigned integer, either 32 or 64 bits, and will be referred to as XX below. | ||||
| To create an encoder: | ||||
|  | ||||
|     NewU32Base128Encoder(w io.Writer) | ||||
|     NewU64Base128Encoder(w io.Writer) | ||||
|     NewU32GroupVarintEncoder(w io.Writer) | ||||
|  | ||||
| For encoders, the only two commands are `PutUXX` and `Close`. | ||||
| `Close` must be called as some integer encoding algorithms write in multiples. | ||||
|  | ||||
|     var buf bytes.Buffer | ||||
|     enc := NewU32Base128Encoder(&buf) | ||||
|     enc.PutU32(117) | ||||
|     enc.PutU32(343) | ||||
|     enc.Close() | ||||
|  | ||||
| To create a decoder: | ||||
|  | ||||
|     NewU32Base128Decoder(r io.ByteReader) | ||||
|     NewU64Base128Decoder(r io.ByteReader) | ||||
|     NewU32GroupVarintDecoder(r io.ByteReader) | ||||
|  | ||||
| For decoders, the only command is `GetUXX`. | ||||
| `GetUXX` returns the value and any potential errors. | ||||
| When reading is complete, `GetUXX` will return an `EOF` (End Of File). | ||||
|  | ||||
|     dec := NewU32Base128Decoder(&buf) | ||||
|     x, err := dec.GetU32() | ||||
|  | ||||
| ## Use Cases | ||||
|  | ||||
| Using fixed width integers, such as uint32 and uint64, usually wastes large amounts of space, especially when encoding small values. | ||||
| Optimally, smaller numbers should take less space to represent. | ||||
|  | ||||
| Using integer encoding algorithms is especially common in specific applications, such as storing edge lists or indexes for search engines. | ||||
| In these situations, you have a sorted list of numbers that you want to keep as compactly as possible in memory. | ||||
| Additionally, by storing only the difference between the given number and the previous (delta encoding), the numbers are quite small, and thus compress well. | ||||
|  | ||||
| For an explicit example, the Web Data Commons Hyperlink Graph contains 128 billion edges linking page A to page B, where each page is represented by a 32 bit integer. | ||||
| By converting all these edges to 64 bit integers (32 | 32), sorting them, and then using delta encoding, memory usage can be reduced from 64 bits per edge down to only 9 bits per edge using the Base128 integer encoding algorithm. | ||||
| This figure improves even further if compressed using conventional compression algorithms (3 bits per edge). | ||||
|  | ||||
| ## Encodings supported | ||||
|  | ||||
| `govarint` supports: | ||||
|  | ||||
| + Base128 [32, 64] - each byte uses 7 bits for encoding the integer and 1 bit for indicating if the integer requires another byte | ||||
| + Group Varint [32] - integers are encoded in blocks of four - one byte encodes the sizes of the following four integers, then the values of the four integers follow | ||||
|  | ||||
| Group Varint consistently beats Base128 in decompression speed but Base128 may offer improved compression ratios depending on the distribution of the supplied integers. | ||||
|  | ||||
| ## Tests | ||||
|  | ||||
|     go test -v -bench=. | ||||
|  | ||||
| ## License | ||||
|  | ||||
| MIT License, as per `LICENSE` | ||||
							
								
								
									
										229
									
								
								vendor/github.com/Smerity/govarint/govarint.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										229
									
								
								vendor/github.com/Smerity/govarint/govarint.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,229 @@ | ||||
| package govarint | ||||
|  | ||||
| import "encoding/binary" | ||||
| import "io" | ||||
|  | ||||
// U32VarintEncoder is the interface implemented by encoders that write
// 32 bit unsigned integers in a variable-length format.
//
// PutU32 adds x to the stream and returns the number of bytes written to the
// underlying writer by this call (which may be zero for encoders that buffer
// values into groups) together with any write error. Close must be called
// once all values have been added so buffered values are written out.
//
// The method signatures match the concrete encoders in this package, which
// return (int, error) from their Put methods.
type U32VarintEncoder interface {
	PutU32(x uint32) (int, error)
	Close()
}

// U32VarintDecoder is the interface implemented by decoders that read
// 32 bit unsigned integers. GetU32 returns the next value, or a non-nil
// error (io.EOF once the input is exhausted).
type U32VarintDecoder interface {
	GetU32() (uint32, error)
}

///

// U64VarintEncoder is the 64 bit counterpart of U32VarintEncoder.
//
// PutU64 adds x to the stream and returns the number of bytes written to the
// underlying writer together with any write error. Close must be called once
// all values have been added.
type U64VarintEncoder interface {
	PutU64(x uint64) (int, error)
	Close()
}

// U64VarintDecoder is the 64 bit counterpart of U32VarintDecoder. GetU64
// returns the next value, or a non-nil error (io.EOF once the input is
// exhausted).
type U64VarintDecoder interface {
	GetU64() (uint64, error)
}
|  | ||||
| /// | ||||
|  | ||||
// U32GroupVarintEncoder writes 32 bit unsigned integers using the Group
// Varint scheme: values are emitted in groups of four, each group preceded by
// a single size byte holding four 2-bit fields (most significant field first)
// that give the encoded length minus one of each value. Values are written
// big-endian using the minimum number of bytes (1 to 4).
type U32GroupVarintEncoder struct {
	w     io.Writer
	index int       // number of values currently buffered in store
	store [4]uint32 // values waiting to be written as one group
	temp  [17]byte  // scratch: 1 size byte + at most 4 * 4 value bytes
}

// NewU32GroupVarintEncoder returns an encoder that writes groups to w.
func NewU32GroupVarintEncoder(w io.Writer) *U32GroupVarintEncoder {
	return &U32GroupVarintEncoder{w: w}
}

// Flush writes any buffered values to the underlying writer as one group and
// returns the number of bytes written along with any write error. It is a
// no-op when nothing is buffered.
//
// A group with fewer than four values is written without the bytes of the
// unused trailing slots; the decoder recognises such a group by hitting EOF,
// so a partial group is only decodable as the last group of a stream.
//
// The buffer is always emptied, even when the write fails, so the encoder
// never holds more than four values; values of a failed group are dropped.
func (b *U32GroupVarintEncoder) Flush() (int, error) {
	pending := b.index
	if pending == 0 {
		return 0, nil
	}
	// Reset up front: leaving index at 4 after a failed write would make the
	// next PutU32 index past the end of store.
	b.index = 0

	// Unused slots must be zero so they each encode as a single byte, which
	// is what the trimming below relies on.
	for i := pending; i < len(b.store); i++ {
		b.store[i] = 0
	}

	// The size byte is built with bitwise OR, so it has to start from zero.
	b.temp[0] = 0
	length := 1
	for i, x := range b.store {
		// Number of significant bytes in x; zero still takes one byte.
		size := 1
		switch {
		case x>>24 != 0:
			size = 4
		case x>>16 != 0:
			size = 3
		case x>>8 != 0:
			size = 2
		}
		for s := size - 1; s >= 0; s-- {
			b.temp[length] = byte(x >> (uint(s) * 8))
			length++
		}
		// Two bits per value; 0 means one byte, hence size-1.
		b.temp[0] |= byte(size-1) << (uint(3-i) * 2)
	}

	// Drop the single zero byte computed for each unused slot.
	length -= len(b.store) - pending

	return b.w.Write(b.temp[:length])
}

// PutU32 buffers x and, once four values have been collected, writes the
// group. It returns the number of bytes written by this call (zero while the
// group is still filling) and any write error.
func (b *U32GroupVarintEncoder) PutU32(x uint32) (int, error) {
	b.store[b.index] = x
	b.index++
	if b.index < len(b.store) {
		return 0, nil
	}
	return b.Flush()
}

// Close flushes any values remaining in a partial group. The encoder
// interface gives Close no return value, so a write error here cannot be
// reported; callers that need it should call Flush themselves before Close.
func (b *U32GroupVarintEncoder) Close() {
	_, _ = b.Flush()
}
|  | ||||
| /// | ||||
|  | ||||
// U32GroupVarintDecoder reads 32 bit unsigned integers written in the Group
// Varint format: a size byte holding four 2-bit length fields followed by up
// to four big-endian values of 1 to 4 bytes each.
type U32GroupVarintDecoder struct {
	r        io.ByteReader
	group    [4]uint32 // values of the most recently decoded group
	pos      int       // index of the next value in group to hand out
	finished bool      // set once a trailing partial group has been read
	capacity int       // number of valid values in group
}

// NewU32GroupVarintDecoder returns a decoder reading from r. pos starts equal
// to capacity so the first GetU32 call triggers a group read.
func NewU32GroupVarintDecoder(r io.ByteReader) *U32GroupVarintDecoder {
	return &U32GroupVarintDecoder{r: r, pos: 4, capacity: 4}
}

// getGroup reads the next group into b.group and resets b.pos.
//
// EOF at the boundary of a value (after at least one complete value) marks a
// trailing partial group: capacity is lowered to the number of values read
// and finished is set. EOF anywhere else after the size byte -- before the
// first value or in the middle of a multi-byte value -- means the input is
// truncated and is reported as io.ErrUnexpectedEOF. An error reading the size
// byte itself (including io.EOF at a clean end of stream) is returned as is.
func (b *U32GroupVarintDecoder) getGroup() error {
	// We should always receive a size byte if there are more values to read.
	sizeByte, err := b.r.ReadByte()
	if err != nil {
		return err
	}
	// Two bits per value, most significant pair first; 0b00 means one byte.
	sizes := [4]uint32{
		uint32((sizeByte >> 6) & 3),
		uint32((sizeByte >> 4) & 3),
		uint32((sizeByte >> 2) & 3),
		uint32(sizeByte & 3),
	}
	for index, size := range sizes {
		b.group[index] = 0

		var (
			value   uint32
			c       byte
			readErr error
		)
		need := int(size) + 1
		read := 0
		for read < need {
			c, readErr = b.r.ReadByte()
			if readErr != nil {
				break
			}
			value = value<<8 | uint32(c)
			read++
		}
		if readErr == nil {
			b.group[index] = value
			continue
		}
		if readErr != io.EOF {
			return readErr
		}
		if read > 0 || index == 0 {
			// A size byte with no values, or a value cut off part way
			// through, cannot come from a well-formed stream.
			return io.ErrUnexpectedEOF
		}
		// EOF between values: this is a partial final group. The entries
		// read so far are handed out, then GetU32 reports EOF.
		b.capacity = index
		b.finished = true
		break
	}
	// Reset the pos pointer to the beginning of the read values.
	b.pos = 0
	return nil
}

// GetU32 returns the next decoded value. It returns io.EOF once every value
// has been handed out, io.ErrUnexpectedEOF if the input ends inside a group,
// or whatever other error the underlying reader reports.
func (b *U32GroupVarintDecoder) GetU32() (uint32, error) {
	// Refill when the current group is used up. >= rather than == so that an
	// empty group can never lead to reading past the end of b.group.
	if b.pos >= b.capacity {
		if b.finished {
			return 0, io.EOF
		}
		if err := b.getGroup(); err != nil {
			return 0, err
		}
	}
	v := b.group[b.pos]
	b.pos++
	return v, nil
}
|  | ||||
| /// | ||||
|  | ||||
// Base128Encoder writes unsigned integers using the Base128 varint format
// produced by encoding/binary.PutUvarint: seven payload bits per byte, with
// the high bit signalling that another byte follows.
type Base128Encoder struct {
	w        io.Writer
	tmpBytes []byte // scratch buffer reused for every value
}

// NewU32Base128Encoder returns an encoder writing to w.
//
// The scratch buffer is sized for 64 bit values even here, because the
// returned type also exposes PutU64 and a 32-bit-sized buffer would make
// binary.PutUvarint panic on large values.
func NewU32Base128Encoder(w io.Writer) *Base128Encoder {
	return &Base128Encoder{w: w, tmpBytes: make([]byte, binary.MaxVarintLen64)}
}

// NewU64Base128Encoder returns an encoder writing to w.
func NewU64Base128Encoder(w io.Writer) *Base128Encoder {
	return &Base128Encoder{w: w, tmpBytes: make([]byte, binary.MaxVarintLen64)}
}

// PutU32 encodes x and writes it to the underlying writer, returning the
// number of bytes written and any write error.
func (b *Base128Encoder) PutU32(x uint32) (int, error) {
	writtenBytes := binary.PutUvarint(b.tmpBytes, uint64(x))
	return b.w.Write(b.tmpBytes[:writtenBytes])
}

// PutU64 encodes x and writes it to the underlying writer, returning the
// number of bytes written and any write error.
func (b *Base128Encoder) PutU64(x uint64) (int, error) {
	writtenBytes := binary.PutUvarint(b.tmpBytes, x)
	return b.w.Write(b.tmpBytes[:writtenBytes])
}

// Close is a no-op: every value is written immediately by PutU32/PutU64, so
// there is nothing buffered to flush.
func (b *Base128Encoder) Close() {
}
|  | ||||
| /// | ||||
|  | ||||
// Base128Decoder reads unsigned integers stored in the Base128 varint format
// understood by encoding/binary.ReadUvarint.
type Base128Decoder struct {
	r io.ByteReader
}

// NewU32Base128Decoder returns a decoder that reads from r.
func NewU32Base128Decoder(r io.ByteReader) *Base128Decoder {
	dec := new(Base128Decoder)
	dec.r = r
	return dec
}

// NewU64Base128Decoder returns a decoder that reads from r.
func NewU64Base128Decoder(r io.ByteReader) *Base128Decoder {
	dec := new(Base128Decoder)
	dec.r = r
	return dec
}

// GetU32 reads the next varint and returns it converted to uint32, along
// with any error from the read. Values wider than 32 bits are truncated by
// the conversion rather than reported as an error.
func (d *Base128Decoder) GetU32() (uint32, error) {
	wide, err := d.GetU64()
	return uint32(wide), err
}

// GetU64 reads the next varint and returns it along with any error reported
// by binary.ReadUvarint.
func (d *Base128Decoder) GetU64() (uint64, error) {
	value, err := binary.ReadUvarint(d.r)
	return value, err
}
		Reference in New Issue
	
	Block a user