You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
144 lines
4.6 KiB
144 lines
4.6 KiB
// Copyright 2018 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.

// Package fse provides Finite State Entropy encoding and decoding.
//
// Finite State Entropy encoding provides a fast near-optimal symbol encoding/decoding
// for byte blocks as implemented in zstd.
//
// See https://github.com/klauspost/compress/tree/master/fse for more information.
|
package fse |
|
|
|
import ( |
|
"errors" |
|
"fmt" |
|
"math/bits" |
|
) |
|
|
|
const (
	// Memory usage formula: N -> 2^N bytes
	// (examples: 10 -> 1KB; 12 -> 4KB; 16 -> 64KB; 20 -> 1MB; etc.).
	// Increasing memory usage improves compression ratio.
	// Reduced memory usage can improve speed, due to cache effect.
	// Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache.
	maxMemoryUsage     = 14
	defaultMemoryUsage = 13

	// Table sizes are expressed as log2 values and are derived from the
	// memory usage settings above (memory usage minus 2).
	maxTableLog     = maxMemoryUsage - 2
	maxTablesize    = 1 << maxTableLog
	defaultTablelog = defaultMemoryUsage - 2
	minTablelog     = 5

	// maxSymbolValue is the highest symbol value; it sizes the per-symbol
	// arrays (maxSymbolValue + 1 entries).
	maxSymbolValue = 255
)
|
|
|
var (
	// ErrIncompressible is returned when input is judged to be too hard to compress.
	ErrIncompressible = errors.New("input is not compressible")

	// ErrUseRLE is returned from the compressor when the input is a single byte value repeated.
	ErrUseRLE = errors.New("input is single value repeated")
)
|
|
|
// Scratch provides temporary storage for compression and decompression. |
|
type Scratch struct { |
|
// Private |
|
count [maxSymbolValue + 1]uint32 |
|
norm [maxSymbolValue + 1]int16 |
|
br byteReader |
|
bits bitReader |
|
bw bitWriter |
|
ct cTable // Compression tables. |
|
decTable []decSymbol // Decompression table. |
|
maxCount int // count of the most probable symbol |
|
|
|
// Per block parameters. |
|
// These can be used to override compression parameters of the block. |
|
// Do not touch, unless you know what you are doing. |
|
|
|
// Out is output buffer. |
|
// If the scratch is re-used before the caller is done processing the output, |
|
// set this field to nil. |
|
// Otherwise the output buffer will be re-used for next Compression/Decompression step |
|
// and allocation will be avoided. |
|
Out []byte |
|
|
|
// DecompressLimit limits the maximum decoded size acceptable. |
|
// If > 0 decompression will stop when approximately this many bytes |
|
// has been decoded. |
|
// If 0, maximum size will be 2GB. |
|
DecompressLimit int |
|
|
|
symbolLen uint16 // Length of active part of the symbol table. |
|
actualTableLog uint8 // Selected tablelog. |
|
zeroBits bool // no bits has prob > 50%. |
|
clearCount bool // clear count |
|
|
|
// MaxSymbolValue will override the maximum symbol value of the next block. |
|
MaxSymbolValue uint8 |
|
|
|
// TableLog will attempt to override the tablelog for the next block. |
|
TableLog uint8 |
|
} |
|
|
|
// Histogram allows to populate the histogram and skip that step in the compression, |
|
// It otherwise allows to inspect the histogram when compression is done. |
|
// To indicate that you have populated the histogram call HistogramFinished |
|
// with the value of the highest populated symbol, as well as the number of entries |
|
// in the most populated entry. These are accepted at face value. |
|
// The returned slice will always be length 256. |
|
func (s *Scratch) Histogram() []uint32 { |
|
return s.count[:] |
|
} |
|
|
|
// HistogramFinished can be called to indicate that the histogram has been populated. |
|
// maxSymbol is the index of the highest set symbol of the next data segment. |
|
// maxCount is the number of entries in the most populated entry. |
|
// These are accepted at face value. |
|
func (s *Scratch) HistogramFinished(maxSymbol uint8, maxCount int) { |
|
s.maxCount = maxCount |
|
s.symbolLen = uint16(maxSymbol) + 1 |
|
s.clearCount = maxCount != 0 |
|
} |
|
|
|
// prepare will prepare and allocate scratch tables used for both compression and decompression. |
|
func (s *Scratch) prepare(in []byte) (*Scratch, error) { |
|
if s == nil { |
|
s = &Scratch{} |
|
} |
|
if s.MaxSymbolValue == 0 { |
|
s.MaxSymbolValue = 255 |
|
} |
|
if s.TableLog == 0 { |
|
s.TableLog = defaultTablelog |
|
} |
|
if s.TableLog > maxTableLog { |
|
return nil, fmt.Errorf("tableLog (%d) > maxTableLog (%d)", s.TableLog, maxTableLog) |
|
} |
|
if cap(s.Out) == 0 { |
|
s.Out = make([]byte, 0, len(in)) |
|
} |
|
if s.clearCount && s.maxCount == 0 { |
|
for i := range s.count { |
|
s.count[i] = 0 |
|
} |
|
s.clearCount = false |
|
} |
|
s.br.init(in) |
|
if s.DecompressLimit == 0 { |
|
// Max size 2GB. |
|
s.DecompressLimit = (2 << 30) - 1 |
|
} |
|
|
|
return s, nil |
|
} |
|
|
|
// tableStep returns the step derived from tableSize that is used to advance
// to the next table index: tableSize/2 + tableSize/8 + 3.
func tableStep(tableSize uint32) uint32 {
	return (tableSize >> 1) + (tableSize >> 3) + 3
}
|
|
|
// highBits returns the zero-based index of the highest set bit in val,
// i.e. floor(log2(val)) for val > 0.
//
// val must be non-zero: for val == 0 the subtraction yields -1, which
// wraps to 0xFFFFFFFF when converted to uint32.
func highBits(val uint32) (n uint32) {
	return uint32(bits.Len32(val) - 1)
}
|
|
|