vendor: github.com/klauspost/compress v1.17.9

full diff: https://github.com/klauspost/compress/compare/v1.17.4...v1.17.9

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
This commit is contained in:
Sebastiaan van Stijn 2024-06-25 22:57:14 +02:00
parent 4c3af9becf
commit a3c8a72b54
No known key found for this signature in database
GPG Key ID: 76698F39D527CE8C
20 changed files with 256 additions and 138 deletions

2
go.mod
View File

@ -112,7 +112,7 @@ require (
github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/josharian/intern v1.0.0 // indirect github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/compress v1.17.4 // indirect github.com/klauspost/compress v1.17.9 // indirect
github.com/mailru/easyjson v0.7.7 // indirect github.com/mailru/easyjson v0.7.7 // indirect
github.com/mattn/go-runewidth v0.0.15 // indirect github.com/mattn/go-runewidth v0.0.15 // indirect
github.com/mattn/go-shellwords v1.0.12 // indirect github.com/mattn/go-shellwords v1.0.12 // indirect

4
go.sum
View File

@ -263,8 +263,8 @@ github.com/juju/loggo v0.0.0-20190526231331-6e530bcce5d8/go.mod h1:vgyd7OREkbtVE
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4= github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=

View File

@ -3,7 +3,6 @@
before: before:
hooks: hooks:
- ./gen.sh - ./gen.sh
- go install mvdan.cc/garble@v0.10.1
builds: builds:
- -
@ -32,7 +31,6 @@ builds:
- mips64le - mips64le
goarm: goarm:
- 7 - 7
gobinary: garble
- -
id: "s2d" id: "s2d"
binary: s2d binary: s2d
@ -59,7 +57,6 @@ builds:
- mips64le - mips64le
goarm: goarm:
- 7 - 7
gobinary: garble
- -
id: "s2sx" id: "s2sx"
binary: s2sx binary: s2sx
@ -87,7 +84,6 @@ builds:
- mips64le - mips64le
goarm: goarm:
- 7 - 7
gobinary: garble
archives: archives:
- -

View File

@ -16,6 +16,30 @@ This package provides various compression algorithms.
# changelog # changelog
* Feb 5th, 2024 - [1.17.6](https://github.com/klauspost/compress/releases/tag/v1.17.6)
* zstd: Fix incorrect repeat coding in best mode https://github.com/klauspost/compress/pull/923
* s2: Fix DecodeConcurrent deadlock on errors https://github.com/klauspost/compress/pull/925
* Jan 26th, 2024 - [v1.17.5](https://github.com/klauspost/compress/releases/tag/v1.17.5)
* flate: Fix reset with dictionary on custom window encodes https://github.com/klauspost/compress/pull/912
* zstd: Add Frame header encoding and stripping https://github.com/klauspost/compress/pull/908
* zstd: Limit better/best default window to 8MB https://github.com/klauspost/compress/pull/913
* zstd: Speed improvements by @greatroar in https://github.com/klauspost/compress/pull/896 https://github.com/klauspost/compress/pull/910
* s2: Fix callbacks for skippable blocks and disallow 0xfe (Padding) by @Jille in https://github.com/klauspost/compress/pull/916 https://github.com/klauspost/compress/pull/917
https://github.com/klauspost/compress/pull/919 https://github.com/klauspost/compress/pull/918
* Dec 1st, 2023 - [v1.17.4](https://github.com/klauspost/compress/releases/tag/v1.17.4)
* huff0: Speed up symbol counting by @greatroar in https://github.com/klauspost/compress/pull/887
* huff0: Remove byteReader by @greatroar in https://github.com/klauspost/compress/pull/886
* gzhttp: Allow overriding decompression on transport https://github.com/klauspost/compress/pull/892
* gzhttp: Clamp compression level https://github.com/klauspost/compress/pull/890
* gzip: Error out if reserved bits are set https://github.com/klauspost/compress/pull/891
* Nov 15th, 2023 - [v1.17.3](https://github.com/klauspost/compress/releases/tag/v1.17.3)
* fse: Fix max header size https://github.com/klauspost/compress/pull/881
* zstd: Improve better/best compression https://github.com/klauspost/compress/pull/877
* gzhttp: Fix missing content type on Close https://github.com/klauspost/compress/pull/883
* Oct 22nd, 2023 - [v1.17.2](https://github.com/klauspost/compress/releases/tag/v1.17.2) * Oct 22nd, 2023 - [v1.17.2](https://github.com/klauspost/compress/releases/tag/v1.17.2)
* zstd: Fix rare *CORRUPTION* output in "best" mode. See https://github.com/klauspost/compress/pull/876 * zstd: Fix rare *CORRUPTION* output in "best" mode. See https://github.com/klauspost/compress/pull/876
@ -31,6 +55,10 @@ This package provides various compression algorithms.
* s2: Do 2 overlapping match checks https://github.com/klauspost/compress/pull/839 * s2: Do 2 overlapping match checks https://github.com/klauspost/compress/pull/839
* flate: Add amd64 assembly matchlen https://github.com/klauspost/compress/pull/837 * flate: Add amd64 assembly matchlen https://github.com/klauspost/compress/pull/837
* gzip: Copy bufio.Reader on Reset by @thatguystone in https://github.com/klauspost/compress/pull/860 * gzip: Copy bufio.Reader on Reset by @thatguystone in https://github.com/klauspost/compress/pull/860
<details>
<summary>See changes to v1.16.x</summary>
* July 1st, 2023 - [v1.16.7](https://github.com/klauspost/compress/releases/tag/v1.16.7) * July 1st, 2023 - [v1.16.7](https://github.com/klauspost/compress/releases/tag/v1.16.7)
* zstd: Fix default level first dictionary encode https://github.com/klauspost/compress/pull/829 * zstd: Fix default level first dictionary encode https://github.com/klauspost/compress/pull/829
@ -69,6 +97,7 @@ This package provides various compression algorithms.
* s2: Add LZ4 block converter. https://github.com/klauspost/compress/pull/748 * s2: Add LZ4 block converter. https://github.com/klauspost/compress/pull/748
* s2: Support io.ReaderAt in ReadSeeker. https://github.com/klauspost/compress/pull/747 * s2: Support io.ReaderAt in ReadSeeker. https://github.com/klauspost/compress/pull/747
* s2c/s2sx: Use concurrent decoding. https://github.com/klauspost/compress/pull/746 * s2c/s2sx: Use concurrent decoding. https://github.com/klauspost/compress/pull/746
</details>
<details> <details>
<summary>See changes to v1.15.x</summary> <summary>See changes to v1.15.x</summary>
@ -536,6 +565,8 @@ the stateless compress described below.
For compression performance, see: [this spreadsheet](https://docs.google.com/spreadsheets/d/1nuNE2nPfuINCZJRMt6wFWhKpToF95I47XjSsc-1rbPQ/edit?usp=sharing). For compression performance, see: [this spreadsheet](https://docs.google.com/spreadsheets/d/1nuNE2nPfuINCZJRMt6wFWhKpToF95I47XjSsc-1rbPQ/edit?usp=sharing).
To disable all assembly add `-tags=noasm`. This works across all packages.
# Stateless compression # Stateless compression
This package offers stateless compression as a special option for gzip/deflate. This package offers stateless compression as a special option for gzip/deflate.
@ -554,7 +585,7 @@ For direct deflate use, NewStatelessWriter and StatelessDeflate are available. S
A `bufio.Writer` can of course be used to control write sizes. For example, to use a 4KB buffer: A `bufio.Writer` can of course be used to control write sizes. For example, to use a 4KB buffer:
``` ```go
// replace 'ioutil.Discard' with your output. // replace 'ioutil.Discard' with your output.
gzw, err := gzip.NewWriterLevel(ioutil.Discard, gzip.StatelessCompression) gzw, err := gzip.NewWriterLevel(ioutil.Discard, gzip.StatelessCompression)
if err != nil { if err != nil {

View File

@ -51,7 +51,7 @@ func emitCopy(dst []byte, offset, length int) int {
i := 0 i := 0
// The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. The // The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. The
// threshold for this loop is a little higher (at 68 = 64 + 4), and the // threshold for this loop is a little higher (at 68 = 64 + 4), and the
// length emitted down below is is a little lower (at 60 = 64 - 4), because // length emitted down below is a little lower (at 60 = 64 - 4), because
// it's shorter to encode a length 67 copy as a length 60 tagCopy2 followed // it's shorter to encode a length 67 copy as a length 60 tagCopy2 followed
// by a length 7 tagCopy1 (which encodes as 3+2 bytes) than to encode it as // by a length 7 tagCopy1 (which encodes as 3+2 bytes) than to encode it as
// a length 64 tagCopy2 followed by a length 3 tagCopy2 (which encodes as // a length 64 tagCopy2 followed by a length 3 tagCopy2 (which encodes as

View File

@ -1,4 +1,4 @@
module github.com/klauspost/compress module github.com/klauspost/compress
go 1.16 go 1.19

View File

@ -554,6 +554,9 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
if debugDecoder { if debugDecoder {
printf("Compression modes: 0b%b", compMode) printf("Compression modes: 0b%b", compMode)
} }
if compMode&3 != 0 {
return errors.New("corrupt block: reserved bits not zero")
}
for i := uint(0); i < 3; i++ { for i := uint(0); i < 3; i++ {
mode := seqCompMode((compMode >> (6 - i*2)) & 3) mode := seqCompMode((compMode >> (6 - i*2)) & 3)
if debugDecoder { if debugDecoder {

View File

@ -427,6 +427,16 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
return nil return nil
} }
// encodeRLE appends one RLE block to b.output: a block header built from
// the encoder's last flag, the supplied length and the RLE block type,
// immediately followed by the single byte val.
func (b *blockEnc) encodeRLE(val byte, length uint32) {
	var hdr blockHeader
	hdr.setLast(b.last)
	hdr.setSize(length)
	hdr.setType(blockTypeRLE)
	// Header bytes go first, then the one payload byte.
	b.output = append(hdr.appendTo(b.output), val)
}
// fuzzFseEncoder can be used to fuzz the FSE encoder. // fuzzFseEncoder can be used to fuzz the FSE encoder.
func fuzzFseEncoder(data []byte) int { func fuzzFseEncoder(data []byte) int {
if len(data) > maxSequences || len(data) < 2 { if len(data) > maxSequences || len(data) < 2 {
@ -479,6 +489,16 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
if len(b.sequences) == 0 { if len(b.sequences) == 0 {
return b.encodeLits(b.literals, rawAllLits) return b.encodeLits(b.literals, rawAllLits)
} }
if len(b.sequences) == 1 && len(org) > 0 && len(b.literals) <= 1 {
// Check common RLE cases.
seq := b.sequences[0]
if seq.litLen == uint32(len(b.literals)) && seq.offset-3 == 1 {
// Offset == 1 and 0 or 1 literals.
b.encodeRLE(org[0], b.sequences[0].matchLen+zstdMinMatch+seq.litLen)
return nil
}
}
// We want some difference to at least account for the headers. // We want some difference to at least account for the headers.
saved := b.size - len(b.literals) - (b.size >> 6) saved := b.size - len(b.literals) - (b.size >> 6)
if saved < 16 { if saved < 16 {

View File

@ -95,42 +95,54 @@ type Header struct {
// If there isn't enough input, io.ErrUnexpectedEOF is returned. // If there isn't enough input, io.ErrUnexpectedEOF is returned.
// The FirstBlock.OK will indicate if enough information was available to decode the first block header. // The FirstBlock.OK will indicate if enough information was available to decode the first block header.
func (h *Header) Decode(in []byte) error { func (h *Header) Decode(in []byte) error {
_, err := h.DecodeAndStrip(in)
return err
}
// DecodeAndStrip will decode the header from the beginning of the stream
// and on success return the remaining bytes.
// This will decode the frame header and the first block header if enough bytes are provided.
// It is recommended to provide at least HeaderMaxSize bytes.
// If the frame header cannot be read an error will be returned.
// If there isn't enough input, io.ErrUnexpectedEOF is returned.
// The FirstBlock.OK will indicate if enough information was available to decode the first block header.
func (h *Header) DecodeAndStrip(in []byte) (remain []byte, err error) {
*h = Header{} *h = Header{}
if len(in) < 4 { if len(in) < 4 {
return io.ErrUnexpectedEOF return nil, io.ErrUnexpectedEOF
} }
h.HeaderSize += 4 h.HeaderSize += 4
b, in := in[:4], in[4:] b, in := in[:4], in[4:]
if string(b) != frameMagic { if string(b) != frameMagic {
if string(b[1:4]) != skippableFrameMagic || b[0]&0xf0 != 0x50 { if string(b[1:4]) != skippableFrameMagic || b[0]&0xf0 != 0x50 {
return ErrMagicMismatch return nil, ErrMagicMismatch
} }
if len(in) < 4 { if len(in) < 4 {
return io.ErrUnexpectedEOF return nil, io.ErrUnexpectedEOF
} }
h.HeaderSize += 4 h.HeaderSize += 4
h.Skippable = true h.Skippable = true
h.SkippableID = int(b[0] & 0xf) h.SkippableID = int(b[0] & 0xf)
h.SkippableSize = binary.LittleEndian.Uint32(in) h.SkippableSize = binary.LittleEndian.Uint32(in)
return nil return in[4:], nil
} }
// Read Window_Descriptor // Read Window_Descriptor
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
if len(in) < 1 { if len(in) < 1 {
return io.ErrUnexpectedEOF return nil, io.ErrUnexpectedEOF
} }
fhd, in := in[0], in[1:] fhd, in := in[0], in[1:]
h.HeaderSize++ h.HeaderSize++
h.SingleSegment = fhd&(1<<5) != 0 h.SingleSegment = fhd&(1<<5) != 0
h.HasCheckSum = fhd&(1<<2) != 0 h.HasCheckSum = fhd&(1<<2) != 0
if fhd&(1<<3) != 0 { if fhd&(1<<3) != 0 {
return errors.New("reserved bit set on frame header") return nil, errors.New("reserved bit set on frame header")
} }
if !h.SingleSegment { if !h.SingleSegment {
if len(in) < 1 { if len(in) < 1 {
return io.ErrUnexpectedEOF return nil, io.ErrUnexpectedEOF
} }
var wd byte var wd byte
wd, in = in[0], in[1:] wd, in = in[0], in[1:]
@ -148,7 +160,7 @@ func (h *Header) Decode(in []byte) error {
size = 4 size = 4
} }
if len(in) < int(size) { if len(in) < int(size) {
return io.ErrUnexpectedEOF return nil, io.ErrUnexpectedEOF
} }
b, in = in[:size], in[size:] b, in = in[:size], in[size:]
h.HeaderSize += int(size) h.HeaderSize += int(size)
@ -178,7 +190,7 @@ func (h *Header) Decode(in []byte) error {
if fcsSize > 0 { if fcsSize > 0 {
h.HasFCS = true h.HasFCS = true
if len(in) < fcsSize { if len(in) < fcsSize {
return io.ErrUnexpectedEOF return nil, io.ErrUnexpectedEOF
} }
b, in = in[:fcsSize], in[fcsSize:] b, in = in[:fcsSize], in[fcsSize:]
h.HeaderSize += int(fcsSize) h.HeaderSize += int(fcsSize)
@ -199,7 +211,7 @@ func (h *Header) Decode(in []byte) error {
// Frame Header done, we will not fail from now on. // Frame Header done, we will not fail from now on.
if len(in) < 3 { if len(in) < 3 {
return nil return in, nil
} }
tmp := in[:3] tmp := in[:3]
bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16) bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16)
@ -209,7 +221,7 @@ func (h *Header) Decode(in []byte) error {
cSize := int(bh >> 3) cSize := int(bh >> 3)
switch blockType { switch blockType {
case blockTypeReserved: case blockTypeReserved:
return nil return in, nil
case blockTypeRLE: case blockTypeRLE:
h.FirstBlock.Compressed = true h.FirstBlock.Compressed = true
h.FirstBlock.DecompressedSize = cSize h.FirstBlock.DecompressedSize = cSize
@ -225,5 +237,25 @@ func (h *Header) Decode(in []byte) error {
} }
h.FirstBlock.OK = true h.FirstBlock.OK = true
return nil return in, nil
}
// AppendTo will append the encoded header to the dst slice.
// There is no error checking performed on the header values.
//
// When h.Skippable is set, only the 4 magic bytes and the 4 bytes of
// h.SkippableSize are appended. Otherwise a frameHeader is filled in from
// h and encoded through its appendTo method.
// Every return path visible here yields a nil error.
func (h *Header) AppendTo(dst []byte) ([]byte, error) {
if h.Skippable {
magic := [4]byte{0x50, 0x2a, 0x4d, 0x18}
// Only the low 4 bits of SkippableID are folded into the first magic byte.
magic[0] |= byte(h.SkippableID & 0xf)
dst = append(dst, magic[:]...)
f := h.SkippableSize
// The size is written least-significant byte first.
return append(dst, uint8(f), uint8(f>>8), uint8(f>>16), uint8(f>>24)), nil
}
// Regular frame: copy the relevant Header fields into a frameHeader.
f := frameHeader{
ContentSize: h.FrameContentSize,
WindowSize: uint32(h.WindowSize),
SingleSegment: h.SingleSegment,
Checksum: h.HasCheckSum,
DictID: h.DictionaryID,
}
return f.appendTo(dst), nil
} }

View File

@ -82,7 +82,7 @@ var (
// can run multiple concurrent stateless decodes. It is even possible to // can run multiple concurrent stateless decodes. It is even possible to
// use stateless decodes while a stream is being decoded. // use stateless decodes while a stream is being decoded.
// //
// The Reset function can be used to initiate a new stream, which is will considerably // The Reset function can be used to initiate a new stream, which will considerably
// reduce the allocations normally caused by NewReader. // reduce the allocations normally caused by NewReader.
func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) { func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) {
initPredefined() initPredefined()

View File

@ -273,6 +273,9 @@ func BuildDict(o BuildDictOptions) ([]byte, error) {
enc.Encode(&block, b) enc.Encode(&block, b)
addValues(&remain, block.literals) addValues(&remain, block.literals)
litTotal += len(block.literals) litTotal += len(block.literals)
if len(block.sequences) == 0 {
continue
}
seqs += len(block.sequences) seqs += len(block.sequences)
block.genCodes() block.genCodes()
addHist(&ll, block.coders.llEnc.Histogram()) addHist(&ll, block.coders.llEnc.Histogram())
@ -286,6 +289,9 @@ func BuildDict(o BuildDictOptions) ([]byte, error) {
if offset == 0 { if offset == 0 {
continue continue
} }
if int(offset) >= len(o.History) {
continue
}
if offset > 3 { if offset > 3 {
newOffsets[offset-3]++ newOffsets[offset-3]++
} else { } else {
@ -336,6 +342,9 @@ func BuildDict(o BuildDictOptions) ([]byte, error) {
if seqs/nUsed < 512 { if seqs/nUsed < 512 {
// Use 512 as minimum. // Use 512 as minimum.
nUsed = seqs / 512 nUsed = seqs / 512
if nUsed == 0 {
nUsed = 1
}
} }
copyHist := func(dst *fseEncoder, src *[256]int) ([]byte, error) { copyHist := func(dst *fseEncoder, src *[256]int) ([]byte, error) {
hist := dst.Histogram() hist := dst.Histogram()
@ -358,6 +367,28 @@ func BuildDict(o BuildDictOptions) ([]byte, error) {
fakeLength += v fakeLength += v
hist[i] = uint32(v) hist[i] = uint32(v)
} }
// Ensure we aren't trying to represent RLE.
if maxCount == fakeLength {
for i := range hist {
if uint8(i) == maxSym {
fakeLength++
maxSym++
hist[i+1] = 1
if maxSym > 1 {
break
}
}
if hist[0] == 0 {
fakeLength++
hist[i] = 1
if maxSym > 1 {
break
}
}
}
}
dst.HistogramFinished(maxSym, maxCount) dst.HistogramFinished(maxSym, maxCount)
dst.reUsed = false dst.reUsed = false
dst.useRLE = false dst.useRLE = false

View File

@ -135,8 +135,20 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
break break
} }
// Add block to history
s := e.addBlock(src) s := e.addBlock(src)
blk.size = len(src) blk.size = len(src)
// Check RLE first
if len(src) > zstdMinMatch {
ml := matchLen(src[1:], src)
if ml == len(src)-1 {
blk.literals = append(blk.literals, src[0])
blk.sequences = append(blk.sequences, seq{litLen: 1, matchLen: uint32(len(src)-1) - zstdMinMatch, offset: 1 + 3})
return
}
}
if len(src) < minNonLiteralBlockSize { if len(src) < minNonLiteralBlockSize {
blk.extraLits = len(src) blk.extraLits = len(src)
blk.literals = blk.literals[:len(src)] blk.literals = blk.literals[:len(src)]
@ -201,14 +213,6 @@ encodeLoop:
if delta >= e.maxMatchOff || delta <= 0 || load3232(src, offset) != first { if delta >= e.maxMatchOff || delta <= 0 || load3232(src, offset) != first {
return return
} }
if debugAsserts {
if offset >= s {
panic(fmt.Sprintf("offset: %d - s:%d - rep: %d - cur :%d - max: %d", offset, s, rep, e.cur, e.maxMatchOff))
}
if !bytes.Equal(src[s:s+4], src[offset:offset+4]) {
panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
}
}
// Try to quick reject if we already have a long match. // Try to quick reject if we already have a long match.
if m.length > 16 { if m.length > 16 {
left := len(src) - int(m.s+m.length) left := len(src) - int(m.s+m.length)
@ -227,8 +231,10 @@ encodeLoop:
} }
} }
l := 4 + e.matchlen(s+4, offset+4, src) l := 4 + e.matchlen(s+4, offset+4, src)
if true { if m.rep <= 0 {
// Extend candidate match backwards as far as possible. // Extend candidate match backwards as far as possible.
// Do not extend repeats as we can assume they are optimal
// and offsets change if s == nextEmit.
tMin := s - e.maxMatchOff tMin := s - e.maxMatchOff
if tMin < 0 { if tMin < 0 {
tMin = 0 tMin = 0
@ -239,7 +245,14 @@ encodeLoop:
l++ l++
} }
} }
if debugAsserts {
if offset >= s {
panic(fmt.Sprintf("offset: %d - s:%d - rep: %d - cur :%d - max: %d", offset, s, rep, e.cur, e.maxMatchOff))
}
if !bytes.Equal(src[s:s+l], src[offset:offset+l]) {
panic(fmt.Sprintf("second match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
}
}
cand := match{offset: offset, s: s, length: l, rep: rep} cand := match{offset: offset, s: s, length: l, rep: rep}
cand.estBits(bitsPerByte) cand.estBits(bitsPerByte)
if m.est >= highScore || cand.est-m.est+(cand.s-m.s)*bitsPerByte>>10 < 0 { if m.est >= highScore || cand.est-m.est+(cand.s-m.s)*bitsPerByte>>10 < 0 {
@ -336,24 +349,31 @@ encodeLoop:
} }
if debugAsserts { if debugAsserts {
if best.offset >= best.s {
panic(fmt.Sprintf("best.offset > s: %d >= %d", best.offset, best.s))
}
if best.s < nextEmit {
panic(fmt.Sprintf("s %d < nextEmit %d", best.s, nextEmit))
}
if best.offset < s-e.maxMatchOff {
panic(fmt.Sprintf("best.offset < s-e.maxMatchOff: %d < %d", best.offset, s-e.maxMatchOff))
}
if !bytes.Equal(src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]) { if !bytes.Equal(src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]) {
panic(fmt.Sprintf("match mismatch: %v != %v", src[best.s:best.s+best.length], src[best.offset:best.offset+best.length])) panic(fmt.Sprintf("match mismatch: %v != %v", src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]))
} }
} }
// We have a match, we can store the forward value // We have a match, we can store the forward value
s = best.s
if best.rep > 0 { if best.rep > 0 {
var seq seq var seq seq
seq.matchLen = uint32(best.length - zstdMinMatch) seq.matchLen = uint32(best.length - zstdMinMatch)
if debugAsserts && s < nextEmit {
panic("s < nextEmit")
}
addLiterals(&seq, best.s) addLiterals(&seq, best.s)
// Repeat. If bit 4 is set, this is a non-lit repeat. // Repeat. If bit 4 is set, this is a non-lit repeat.
seq.offset = uint32(best.rep & 3) seq.offset = uint32(best.rep & 3)
if debugSequences { if debugSequences {
println("repeat sequence", seq, "next s:", s) println("repeat sequence", seq, "next s:", best.s, "off:", best.s-best.offset)
} }
blk.sequences = append(blk.sequences, seq) blk.sequences = append(blk.sequences, seq)
@ -396,7 +416,6 @@ encodeLoop:
// A 4-byte match has been found. Update recent offsets. // A 4-byte match has been found. Update recent offsets.
// We'll later see if more than 4 bytes. // We'll later see if more than 4 bytes.
s = best.s
t := best.offset t := best.offset
offset1, offset2, offset3 = s-t, offset1, offset2 offset1, offset2, offset3 = s-t, offset1, offset2

View File

@ -102,9 +102,20 @@ func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
e.cur = e.maxMatchOff e.cur = e.maxMatchOff
break break
} }
// Add block to history
s := e.addBlock(src) s := e.addBlock(src)
blk.size = len(src) blk.size = len(src)
// Check RLE first
if len(src) > zstdMinMatch {
ml := matchLen(src[1:], src)
if ml == len(src)-1 {
blk.literals = append(blk.literals, src[0])
blk.sequences = append(blk.sequences, seq{litLen: 1, matchLen: uint32(len(src)-1) - zstdMinMatch, offset: 1 + 3})
return
}
}
if len(src) < minNonLiteralBlockSize { if len(src) < minNonLiteralBlockSize {
blk.extraLits = len(src) blk.extraLits = len(src)
blk.literals = blk.literals[:len(src)] blk.literals = blk.literals[:len(src)]

View File

@ -94,7 +94,7 @@ func WithEncoderConcurrency(n int) EOption {
// The value must be a power of two between MinWindowSize and MaxWindowSize. // The value must be a power of two between MinWindowSize and MaxWindowSize.
// A larger value will enable better compression but allocate more memory and, // A larger value will enable better compression but allocate more memory and,
// for above-default values, take considerably longer. // for above-default values, take considerably longer.
// The default value is determined by the compression level. // The default value is determined by the compression level and max 8MB.
func WithWindowSize(n int) EOption { func WithWindowSize(n int) EOption {
return func(o *encoderOptions) error { return func(o *encoderOptions) error {
switch { switch {
@ -232,9 +232,9 @@ func WithEncoderLevel(l EncoderLevel) EOption {
case SpeedDefault: case SpeedDefault:
o.windowSize = 8 << 20 o.windowSize = 8 << 20
case SpeedBetterCompression: case SpeedBetterCompression:
o.windowSize = 16 << 20 o.windowSize = 8 << 20
case SpeedBestCompression: case SpeedBestCompression:
o.windowSize = 32 << 20 o.windowSize = 8 << 20
} }
} }
if !o.customALEntropy { if !o.customALEntropy {

View File

@ -76,7 +76,7 @@ func (f frameHeader) appendTo(dst []byte) []byte {
if f.SingleSegment { if f.SingleSegment {
dst = append(dst, uint8(f.ContentSize)) dst = append(dst, uint8(f.ContentSize))
} }
// Unless SingleSegment is set, framessizes < 256 are nto stored. // Unless SingleSegment is set, framessizes < 256 are not stored.
case 1: case 1:
f.ContentSize -= 256 f.ContentSize -= 256
dst = append(dst, uint8(f.ContentSize), uint8(f.ContentSize>>8)) dst = append(dst, uint8(f.ContentSize), uint8(f.ContentSize>>8))

View File

@ -20,10 +20,9 @@ func (s *fseDecoder) buildDtable() error {
if v == -1 { if v == -1 {
s.dt[highThreshold].setAddBits(uint8(i)) s.dt[highThreshold].setAddBits(uint8(i))
highThreshold-- highThreshold--
symbolNext[i] = 1 v = 1
} else {
symbolNext[i] = uint16(v)
} }
symbolNext[i] = uint16(v)
} }
} }
@ -35,10 +34,12 @@ func (s *fseDecoder) buildDtable() error {
for ss, v := range s.norm[:s.symbolLen] { for ss, v := range s.norm[:s.symbolLen] {
for i := 0; i < int(v); i++ { for i := 0; i < int(v); i++ {
s.dt[position].setAddBits(uint8(ss)) s.dt[position].setAddBits(uint8(ss))
position = (position + step) & tableMask for {
for position > highThreshold {
// lowprob area // lowprob area
position = (position + step) & tableMask position = (position + step) & tableMask
if position <= highThreshold {
break
}
} }
} }
} }

View File

@ -162,12 +162,12 @@ finalize:
MOVD h, ret+24(FP) MOVD h, ret+24(FP)
RET RET
// func writeBlocks(d *Digest, b []byte) int // func writeBlocks(s *Digest, b []byte) int
TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40 TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40
LDP ·primes+0(SB), (prime1, prime2) LDP ·primes+0(SB), (prime1, prime2)
// Load state. Assume v[1-4] are stored contiguously. // Load state. Assume v[1-4] are stored contiguously.
MOVD d+0(FP), digest MOVD s+0(FP), digest
LDP 0(digest), (v1, v2) LDP 0(digest), (v1, v2)
LDP 16(digest), (v3, v4) LDP 16(digest), (v3, v4)

View File

@ -5,7 +5,6 @@
#include "textflag.h" #include "textflag.h"
// func matchLen(a []byte, b []byte) int // func matchLen(a []byte, b []byte) int
// Requires: BMI
TEXT ·matchLen(SB), NOSPLIT, $0-56 TEXT ·matchLen(SB), NOSPLIT, $0-56
MOVQ a_base+0(FP), AX MOVQ a_base+0(FP), AX
MOVQ b_base+24(FP), CX MOVQ b_base+24(FP), CX
@ -17,17 +16,16 @@ TEXT ·matchLen(SB), NOSPLIT, $0-56
JB matchlen_match4_standalone JB matchlen_match4_standalone
matchlen_loopback_standalone: matchlen_loopback_standalone:
MOVQ (AX)(SI*1), BX MOVQ (AX)(SI*1), BX
XORQ (CX)(SI*1), BX XORQ (CX)(SI*1), BX
TESTQ BX, BX JZ matchlen_loop_standalone
JZ matchlen_loop_standalone
#ifdef GOAMD64_v3 #ifdef GOAMD64_v3
TZCNTQ BX, BX TZCNTQ BX, BX
#else #else
BSFQ BX, BX BSFQ BX, BX
#endif #endif
SARQ $0x03, BX SHRL $0x03, BX
LEAL (SI)(BX*1), SI LEAL (SI)(BX*1), SI
JMP gen_match_len_end JMP gen_match_len_end

View File

@ -157,8 +157,7 @@ sequenceDecs_decode_amd64_ll_update_zero:
// Update Literal Length State // Update Literal Length State
MOVBQZX DI, R14 MOVBQZX DI, R14
SHRQ $0x10, DI SHRL $0x10, DI
MOVWQZX DI, DI
LEAQ (BX)(R14*1), CX LEAQ (BX)(R14*1), CX
MOVQ DX, R15 MOVQ DX, R15
MOVQ CX, BX MOVQ CX, BX
@ -177,8 +176,7 @@ sequenceDecs_decode_amd64_ll_update_zero:
// Update Match Length State // Update Match Length State
MOVBQZX R8, R14 MOVBQZX R8, R14
SHRQ $0x10, R8 SHRL $0x10, R8
MOVWQZX R8, R8
LEAQ (BX)(R14*1), CX LEAQ (BX)(R14*1), CX
MOVQ DX, R15 MOVQ DX, R15
MOVQ CX, BX MOVQ CX, BX
@ -197,8 +195,7 @@ sequenceDecs_decode_amd64_ll_update_zero:
// Update Offset State // Update Offset State
MOVBQZX R9, R14 MOVBQZX R9, R14
SHRQ $0x10, R9 SHRL $0x10, R9
MOVWQZX R9, R9
LEAQ (BX)(R14*1), CX LEAQ (BX)(R14*1), CX
MOVQ DX, R15 MOVQ DX, R15
MOVQ CX, BX MOVQ CX, BX
@ -459,8 +456,7 @@ sequenceDecs_decode_56_amd64_ll_update_zero:
// Update Literal Length State // Update Literal Length State
MOVBQZX DI, R14 MOVBQZX DI, R14
SHRQ $0x10, DI SHRL $0x10, DI
MOVWQZX DI, DI
LEAQ (BX)(R14*1), CX LEAQ (BX)(R14*1), CX
MOVQ DX, R15 MOVQ DX, R15
MOVQ CX, BX MOVQ CX, BX
@ -479,8 +475,7 @@ sequenceDecs_decode_56_amd64_ll_update_zero:
// Update Match Length State // Update Match Length State
MOVBQZX R8, R14 MOVBQZX R8, R14
SHRQ $0x10, R8 SHRL $0x10, R8
MOVWQZX R8, R8
LEAQ (BX)(R14*1), CX LEAQ (BX)(R14*1), CX
MOVQ DX, R15 MOVQ DX, R15
MOVQ CX, BX MOVQ CX, BX
@ -499,8 +494,7 @@ sequenceDecs_decode_56_amd64_ll_update_zero:
// Update Offset State // Update Offset State
MOVBQZX R9, R14 MOVBQZX R9, R14
SHRQ $0x10, R9 SHRL $0x10, R9
MOVWQZX R9, R9
LEAQ (BX)(R14*1), CX LEAQ (BX)(R14*1), CX
MOVQ DX, R15 MOVQ DX, R15
MOVQ CX, BX MOVQ CX, BX
@ -772,11 +766,10 @@ sequenceDecs_decode_bmi2_fill_2_end:
BZHIQ R14, R15, R15 BZHIQ R14, R15, R15
// Update Offset State // Update Offset State
BZHIQ R8, R15, CX BZHIQ R8, R15, CX
SHRXQ R8, R15, R15 SHRXQ R8, R15, R15
MOVQ $0x00001010, R14 SHRL $0x10, R8
BEXTRQ R14, R8, R8 ADDQ CX, R8
ADDQ CX, R8
// Load ctx.ofTable // Load ctx.ofTable
MOVQ ctx+16(FP), CX MOVQ ctx+16(FP), CX
@ -784,11 +777,10 @@ sequenceDecs_decode_bmi2_fill_2_end:
MOVQ (CX)(R8*8), R8 MOVQ (CX)(R8*8), R8
// Update Match Length State // Update Match Length State
BZHIQ DI, R15, CX BZHIQ DI, R15, CX
SHRXQ DI, R15, R15 SHRXQ DI, R15, R15
MOVQ $0x00001010, R14 SHRL $0x10, DI
BEXTRQ R14, DI, DI ADDQ CX, DI
ADDQ CX, DI
// Load ctx.mlTable // Load ctx.mlTable
MOVQ ctx+16(FP), CX MOVQ ctx+16(FP), CX
@ -796,10 +788,9 @@ sequenceDecs_decode_bmi2_fill_2_end:
MOVQ (CX)(DI*8), DI MOVQ (CX)(DI*8), DI
// Update Literal Length State // Update Literal Length State
BZHIQ SI, R15, CX BZHIQ SI, R15, CX
MOVQ $0x00001010, R14 SHRL $0x10, SI
BEXTRQ R14, SI, SI ADDQ CX, SI
ADDQ CX, SI
// Load ctx.llTable // Load ctx.llTable
MOVQ ctx+16(FP), CX MOVQ ctx+16(FP), CX
@ -1032,11 +1023,10 @@ sequenceDecs_decode_56_bmi2_fill_end:
BZHIQ R14, R15, R15 BZHIQ R14, R15, R15
// Update Offset State // Update Offset State
BZHIQ R8, R15, CX BZHIQ R8, R15, CX
SHRXQ R8, R15, R15 SHRXQ R8, R15, R15
MOVQ $0x00001010, R14 SHRL $0x10, R8
BEXTRQ R14, R8, R8 ADDQ CX, R8
ADDQ CX, R8
// Load ctx.ofTable // Load ctx.ofTable
MOVQ ctx+16(FP), CX MOVQ ctx+16(FP), CX
@ -1044,11 +1034,10 @@ sequenceDecs_decode_56_bmi2_fill_end:
MOVQ (CX)(R8*8), R8 MOVQ (CX)(R8*8), R8
// Update Match Length State // Update Match Length State
BZHIQ DI, R15, CX BZHIQ DI, R15, CX
SHRXQ DI, R15, R15 SHRXQ DI, R15, R15
MOVQ $0x00001010, R14 SHRL $0x10, DI
BEXTRQ R14, DI, DI ADDQ CX, DI
ADDQ CX, DI
// Load ctx.mlTable // Load ctx.mlTable
MOVQ ctx+16(FP), CX MOVQ ctx+16(FP), CX
@ -1056,10 +1045,9 @@ sequenceDecs_decode_56_bmi2_fill_end:
MOVQ (CX)(DI*8), DI MOVQ (CX)(DI*8), DI
// Update Literal Length State // Update Literal Length State
BZHIQ SI, R15, CX BZHIQ SI, R15, CX
MOVQ $0x00001010, R14 SHRL $0x10, SI
BEXTRQ R14, SI, SI ADDQ CX, SI
ADDQ CX, SI
// Load ctx.llTable // Load ctx.llTable
MOVQ ctx+16(FP), CX MOVQ ctx+16(FP), CX
@ -1967,8 +1955,7 @@ sequenceDecs_decodeSync_amd64_ll_update_zero:
// Update Literal Length State // Update Literal Length State
MOVBQZX DI, R13 MOVBQZX DI, R13
SHRQ $0x10, DI SHRL $0x10, DI
MOVWQZX DI, DI
LEAQ (BX)(R13*1), CX LEAQ (BX)(R13*1), CX
MOVQ DX, R14 MOVQ DX, R14
MOVQ CX, BX MOVQ CX, BX
@ -1987,8 +1974,7 @@ sequenceDecs_decodeSync_amd64_ll_update_zero:
// Update Match Length State // Update Match Length State
MOVBQZX R8, R13 MOVBQZX R8, R13
SHRQ $0x10, R8 SHRL $0x10, R8
MOVWQZX R8, R8
LEAQ (BX)(R13*1), CX LEAQ (BX)(R13*1), CX
MOVQ DX, R14 MOVQ DX, R14
MOVQ CX, BX MOVQ CX, BX
@ -2007,8 +1993,7 @@ sequenceDecs_decodeSync_amd64_ll_update_zero:
// Update Offset State // Update Offset State
MOVBQZX R9, R13 MOVBQZX R9, R13
SHRQ $0x10, R9 SHRL $0x10, R9
MOVWQZX R9, R9
LEAQ (BX)(R13*1), CX LEAQ (BX)(R13*1), CX
MOVQ DX, R14 MOVQ DX, R14
MOVQ CX, BX MOVQ CX, BX
@ -2514,11 +2499,10 @@ sequenceDecs_decodeSync_bmi2_fill_2_end:
BZHIQ R13, R14, R14 BZHIQ R13, R14, R14
// Update Offset State // Update Offset State
BZHIQ R8, R14, CX BZHIQ R8, R14, CX
SHRXQ R8, R14, R14 SHRXQ R8, R14, R14
MOVQ $0x00001010, R13 SHRL $0x10, R8
BEXTRQ R13, R8, R8 ADDQ CX, R8
ADDQ CX, R8
// Load ctx.ofTable // Load ctx.ofTable
MOVQ ctx+16(FP), CX MOVQ ctx+16(FP), CX
@ -2526,11 +2510,10 @@ sequenceDecs_decodeSync_bmi2_fill_2_end:
MOVQ (CX)(R8*8), R8 MOVQ (CX)(R8*8), R8
// Update Match Length State // Update Match Length State
BZHIQ DI, R14, CX BZHIQ DI, R14, CX
SHRXQ DI, R14, R14 SHRXQ DI, R14, R14
MOVQ $0x00001010, R13 SHRL $0x10, DI
BEXTRQ R13, DI, DI ADDQ CX, DI
ADDQ CX, DI
// Load ctx.mlTable // Load ctx.mlTable
MOVQ ctx+16(FP), CX MOVQ ctx+16(FP), CX
@ -2538,10 +2521,9 @@ sequenceDecs_decodeSync_bmi2_fill_2_end:
MOVQ (CX)(DI*8), DI MOVQ (CX)(DI*8), DI
// Update Literal Length State // Update Literal Length State
BZHIQ SI, R14, CX BZHIQ SI, R14, CX
MOVQ $0x00001010, R13 SHRL $0x10, SI
BEXTRQ R13, SI, SI ADDQ CX, SI
ADDQ CX, SI
// Load ctx.llTable // Load ctx.llTable
MOVQ ctx+16(FP), CX MOVQ ctx+16(FP), CX
@ -3055,8 +3037,7 @@ sequenceDecs_decodeSync_safe_amd64_ll_update_zero:
// Update Literal Length State // Update Literal Length State
MOVBQZX DI, R13 MOVBQZX DI, R13
SHRQ $0x10, DI SHRL $0x10, DI
MOVWQZX DI, DI
LEAQ (BX)(R13*1), CX LEAQ (BX)(R13*1), CX
MOVQ DX, R14 MOVQ DX, R14
MOVQ CX, BX MOVQ CX, BX
@ -3075,8 +3056,7 @@ sequenceDecs_decodeSync_safe_amd64_ll_update_zero:
// Update Match Length State // Update Match Length State
MOVBQZX R8, R13 MOVBQZX R8, R13
SHRQ $0x10, R8 SHRL $0x10, R8
MOVWQZX R8, R8
LEAQ (BX)(R13*1), CX LEAQ (BX)(R13*1), CX
MOVQ DX, R14 MOVQ DX, R14
MOVQ CX, BX MOVQ CX, BX
@ -3095,8 +3075,7 @@ sequenceDecs_decodeSync_safe_amd64_ll_update_zero:
// Update Offset State // Update Offset State
MOVBQZX R9, R13 MOVBQZX R9, R13
SHRQ $0x10, R9 SHRL $0x10, R9
MOVWQZX R9, R9
LEAQ (BX)(R13*1), CX LEAQ (BX)(R13*1), CX
MOVQ DX, R14 MOVQ DX, R14
MOVQ CX, BX MOVQ CX, BX
@ -3704,11 +3683,10 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
BZHIQ R13, R14, R14 BZHIQ R13, R14, R14
// Update Offset State // Update Offset State
BZHIQ R8, R14, CX BZHIQ R8, R14, CX
SHRXQ R8, R14, R14 SHRXQ R8, R14, R14
MOVQ $0x00001010, R13 SHRL $0x10, R8
BEXTRQ R13, R8, R8 ADDQ CX, R8
ADDQ CX, R8
// Load ctx.ofTable // Load ctx.ofTable
MOVQ ctx+16(FP), CX MOVQ ctx+16(FP), CX
@ -3716,11 +3694,10 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
MOVQ (CX)(R8*8), R8 MOVQ (CX)(R8*8), R8
// Update Match Length State // Update Match Length State
BZHIQ DI, R14, CX BZHIQ DI, R14, CX
SHRXQ DI, R14, R14 SHRXQ DI, R14, R14
MOVQ $0x00001010, R13 SHRL $0x10, DI
BEXTRQ R13, DI, DI ADDQ CX, DI
ADDQ CX, DI
// Load ctx.mlTable // Load ctx.mlTable
MOVQ ctx+16(FP), CX MOVQ ctx+16(FP), CX
@ -3728,10 +3705,9 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
MOVQ (CX)(DI*8), DI MOVQ (CX)(DI*8), DI
// Update Literal Length State // Update Literal Length State
BZHIQ SI, R14, CX BZHIQ SI, R14, CX
MOVQ $0x00001010, R13 SHRL $0x10, SI
BEXTRQ R13, SI, SI ADDQ CX, SI
ADDQ CX, SI
// Load ctx.llTable // Load ctx.llTable
MOVQ ctx+16(FP), CX MOVQ ctx+16(FP), CX

4
vendor/modules.txt vendored
View File

@ -482,8 +482,8 @@ github.com/josharian/intern
# github.com/json-iterator/go v1.1.12 # github.com/json-iterator/go v1.1.12
## explicit; go 1.12 ## explicit; go 1.12
github.com/json-iterator/go github.com/json-iterator/go
# github.com/klauspost/compress v1.17.4 # github.com/klauspost/compress v1.17.9
## explicit; go 1.19 ## explicit; go 1.20
github.com/klauspost/compress github.com/klauspost/compress
github.com/klauspost/compress/fse github.com/klauspost/compress/fse
github.com/klauspost/compress/huff0 github.com/klauspost/compress/huff0