2018-05-19 14:49:46 +02:00
|
|
|
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
|
|
|
|
|
package zap
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bufio"
|
2022-01-01 16:26:27 +08:00
|
|
|
"fmt"
|
|
|
|
"io"
|
2018-05-19 14:49:46 +02:00
|
|
|
"math"
|
|
|
|
"os"
|
2020-05-10 07:40:54 +02:00
|
|
|
|
2021-02-28 18:08:33 -05:00
|
|
|
"github.com/blevesearch/vellum"
|
2018-05-19 14:49:46 +02:00
|
|
|
)
|
|
|
|
|
2019-02-18 08:50:26 +08:00
|
|
|
// Version is the version number exported by this package.
// NOTE(review): presumably the zap segment file-format version — confirm
// against the code that writes and validates the footer.
const Version uint32 = 11
|
|
|
|
|
|
|
|
// Type is the string identifier "zap" exported by this package.
// NOTE(review): presumably the segment type name reported to callers — confirm.
const Type string = "zap"
|
2018-05-19 14:49:46 +02:00
|
|
|
|
|
|
|
// fieldNotUninverted is the largest uint64 value.
// NOTE(review): presumably a sentinel offset marking a field that has no
// doc-value data — confirm against its users.
const fieldNotUninverted = math.MaxUint64
|
|
|
|
|
2020-05-10 07:40:54 +02:00
|
|
|
func (sb *SegmentBase) Persist(path string) error {
|
|
|
|
return PersistSegmentBase(sb, path)
|
|
|
|
}
|
|
|
|
|
2022-01-01 16:26:27 +08:00
|
|
|
// WriteTo is an implementation of io.WriterTo interface.
|
|
|
|
func (sb *SegmentBase) WriteTo(w io.Writer) (int64, error) {
|
|
|
|
if w == nil {
|
|
|
|
return 0, fmt.Errorf("invalid writer found")
|
|
|
|
}
|
|
|
|
|
|
|
|
n, err := persistSegmentBaseToWriter(sb, w)
|
|
|
|
return int64(n), err
|
|
|
|
}
|
|
|
|
|
2018-05-19 14:49:46 +02:00
|
|
|
// PersistSegmentBase persists SegmentBase in the zap file format.
|
|
|
|
func PersistSegmentBase(sb *SegmentBase, path string) error {
|
|
|
|
flag := os.O_RDWR | os.O_CREATE
|
|
|
|
|
|
|
|
f, err := os.OpenFile(path, flag, 0600)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
cleanup := func() {
|
|
|
|
_ = f.Close()
|
|
|
|
_ = os.Remove(path)
|
|
|
|
}
|
|
|
|
|
2022-01-01 16:26:27 +08:00
|
|
|
_, err = persistSegmentBaseToWriter(sb, f)
|
2018-05-19 14:49:46 +02:00
|
|
|
if err != nil {
|
|
|
|
cleanup()
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2022-01-01 16:26:27 +08:00
|
|
|
err = f.Sync()
|
2018-05-19 14:49:46 +02:00
|
|
|
if err != nil {
|
|
|
|
cleanup()
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2022-01-01 16:26:27 +08:00
|
|
|
err = f.Close()
|
2018-05-19 14:49:46 +02:00
|
|
|
if err != nil {
|
|
|
|
cleanup()
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2022-01-01 16:26:27 +08:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// bufWriter wraps a bufio.Writer and keeps a running total of the bytes
// that writer has accepted through Write.
type bufWriter struct {
	w *bufio.Writer // buffered destination
	n int           // sum of the byte counts returned by w.Write
}

// Write forwards in to the buffered writer, adds the byte count it reports
// to the running total, and returns that count and error unchanged.
func (bw *bufWriter) Write(in []byte) (written int, err error) {
	written, err = bw.w.Write(in)
	bw.n += written
	return written, err
}
|
|
|
|
|
|
|
|
func persistSegmentBaseToWriter(sb *SegmentBase, w io.Writer) (int, error) {
|
|
|
|
br := &bufWriter{w: bufio.NewWriter(w)}
|
|
|
|
|
|
|
|
_, err := br.Write(sb.mem)
|
2018-05-19 14:49:46 +02:00
|
|
|
if err != nil {
|
2022-01-01 16:26:27 +08:00
|
|
|
return 0, err
|
2018-05-19 14:49:46 +02:00
|
|
|
}
|
|
|
|
|
2022-01-01 16:26:27 +08:00
|
|
|
err = persistFooter(sb.numDocs, sb.storedIndexOffset, sb.fieldsIndexOffset,
|
|
|
|
sb.docValueOffset, sb.chunkFactor, sb.memCRC, br)
|
2018-05-19 14:49:46 +02:00
|
|
|
if err != nil {
|
2022-01-01 16:26:27 +08:00
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
|
|
|
|
err = br.w.Flush()
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
2018-05-19 14:49:46 +02:00
|
|
|
}
|
|
|
|
|
2022-01-01 16:26:27 +08:00
|
|
|
return br.n, nil
|
2018-05-19 14:49:46 +02:00
|
|
|
}
|
|
|
|
|
2018-11-11 00:55:36 +01:00
|
|
|
func persistStoredFieldValues(fieldID int,
|
|
|
|
storedFieldValues [][]byte, stf []byte, spf [][]uint64,
|
2019-02-18 08:50:26 +08:00
|
|
|
curr int, metaEncode varintEncoder, data []byte) (
|
2018-11-11 00:55:36 +01:00
|
|
|
int, []byte, error) {
|
|
|
|
for i := 0; i < len(storedFieldValues); i++ {
|
|
|
|
// encode field
|
2019-02-18 08:50:26 +08:00
|
|
|
_, err := metaEncode(uint64(fieldID))
|
2018-11-11 00:55:36 +01:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, err
|
|
|
|
}
|
|
|
|
// encode type
|
2019-02-18 08:50:26 +08:00
|
|
|
_, err = metaEncode(uint64(stf[i]))
|
2018-11-11 00:55:36 +01:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, err
|
|
|
|
}
|
|
|
|
// encode start offset
|
2019-02-18 08:50:26 +08:00
|
|
|
_, err = metaEncode(uint64(curr))
|
2018-11-11 00:55:36 +01:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, err
|
|
|
|
}
|
|
|
|
// end len
|
2019-02-18 08:50:26 +08:00
|
|
|
_, err = metaEncode(uint64(len(storedFieldValues[i])))
|
2018-11-11 00:55:36 +01:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, err
|
|
|
|
}
|
|
|
|
// encode number of array pos
|
2019-02-18 08:50:26 +08:00
|
|
|
_, err = metaEncode(uint64(len(spf[i])))
|
2018-11-11 00:55:36 +01:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, err
|
|
|
|
}
|
|
|
|
// encode all array positions
|
|
|
|
for _, pos := range spf[i] {
|
2019-02-18 08:50:26 +08:00
|
|
|
_, err = metaEncode(pos)
|
2018-11-11 00:55:36 +01:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
data = append(data, storedFieldValues[i]...)
|
|
|
|
curr += len(storedFieldValues[i])
|
|
|
|
}
|
|
|
|
|
|
|
|
return curr, data, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32,
|
|
|
|
fieldsMap map[string]uint16, fieldsInv []string, numDocs uint64,
|
|
|
|
storedIndexOffset uint64, fieldsIndexOffset uint64, docValueOffset uint64,
|
|
|
|
dictLocs []uint64) (*SegmentBase, error) {
|
2018-05-19 14:49:46 +02:00
|
|
|
sb := &SegmentBase{
|
2018-11-11 00:55:36 +01:00
|
|
|
mem: mem,
|
|
|
|
memCRC: memCRC,
|
2018-05-19 14:49:46 +02:00
|
|
|
chunkFactor: chunkFactor,
|
2018-11-11 00:55:36 +01:00
|
|
|
fieldsMap: fieldsMap,
|
|
|
|
fieldsInv: fieldsInv,
|
2018-05-19 14:49:46 +02:00
|
|
|
numDocs: numDocs,
|
|
|
|
storedIndexOffset: storedIndexOffset,
|
|
|
|
fieldsIndexOffset: fieldsIndexOffset,
|
|
|
|
docValueOffset: docValueOffset,
|
|
|
|
dictLocs: dictLocs,
|
2019-02-18 08:50:26 +08:00
|
|
|
fieldDvReaders: make(map[uint16]*docValueReader),
|
2019-11-27 17:23:33 +08:00
|
|
|
fieldFSTs: make(map[uint16]*vellum.FST),
|
2018-05-19 14:49:46 +02:00
|
|
|
}
|
2019-02-18 08:50:26 +08:00
|
|
|
sb.updateSize()
|
2018-05-19 14:49:46 +02:00
|
|
|
|
2019-02-18 08:50:26 +08:00
|
|
|
err := sb.loadDvReaders()
|
2018-05-19 14:49:46 +02:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return sb, nil
|
|
|
|
}
|