Module org.apache.lucene.codecs
Class SimpleTextBKDWriter
- java.lang.Object
-
- org.apache.lucene.codecs.simpletext.SimpleTextBKDWriter
-
- All Implemented Interfaces:
java.io.Closeable, java.lang.AutoCloseable
final class SimpleTextBKDWriter extends java.lang.Object implements java.io.Closeable
Forked from BKDWriter and simplified/specialized for SimpleText's usage
-
-
Nested Class Summary
Nested Classes Modifier and Type Class Description private class
SimpleTextBKDWriter.OneDimensionBKDWriter
-
Field Summary
Fields Modifier and Type Field Description static java.lang.String
CODEC_NAME
(package private) int[]
commonPrefixLengths
protected BKDConfig
config
How many dimensions we are storing at the leaf (data) nodes
static float
DEFAULT_MAX_MB_SORT_IN_HEAP
Default maximum heap to use, before spilling to (slower) disk
protected FixedBitSet
docsSeen
private boolean
finished
private int
maxDoc
(package private) double
maxMBSortInHeap
protected byte[]
maxPackedValue
Maximum per-dim values, packed
private int
maxPointsSortInHeap
protected byte[]
minPackedValue
Minimum per-dim values, packed
protected long
pointCount
private PointWriter
pointWriter
(package private) BytesRefBuilder
scratch
(package private) byte[]
scratch1
(package private) byte[]
scratch2
(package private) BytesRef
scratchBytesRef1
(package private) BytesRef
scratchBytesRef2
(package private) byte[]
scratchDiff
(package private) TrackingDirectoryWrapper
tempDir
(package private) java.lang.String
tempFileNamePrefix
private IndexOutput
tempInput
private long
totalPointCount
An upper bound on how many points the caller will add (includes deletions)
static int
VERSION_COMPRESSED_DOC_IDS
static int
VERSION_COMPRESSED_VALUES
static int
VERSION_CURRENT
static int
VERSION_IMPLICIT_SPLIT_DIM_1D
static int
VERSION_START
-
Constructor Summary
Constructors Constructor Description SimpleTextBKDWriter(int maxDoc, Directory tempDir, java.lang.String tempFileNamePrefix, BKDConfig config, double maxMBSortInHeap, long totalPointCount)
-
Method Summary
All Methods Static Methods Instance Methods Concrete Methods Modifier and Type Method Description void
add(byte[] packedValue, int docID)
private void
build(int nodeID, int leafNodeOffset, MutablePointTree reader, int from, int to, IndexOutput out, byte[] minPackedValue, byte[] maxPackedValue, byte[] splitPackedValues, long[] leafBlockFPs, int[] spareDocIds)
private void
build(int nodeID, int leafNodeOffset, BKDRadixSelector.PathSlice points, IndexOutput out, BKDRadixSelector radixSelector, byte[] minPackedValue, byte[] maxPackedValue, byte[] splitPackedValues, long[] leafBlockFPs, int[] spareDocIds)
The array (sized numDims) of PathSlice describes the cell we have currently recursed to.
private void
checkMaxLeafNodeCount(int numLeaves)
void
close()
private void
computeCommonPrefixLength(HeapPointWriter heapPointWriter, byte[] commonPrefix)
long
finish(IndexOutput out)
Writes the BKD tree to the provided IndexOutput and returns the file offset where the index was written.
long
getPointCount()
How many points have been added so far
private void
newline(IndexOutput out)
private void
rotateToTree(int nodeID, int offset, int count, byte[] index, java.util.List<byte[]> leafBlockStartValues)
protected int
split(byte[] minPackedValue, byte[] maxPackedValue)
private HeapPointWriter
switchToHeap(PointWriter source)
Pull a partition back into heap once the point count is low enough while recursing.
private boolean
valueInBounds(BytesRef packedValue, byte[] minPackedValue, byte[] maxPackedValue)
Called only in assert
private boolean
valueInOrder(long ord, int sortedDim, byte[] lastPackedValue, byte[] packedValue, int packedValueOffset, int doc, int lastDoc)
private boolean
valuesInOrderAndBounds(int count, int sortedDim, byte[] minPackedValue, byte[] maxPackedValue, java.util.function.IntFunction<BytesRef> values, int[] docs, int docsOffset)
private java.lang.Error
verifyChecksum(java.lang.Throwable priorException, PointWriter writer)
Called on exception, to check whether the checksum is also corrupt in this source, and add that information (checksum matched or didn't) as a suppressed exception.
static void
verifyParams(double maxMBSortInHeap, long totalPointCount)
private void
write(IndexOutput out, java.lang.String s)
private void
write(IndexOutput out, BytesRef b)
long
writeField(IndexOutput out, java.lang.String fieldName, MutablePointTree reader)
Write a field from a MutablePointTree.
private long
writeField1Dim(IndexOutput out, java.lang.String fieldName, MutablePointTree reader)
private long
writeFieldNDims(IndexOutput out, java.lang.String fieldName, MutablePointTree values)
private void
writeIndex(IndexOutput out, long[] leafBlockFPs, byte[] splitPackedValues, int maxPointsInLeafNode)
Subclass can change how it writes the index.
private void
writeInt(IndexOutput out, int x)
protected void
writeLeafBlockDocs(IndexOutput out, int[] docIDs, int start, int count)
protected void
writeLeafBlockPackedValues(IndexOutput out, int[] commonPrefixLengths, int count, int sortedDim, java.util.function.IntFunction<BytesRef> packedValues)
private void
writeLong(IndexOutput out, long x)
-
-
-
Field Detail
-
CODEC_NAME
public static final java.lang.String CODEC_NAME
- See Also:
- Constant Field Values
-
VERSION_START
public static final int VERSION_START
- See Also:
- Constant Field Values
-
VERSION_COMPRESSED_DOC_IDS
public static final int VERSION_COMPRESSED_DOC_IDS
- See Also:
- Constant Field Values
-
VERSION_COMPRESSED_VALUES
public static final int VERSION_COMPRESSED_VALUES
- See Also:
- Constant Field Values
-
VERSION_IMPLICIT_SPLIT_DIM_1D
public static final int VERSION_IMPLICIT_SPLIT_DIM_1D
- See Also:
- Constant Field Values
-
VERSION_CURRENT
public static final int VERSION_CURRENT
- See Also:
- Constant Field Values
-
DEFAULT_MAX_MB_SORT_IN_HEAP
public static final float DEFAULT_MAX_MB_SORT_IN_HEAP
Default maximum heap to use, before spilling to (slower) disk
- See Also:
- Constant Field Values
-
config
protected final BKDConfig config
How many dimensions we are storing at the leaf (data) nodes
-
scratch
final BytesRefBuilder scratch
-
tempDir
final TrackingDirectoryWrapper tempDir
-
tempFileNamePrefix
final java.lang.String tempFileNamePrefix
-
maxMBSortInHeap
final double maxMBSortInHeap
-
scratchDiff
final byte[] scratchDiff
-
scratch1
final byte[] scratch1
-
scratch2
final byte[] scratch2
-
scratchBytesRef1
final BytesRef scratchBytesRef1
-
scratchBytesRef2
final BytesRef scratchBytesRef2
-
commonPrefixLengths
final int[] commonPrefixLengths
-
docsSeen
protected final FixedBitSet docsSeen
-
pointWriter
private PointWriter pointWriter
-
finished
private boolean finished
-
tempInput
private IndexOutput tempInput
-
maxPointsSortInHeap
private final int maxPointsSortInHeap
-
minPackedValue
protected final byte[] minPackedValue
Minimum per-dim values, packed
-
maxPackedValue
protected final byte[] maxPackedValue
Maximum per-dim values, packed
-
pointCount
protected long pointCount
-
totalPointCount
private final long totalPointCount
An upper bound on how many points the caller will add (includes deletions)
-
maxDoc
private final int maxDoc
-
-
Method Detail
-
verifyParams
public static void verifyParams(double maxMBSortInHeap, long totalPointCount)
-
add
public void add(byte[] packedValue, int docID) throws java.io.IOException
- Throws:
java.io.IOException
-
getPointCount
public long getPointCount()
How many points have been added so far
-
writeField
public long writeField(IndexOutput out, java.lang.String fieldName, MutablePointTree reader) throws java.io.IOException
Write a field from a MutablePointTree. This way of writing points is faster than regular writes with BKDWriter.add(byte[], int) since there is opportunity for reordering points before writing them to disk. This method does not use transient disk in order to reorder points.
- Throws:
java.io.IOException
-
writeFieldNDims
private long writeFieldNDims(IndexOutput out, java.lang.String fieldName, MutablePointTree values) throws java.io.IOException
- Throws:
java.io.IOException
-
writeField1Dim
private long writeField1Dim(IndexOutput out, java.lang.String fieldName, MutablePointTree reader) throws java.io.IOException
- Throws:
java.io.IOException
-
rotateToTree
private void rotateToTree(int nodeID, int offset, int count, byte[] index, java.util.List<byte[]> leafBlockStartValues)
-
checkMaxLeafNodeCount
private void checkMaxLeafNodeCount(int numLeaves)
-
finish
public long finish(IndexOutput out) throws java.io.IOException
Writes the BKD tree to the provided IndexOutput and returns the file offset where the index was written.
- Throws:
java.io.IOException
-
writeIndex
private void writeIndex(IndexOutput out, long[] leafBlockFPs, byte[] splitPackedValues, int maxPointsInLeafNode) throws java.io.IOException
Subclass can change how it writes the index.
- Throws:
java.io.IOException
-
writeLeafBlockDocs
protected void writeLeafBlockDocs(IndexOutput out, int[] docIDs, int start, int count) throws java.io.IOException
- Throws:
java.io.IOException
-
writeLeafBlockPackedValues
protected void writeLeafBlockPackedValues(IndexOutput out, int[] commonPrefixLengths, int count, int sortedDim, java.util.function.IntFunction<BytesRef> packedValues) throws java.io.IOException
- Throws:
java.io.IOException
-
close
public void close() throws java.io.IOException
- Specified by:
close
in interface java.lang.AutoCloseable
- Specified by:
close
in interface java.io.Closeable
- Throws:
java.io.IOException
-
verifyChecksum
private java.lang.Error verifyChecksum(java.lang.Throwable priorException, PointWriter writer) throws java.io.IOException
Called on exception, to check whether the checksum is also corrupt in this source, and add that information (checksum matched or didn't) as a suppressed exception.
- Throws:
java.io.IOException
-
valueInBounds
private boolean valueInBounds(BytesRef packedValue, byte[] minPackedValue, byte[] maxPackedValue)
Called only in assert
-
split
protected int split(byte[] minPackedValue, byte[] maxPackedValue)
-
switchToHeap
private HeapPointWriter switchToHeap(PointWriter source) throws java.io.IOException
Pull a partition back into heap once the point count is low enough while recursing.
- Throws:
java.io.IOException
-
build
private void build(int nodeID, int leafNodeOffset, MutablePointTree reader, int from, int to, IndexOutput out, byte[] minPackedValue, byte[] maxPackedValue, byte[] splitPackedValues, long[] leafBlockFPs, int[] spareDocIds) throws java.io.IOException
- Throws:
java.io.IOException
-
build
private void build(int nodeID, int leafNodeOffset, BKDRadixSelector.PathSlice points, IndexOutput out, BKDRadixSelector radixSelector, byte[] minPackedValue, byte[] maxPackedValue, byte[] splitPackedValues, long[] leafBlockFPs, int[] spareDocIds) throws java.io.IOException
The array (sized numDims) of PathSlice describes the cell we have currently recursed to.
- Throws:
java.io.IOException
-
computeCommonPrefixLength
private void computeCommonPrefixLength(HeapPointWriter heapPointWriter, byte[] commonPrefix)
-
valuesInOrderAndBounds
private boolean valuesInOrderAndBounds(int count, int sortedDim, byte[] minPackedValue, byte[] maxPackedValue, java.util.function.IntFunction<BytesRef> values, int[] docs, int docsOffset) throws java.io.IOException
- Throws:
java.io.IOException
-
valueInOrder
private boolean valueInOrder(long ord, int sortedDim, byte[] lastPackedValue, byte[] packedValue, int packedValueOffset, int doc, int lastDoc)
-
write
private void write(IndexOutput out, java.lang.String s) throws java.io.IOException
- Throws:
java.io.IOException
-
writeInt
private void writeInt(IndexOutput out, int x) throws java.io.IOException
- Throws:
java.io.IOException
-
writeLong
private void writeLong(IndexOutput out, long x) throws java.io.IOException
- Throws:
java.io.IOException
-
write
private void write(IndexOutput out, BytesRef b) throws java.io.IOException
- Throws:
java.io.IOException
-
newline
private void newline(IndexOutput out) throws java.io.IOException
- Throws:
java.io.IOException
-
-