@@ -10,58 +10,65 @@ import kotlin.collections.mutableListOf
1010/* * a BTree1 that uses OpenFileExtended and tracks its own tiling. */
1111internal class BTree1data (
1212 val raf : OpenFileExtended ,
13- val rootNodeAddress : Long ,
13+ rootNodeAddress : Long ,
1414 varShape : LongArray ,
1515 chunkShape : LongArray ,
1616) {
1717 val tiling = Tiling (varShape, chunkShape)
1818 val ndimStorage = chunkShape.size
19+ val rootNode: BTreeNode
1920
20- fun rootNode (): BTreeNode = BTreeNode (rootNodeAddress, null )
21+ init {
22+ rootNode = BTreeNode (rootNodeAddress, null )
23+ }
24+
/**
 * Lazily walks every tile order from 0 up to (but not including) tiling.nelems.
 *
 * Each element pairs the tile order (as a Long) with the chunk found for it in the
 * B-tree, or with a placeholder from [missingDataChunk] when the lookup returns null.
 */
fun asSequence(): Sequence<Pair<Long, DataChunk>> = sequence {
    for (order in 0 until tiling.nelems) {
        val chunk = findDataChunk(order) ?: missingDataChunk(order)
        yield(order.toLong() to chunk)
    }
}
32+
/** Looks up the chunk for tile [order] starting at the root node; null when the tree has no entry for it. */
internal fun findDataChunk(order: Int): DataChunk? = rootNode.findDataChunk(order)
2136
2237 // here both internal and leaf are the same structure
2338 // Btree nodes Level 1A1 - Version 1 B-trees
2439 inner class BTreeNode (val address : Long , val parent : BTreeNode ? ) {
25- val level: Int
26- val nentries: Int
27- private val leftAddress: Long
28- private val rightAddress: Long
40+ var level: Int = 0
41+ var nentries: Int = 0
2942
30- val keys = mutableListOf<LongArray >()
31- val values = mutableListOf<DataChunkIF >()
43+ val keyValues = mutableListOf<Pair <Int , DataChunk >>() // tile order to DataChunk
3244 val children = mutableListOf<BTreeNode >()
3345
46+ var lastOrder : Int = 0
47+
3448 init {
35- val state = OpenFileState (raf.getFileOffset(address), false )
36- val magic: String = raf.readString(state, 4 )
37- check(magic == " TREE" ) { " DataBTree doesnt start with TREE" }
38-
39- val type: Int = raf.readByte(state).toInt()
40- check(type == 1 ) { " DataBTree must be type 1" }
41-
42- level = raf.readByte(state).toInt() // leaf nodes are level 0
43- nentries = raf.readShort(state).toInt() // number of children to which this node points
44- leftAddress = raf.readOffset(state)
45- rightAddress = raf.readOffset(state)
46-
47- if (nentries == 0 ) {
48- val chunkSize = raf.readInt(state)
49- val filterMask = raf.readInt(state)
50- val inner = LongArray (ndimStorage) { j -> raf.readLong(state) }
51- val key = DataChunkKey (chunkSize, filterMask, inner)
52- val childPointer = raf.readAddress(state)
53- keys.add(inner)
54- values.add(DataChunkEntry1 (this , key, childPointer))
55- } else {
49+ if (address > 0 ) {
50+ val state = OpenFileState (raf.getFileOffset(address), false )
51+ val magic: String = raf.readString(state, 4 )
52+ check(magic == " TREE" ) { " DataBTree doesnt start with TREE" }
53+
54+ val type: Int = raf.readByte(state).toInt()
55+ check(type == 1 ) { " DataBTree must be type 1" }
56+
57+ level = raf.readByte(state).toInt() // leaf nodes are level 0
58+ nentries = raf.readShort(state).toInt() // number of children to which this node points
59+ val leftAddress = raf.readOffset(state)
60+ val rightAddress = raf.readOffset(state)
61+
5662 repeat(nentries) {
5763 val chunkSize = raf.readInt(state)
5864 val filterMask = raf.readInt(state)
5965 val inner = LongArray (ndimStorage) { j -> raf.readLong(state) }
60- val key = DataChunkKey (chunkSize, filterMask, inner)
66+ val order = tiling.order(inner).toInt()
67+ val key = DataChunkKey (order, chunkSize, filterMask)
6168 val childPointer = raf.readAddress(state) // 4 or 8 bytes, then add fileOffset
6269 if (level == 0 ) {
63- keys .add(inner )
64- values.add( DataChunkEntry1 ( this , key, childPointer))
70+ keyValues .add(Pair (order, DataChunk (key, childPointer)) )
71+ lastOrder = order
6572 } else {
6673 children.add(BTreeNode (childPointer, this ))
6774 }
@@ -72,44 +79,52 @@ internal class BTree1data(
7279 // but most nodes will point to less than that number of children""
7380 }
7481
75- // return only the leaf nodes, in any order
76- fun asSequence (): Sequence <Pair <Long , DataChunkIF >> = sequence {
82+ // this does not have missing data. Use iterator on the Btree1data class
83+ // return only the leaf nodes, in depth-first order
/**
 * Lazily yields the (tile order, chunk) entries stored beneath this node.
 *
 * A node with children contributes nothing itself and forwards each child's entries in
 * child order; a node without children yields its own keyValues in stored order.
 */
fun asSequence(): Sequence<Pair<Int, DataChunkIF>> = sequence {
    if (children.isEmpty()) {
        // leaf node: emit its own entries
        yieldAll(keyValues)
    } else {
        // node with children: recurse, left to right
        for (childNode in children) {
            yieldAll(childNode.asSequence())
        }
    }
}
88- }
8994
90- data class DataChunkKey (val chunkSize : Int , val filterMask : Int , val offsets : LongArray ) {
91- override fun equals (other : Any? ): Boolean {
92- if (this == = other) return true
93- if (other !is DataChunkKey ) return false
94- if (! offsets.contentEquals(other.offsets)) return false
95- return true
/**
 * Finds the chunk stored under tile order [wantOrder] in the subtree rooted at this node.
 *
 * Assumes entries are stored in ascending tile order, so the first child whose largest
 * stored order is >= [wantOrder] is the only child that can contain it.
 *
 * @param wantOrder tile order of the chunk to look up
 * @return the matching chunk, or null when this subtree has no entry for [wantOrder]
 */
fun findDataChunk(wantOrder: Int): DataChunk? {
    if (children.isEmpty()) { // leaf node: search its own entries
        return keyValues.firstOrNull { it.first == wantOrder }?.second
    }
    // lastOrder is only assigned while reading leaf entries, so on an internal child it is
    // still 0. Comparing against it directly would reject every wantOrder > 0 in trees more
    // than two levels deep; compare against the largest order actually stored below the child.
    val candidate = children.firstOrNull { wantOrder <= it.largestOrder() }
    return candidate?.findDataChunk(wantOrder)
}

/** Largest tile order stored under this node, taken as the lastOrder of its rightmost leaf. */
private fun largestOrder(): Int {
    var node: BTreeNode = this
    while (node.children.isNotEmpty()) {
        node = node.children.last()
    }
    // NOTE(review): a rightmost leaf with no entries reports 0 here, same as the original field.
    return node.lastOrder
}
97107
98- override fun hashCode (): Int {
99- return offsets.contentHashCode()
100- }
101108 }
102109
/**
 * Key describing one chunk entry.
 *
 * @property order tile order of the chunk
 * @property chunkSize chunk size value carried with the entry
 * @property filterMask filter mask value carried with the entry
 */
data class DataChunkKey(
    val order: Int,
    val chunkSize: Int,
    val filterMask: Int,
)
111+
103112 // childAddress = data chunk (level 1) else a child node
104- data class DataChunkEntry1 ( val parent : BTreeNode , val key : DataChunkKey , val childAddress : Long ) : DataChunkIF {
113+ inner class DataChunk ( val key : DataChunkKey , val childAddress : Long ) : DataChunkIF {
105114 override fun childAddress () = childAddress
106- override fun offsets () = key.offsets
115+ override fun offsets () = tiling.orderToIndex( key.order.toLong())
107116 override fun isMissing () = (childAddress <= 0L ) // may be 0 or -1
108117 override fun chunkSize () = key.chunkSize
109118 override fun filterMask () = key.filterMask
110119
111- override fun show (tiling : Tiling ) : String = " chunkSize=${key.chunkSize} , chunkStart=${key.offsets.contentToString()} " +
112- " , tile= ${tiling.tile(key.offsets).contentToString()} "
120+ override fun show (tiling : Tiling ) : String = " chunkSize=${key.chunkSize} , chunkStart=${offsets().contentToString()} " +
121+ " , tile= ${tiling.tile(offsets() ).contentToString()} "
122+
123+ fun show () = show(tiling)
124+ }
125+
/**
 * Builds a placeholder chunk for tile [order]: chunkSize and filterMask are 0 and the
 * child address is -1.
 */
fun missingDataChunk(order: Int): DataChunk =
    DataChunk(DataChunkKey(order = order, chunkSize = 0, filterMask = 0), -1L)
114129}
115130
0 commit comments