blob: 587b8cc02def8dd65bfea87438189692deb31464 [file] [log] [blame]
khenaidooab1f7bd2019-11-14 14:00:27 -05001package bbolt
2
3import (
4 "fmt"
5 "sort"
6 "unsafe"
7)
8
9// txPending holds a list of pgids and corresponding allocation txns
10// that are pending to be freed.
11type txPending struct {
12 ids []pgid
13 alloctx []txid // txids allocating the ids
14 lastReleaseBegin txid // beginning txid of last matching releaseRange
15}
16
17// pidSet holds the set of starting pgids which have the same span size
18type pidSet map[pgid]struct{}
19
20// freelist represents a list of all pages that are available for allocation.
21// It also tracks pages that have been freed but are still in use by open transactions.
22type freelist struct {
23 freelistType FreelistType // freelist type
24 ids []pgid // all free and available free page ids.
25 allocs map[pgid]txid // mapping of txid that allocated a pgid.
26 pending map[txid]*txPending // mapping of soon-to-be free page ids by tx.
27 cache map[pgid]bool // fast lookup of all free and pending page ids.
28 freemaps map[uint64]pidSet // key is the size of continuous pages(span), value is a set which contains the starting pgids of same size
29 forwardMap map[pgid]uint64 // key is start pgid, value is its span size
30 backwardMap map[pgid]uint64 // key is end pgid, value is its span size
31 allocate func(txid txid, n int) pgid // the freelist allocate func
32 free_count func() int // the function which gives you free page number
33 mergeSpans func(ids pgids) // the mergeSpan func
34 getFreePageIDs func() []pgid // get free pgids func
35 readIDs func(pgids []pgid) // readIDs func reads list of pages and init the freelist
36}
37
38// newFreelist returns an empty, initialized freelist.
39func newFreelist(freelistType FreelistType) *freelist {
40 f := &freelist{
41 freelistType: freelistType,
42 allocs: make(map[pgid]txid),
43 pending: make(map[txid]*txPending),
44 cache: make(map[pgid]bool),
45 freemaps: make(map[uint64]pidSet),
46 forwardMap: make(map[pgid]uint64),
47 backwardMap: make(map[pgid]uint64),
48 }
49
50 if freelistType == FreelistMapType {
51 f.allocate = f.hashmapAllocate
52 f.free_count = f.hashmapFreeCount
53 f.mergeSpans = f.hashmapMergeSpans
54 f.getFreePageIDs = f.hashmapGetFreePageIDs
55 f.readIDs = f.hashmapReadIDs
56 } else {
57 f.allocate = f.arrayAllocate
58 f.free_count = f.arrayFreeCount
59 f.mergeSpans = f.arrayMergeSpans
60 f.getFreePageIDs = f.arrayGetFreePageIDs
61 f.readIDs = f.arrayReadIDs
62 }
63
64 return f
65}
66
67// size returns the size of the page after serialization.
68func (f *freelist) size() int {
69 n := f.count()
70 if n >= 0xFFFF {
71 // The first element will be used to store the count. See freelist.write.
72 n++
73 }
74 return pageHeaderSize + (int(unsafe.Sizeof(pgid(0))) * n)
75}
76
77// count returns count of pages on the freelist
78func (f *freelist) count() int {
79 return f.free_count() + f.pending_count()
80}
81
82// arrayFreeCount returns count of free pages(array version)
83func (f *freelist) arrayFreeCount() int {
84 return len(f.ids)
85}
86
87// pending_count returns count of pending pages
88func (f *freelist) pending_count() int {
89 var count int
90 for _, txp := range f.pending {
91 count += len(txp.ids)
92 }
93 return count
94}
95
96// copyall copies into dst a list of all free ids and all pending ids in one sorted list.
97// f.count returns the minimum length required for dst.
98func (f *freelist) copyall(dst []pgid) {
99 m := make(pgids, 0, f.pending_count())
100 for _, txp := range f.pending {
101 m = append(m, txp.ids...)
102 }
103 sort.Sort(m)
104 mergepgids(dst, f.getFreePageIDs(), m)
105}
106
107// arrayAllocate returns the starting page id of a contiguous list of pages of a given size.
108// If a contiguous block cannot be found then 0 is returned.
109func (f *freelist) arrayAllocate(txid txid, n int) pgid {
110 if len(f.ids) == 0 {
111 return 0
112 }
113
114 var initial, previd pgid
115 for i, id := range f.ids {
116 if id <= 1 {
117 panic(fmt.Sprintf("invalid page allocation: %d", id))
118 }
119
120 // Reset initial page if this is not contiguous.
121 if previd == 0 || id-previd != 1 {
122 initial = id
123 }
124
125 // If we found a contiguous block then remove it and return it.
126 if (id-initial)+1 == pgid(n) {
127 // If we're allocating off the beginning then take the fast path
128 // and just adjust the existing slice. This will use extra memory
129 // temporarily but the append() in free() will realloc the slice
130 // as is necessary.
131 if (i + 1) == n {
132 f.ids = f.ids[i+1:]
133 } else {
134 copy(f.ids[i-n+1:], f.ids[i+1:])
135 f.ids = f.ids[:len(f.ids)-n]
136 }
137
138 // Remove from the free cache.
139 for i := pgid(0); i < pgid(n); i++ {
140 delete(f.cache, initial+i)
141 }
142 f.allocs[initial] = txid
143 return initial
144 }
145
146 previd = id
147 }
148 return 0
149}
150
151// free releases a page and its overflow for a given transaction id.
152// If the page is already free then a panic will occur.
153func (f *freelist) free(txid txid, p *page) {
154 if p.id <= 1 {
155 panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.id))
156 }
157
158 // Free page and all its overflow pages.
159 txp := f.pending[txid]
160 if txp == nil {
161 txp = &txPending{}
162 f.pending[txid] = txp
163 }
164 allocTxid, ok := f.allocs[p.id]
165 if ok {
166 delete(f.allocs, p.id)
167 } else if (p.flags & freelistPageFlag) != 0 {
168 // Freelist is always allocated by prior tx.
169 allocTxid = txid - 1
170 }
171
172 for id := p.id; id <= p.id+pgid(p.overflow); id++ {
173 // Verify that page is not already free.
174 if f.cache[id] {
175 panic(fmt.Sprintf("page %d already freed", id))
176 }
177 // Add to the freelist and cache.
178 txp.ids = append(txp.ids, id)
179 txp.alloctx = append(txp.alloctx, allocTxid)
180 f.cache[id] = true
181 }
182}
183
184// release moves all page ids for a transaction id (or older) to the freelist.
185func (f *freelist) release(txid txid) {
186 m := make(pgids, 0)
187 for tid, txp := range f.pending {
188 if tid <= txid {
189 // Move transaction's pending pages to the available freelist.
190 // Don't remove from the cache since the page is still free.
191 m = append(m, txp.ids...)
192 delete(f.pending, tid)
193 }
194 }
195 f.mergeSpans(m)
196}
197
198// releaseRange moves pending pages allocated within an extent [begin,end] to the free list.
199func (f *freelist) releaseRange(begin, end txid) {
200 if begin > end {
201 return
202 }
203 var m pgids
204 for tid, txp := range f.pending {
205 if tid < begin || tid > end {
206 continue
207 }
208 // Don't recompute freed pages if ranges haven't updated.
209 if txp.lastReleaseBegin == begin {
210 continue
211 }
212 for i := 0; i < len(txp.ids); i++ {
213 if atx := txp.alloctx[i]; atx < begin || atx > end {
214 continue
215 }
216 m = append(m, txp.ids[i])
217 txp.ids[i] = txp.ids[len(txp.ids)-1]
218 txp.ids = txp.ids[:len(txp.ids)-1]
219 txp.alloctx[i] = txp.alloctx[len(txp.alloctx)-1]
220 txp.alloctx = txp.alloctx[:len(txp.alloctx)-1]
221 i--
222 }
223 txp.lastReleaseBegin = begin
224 if len(txp.ids) == 0 {
225 delete(f.pending, tid)
226 }
227 }
228 f.mergeSpans(m)
229}
230
231// rollback removes the pages from a given pending tx.
232func (f *freelist) rollback(txid txid) {
233 // Remove page ids from cache.
234 txp := f.pending[txid]
235 if txp == nil {
236 return
237 }
238 var m pgids
239 for i, pgid := range txp.ids {
240 delete(f.cache, pgid)
241 tx := txp.alloctx[i]
242 if tx == 0 {
243 continue
244 }
245 if tx != txid {
246 // Pending free aborted; restore page back to alloc list.
247 f.allocs[pgid] = tx
248 } else {
249 // Freed page was allocated by this txn; OK to throw away.
250 m = append(m, pgid)
251 }
252 }
253 // Remove pages from pending list and mark as free if allocated by txid.
254 delete(f.pending, txid)
255 f.mergeSpans(m)
256}
257
258// freed returns whether a given page is in the free list.
259func (f *freelist) freed(pgid pgid) bool {
260 return f.cache[pgid]
261}
262
263// read initializes the freelist from a freelist page.
264func (f *freelist) read(p *page) {
265 if (p.flags & freelistPageFlag) == 0 {
266 panic(fmt.Sprintf("invalid freelist page: %d, page type is %s", p.id, p.typ()))
267 }
268 // If the page.count is at the max uint16 value (64k) then it's considered
269 // an overflow and the size of the freelist is stored as the first element.
270 idx, count := 0, int(p.count)
271 if count == 0xFFFF {
272 idx = 1
273 count = int(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0])
274 }
275
276 // Copy the list of page ids from the freelist.
277 if count == 0 {
278 f.ids = nil
279 } else {
280 ids := ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[idx : idx+count]
281
282 // copy the ids, so we don't modify on the freelist page directly
283 idsCopy := make([]pgid, count)
284 copy(idsCopy, ids)
285 // Make sure they're sorted.
286 sort.Sort(pgids(idsCopy))
287
288 f.readIDs(idsCopy)
289 }
290}
291
292// arrayReadIDs initializes the freelist from a given list of ids.
293func (f *freelist) arrayReadIDs(ids []pgid) {
294 f.ids = ids
295 f.reindex()
296}
297
298func (f *freelist) arrayGetFreePageIDs() []pgid {
299 return f.ids
300}
301
302// write writes the page ids onto a freelist page. All free and pending ids are
303// saved to disk since in the event of a program crash, all pending ids will
304// become free.
305func (f *freelist) write(p *page) error {
306 // Combine the old free pgids and pgids waiting on an open transaction.
307
308 // Update the header flag.
309 p.flags |= freelistPageFlag
310
311 // The page.count can only hold up to 64k elements so if we overflow that
312 // number then we handle it by putting the size in the first element.
313 lenids := f.count()
314 if lenids == 0 {
315 p.count = uint16(lenids)
316 } else if lenids < 0xFFFF {
317 p.count = uint16(lenids)
318 f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[:])
319 } else {
320 p.count = 0xFFFF
321 ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0] = pgid(lenids)
322 f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[1:])
323 }
324
325 return nil
326}
327
328// reload reads the freelist from a page and filters out pending items.
329func (f *freelist) reload(p *page) {
330 f.read(p)
331
332 // Build a cache of only pending pages.
333 pcache := make(map[pgid]bool)
334 for _, txp := range f.pending {
335 for _, pendingID := range txp.ids {
336 pcache[pendingID] = true
337 }
338 }
339
340 // Check each page in the freelist and build a new available freelist
341 // with any pages not in the pending lists.
342 var a []pgid
343 for _, id := range f.getFreePageIDs() {
344 if !pcache[id] {
345 a = append(a, id)
346 }
347 }
348
349 f.readIDs(a)
350}
351
352// noSyncReload reads the freelist from pgids and filters out pending items.
353func (f *freelist) noSyncReload(pgids []pgid) {
354 // Build a cache of only pending pages.
355 pcache := make(map[pgid]bool)
356 for _, txp := range f.pending {
357 for _, pendingID := range txp.ids {
358 pcache[pendingID] = true
359 }
360 }
361
362 // Check each page in the freelist and build a new available freelist
363 // with any pages not in the pending lists.
364 var a []pgid
365 for _, id := range pgids {
366 if !pcache[id] {
367 a = append(a, id)
368 }
369 }
370
371 f.readIDs(a)
372}
373
374// reindex rebuilds the free cache based on available and pending free lists.
375func (f *freelist) reindex() {
376 ids := f.getFreePageIDs()
377 f.cache = make(map[pgid]bool, len(ids))
378 for _, id := range ids {
379 f.cache[id] = true
380 }
381 for _, txp := range f.pending {
382 for _, pendingID := range txp.ids {
383 f.cache[pendingID] = true
384 }
385 }
386}
387
388// arrayMergeSpans try to merge list of pages(represented by pgids) with existing spans but using array
389func (f *freelist) arrayMergeSpans(ids pgids) {
390 sort.Sort(ids)
391 f.ids = pgids(f.ids).merge(ids)
392}