blob: 1223493ca7be8197f6666bf777d10cb1b023cde7 [file] [log] [blame]
khenaidooffe076b2019-01-15 16:08:08 -05001package bolt
2
3import (
4 "errors"
5 "fmt"
6 "hash/fnv"
7 "log"
8 "os"
9 "runtime"
10 "runtime/debug"
11 "strings"
12 "sync"
13 "time"
14 "unsafe"
15)
16
// maxMmapStep is the largest step that can be taken when remapping the mmap.
// mmapSize rounds sizes above 1GB up to a multiple of this value.
const maxMmapStep = 1 << 30 // 1GB

// version is the data file format version written into every meta page and
// checked by meta.validate.
const version = 2

// magic is a marker value stored in the meta page to indicate that a file
// is a Bolt DB.
const magic uint32 = 0xED0CDAED

// IgnoreNoSync specifies whether the NoSync field of a DB is ignored when
// syncing changes to a file. This is required as some operating systems,
// such as OpenBSD, do not have a unified buffer cache (UBC) and writes
// must be synchronized using the msync(2) syscall.
const IgnoreNoSync = runtime.GOOS == "openbsd"

// Default values copied into a DB instance by Open.
const (
	DefaultMaxBatchSize  int = 1000
	DefaultMaxBatchDelay     = 10 * time.Millisecond
	DefaultAllocSize         = 16 * 1024 * 1024
)

// defaultPageSize is the OS page size, captured once at package
// initialization.
var defaultPageSize = os.Getpagesize()
41
// DB represents a collection of buckets persisted to a file on disk.
// All data access is performed through transactions which can be obtained through the DB.
// All the functions on DB will return a ErrDatabaseNotOpen if accessed before Open() is called.
type DB struct {
	// When enabled, the database will perform a Check() after every commit.
	// A panic is issued if the database is in an inconsistent state. This
	// flag has a large performance impact so it should only be used for
	// debugging purposes.
	StrictMode bool

	// Setting the NoSync flag will cause the database to skip fsync()
	// calls after each commit. This can be useful when bulk loading data
	// into a database and you can restart the bulk load in the event of
	// a system failure or database corruption. Do not set this flag for
	// normal use.
	//
	// If the package global IgnoreNoSync constant is true, this value is
	// ignored. See the comment on that constant for more details.
	//
	// THIS IS UNSAFE. PLEASE USE WITH CAUTION.
	NoSync bool

	// When true, skips the truncate call when growing the database.
	// Setting this to true is only safe on non-ext3/ext4 systems.
	// Skipping truncation avoids preallocation of hard drive space and
	// bypasses a truncate() and fsync() syscall on remapping.
	//
	// https://github.com/boltdb/bolt/issues/284
	NoGrowSync bool

	// If you want to read the entire database fast, you can set MmapFlags to
	// syscall.MAP_POPULATE on Linux 2.6.23+ for sequential read-ahead.
	MmapFlags int

	// MaxBatchSize is the maximum size of a batch. Default value is
	// copied from DefaultMaxBatchSize in Open.
	//
	// If <=0, disables batching.
	//
	// Do not change concurrently with calls to Batch.
	MaxBatchSize int

	// MaxBatchDelay is the maximum delay before a batch starts.
	// Default value is copied from DefaultMaxBatchDelay in Open.
	//
	// If <=0, effectively disables batching.
	//
	// Do not change concurrently with calls to Batch.
	MaxBatchDelay time.Duration

	// AllocSize is the amount of space allocated when the database
	// needs to create new pages. This is done to amortize the cost
	// of truncate() and fsync() when growing the data file.
	// Default value is copied from DefaultAllocSize in Open.
	AllocSize int

	path     string            // path passed to Open; cleared by close
	file     *os.File          // handle of the data file
	lockfile *os.File          // windows only
	dataref  []byte            // mmap'ed readonly, write throws SEGV
	data     *[maxMapSize]byte // indexed by page() to locate pages in the mapping
	datasz   int               // compared against the required size in allocate to decide on a remap
	filesz   int               // current on disk file size
	meta0    *meta             // meta at page 0 of the mapping (set in mmap)
	meta1    *meta             // meta at page 1 of the mapping (set in mmap)
	pageSize int               // page size in bytes, taken from the meta page or the OS
	opened   bool              // set when Open constructs the DB, cleared by close
	rwtx     *Tx               // writable transaction most recently started by beginRWTx
	txs      []*Tx             // read-only transactions registered by beginTx, removed by removeTx
	freelist *freelist         // free page tracking, loaded in Open
	stats    Stats             // guarded by statlock

	// pagePool hands out pageSize-byte buffers; allocate uses it for
	// single-page allocations.
	pagePool sync.Pool

	batchMu sync.Mutex // guards batch
	batch   *batch     // batch currently accepting calls, or nil

	rwlock   sync.Mutex   // Allows only one writer at a time.
	metalock sync.Mutex   // Protects meta page access.
	mmaplock sync.RWMutex // Protects mmap access during remapping.
	statlock sync.RWMutex // Protects stats access.

	// ops holds overridable I/O functions; Open points writeAt at the data
	// file's WriteAt ("test hooks").
	ops struct {
		writeAt func(b []byte, off int64) (n int, err error)
	}

	// Read only mode.
	// When true, Update() and Begin(true) return ErrDatabaseReadOnly immediately.
	readOnly bool
}
131
132// Path returns the path to currently open database file.
133func (db *DB) Path() string {
134 return db.path
135}
136
137// GoString returns the Go string representation of the database.
138func (db *DB) GoString() string {
139 return fmt.Sprintf("bolt.DB{path:%q}", db.path)
140}
141
142// String returns the string representation of the database.
143func (db *DB) String() string {
144 return fmt.Sprintf("DB<%q>", db.path)
145}
146
147// Open creates and opens a database at the given path.
148// If the file does not exist then it will be created automatically.
149// Passing in nil options will cause Bolt to open the database with the default options.
150func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
151 var db = &DB{opened: true}
152
153 // Set default options if no options are provided.
154 if options == nil {
155 options = DefaultOptions
156 }
157 db.NoGrowSync = options.NoGrowSync
158 db.MmapFlags = options.MmapFlags
159
160 // Set default values for later DB operations.
161 db.MaxBatchSize = DefaultMaxBatchSize
162 db.MaxBatchDelay = DefaultMaxBatchDelay
163 db.AllocSize = DefaultAllocSize
164
165 flag := os.O_RDWR
166 if options.ReadOnly {
167 flag = os.O_RDONLY
168 db.readOnly = true
169 }
170
171 // Open data file and separate sync handler for metadata writes.
172 db.path = path
173 var err error
174 if db.file, err = os.OpenFile(db.path, flag|os.O_CREATE, mode); err != nil {
175 _ = db.close()
176 return nil, err
177 }
178
179 // Lock file so that other processes using Bolt in read-write mode cannot
180 // use the database at the same time. This would cause corruption since
181 // the two processes would write meta pages and free pages separately.
182 // The database file is locked exclusively (only one process can grab the lock)
183 // if !options.ReadOnly.
184 // The database file is locked using the shared lock (more than one process may
185 // hold a lock at the same time) otherwise (options.ReadOnly is set).
186 if err := flock(db, mode, !db.readOnly, options.Timeout); err != nil {
187 _ = db.close()
188 return nil, err
189 }
190
191 // Default values for test hooks
192 db.ops.writeAt = db.file.WriteAt
193
194 // Initialize the database if it doesn't exist.
195 if info, err := db.file.Stat(); err != nil {
196 return nil, err
197 } else if info.Size() == 0 {
198 // Initialize new files with meta pages.
199 if err := db.init(); err != nil {
200 return nil, err
201 }
202 } else {
203 // Read the first meta page to determine the page size.
204 var buf [0x1000]byte
205 if _, err := db.file.ReadAt(buf[:], 0); err == nil {
206 m := db.pageInBuffer(buf[:], 0).meta()
207 if err := m.validate(); err != nil {
208 // If we can't read the page size, we can assume it's the same
209 // as the OS -- since that's how the page size was chosen in the
210 // first place.
211 //
212 // If the first page is invalid and this OS uses a different
213 // page size than what the database was created with then we
214 // are out of luck and cannot access the database.
215 db.pageSize = os.Getpagesize()
216 } else {
217 db.pageSize = int(m.pageSize)
218 }
219 }
220 }
221
222 // Initialize page pool.
223 db.pagePool = sync.Pool{
224 New: func() interface{} {
225 return make([]byte, db.pageSize)
226 },
227 }
228
229 // Memory map the data file.
230 if err := db.mmap(options.InitialMmapSize); err != nil {
231 _ = db.close()
232 return nil, err
233 }
234
235 // Read in the freelist.
236 db.freelist = newFreelist()
237 db.freelist.read(db.page(db.meta().freelist))
238
239 // Mark the database as opened and return.
240 return db, nil
241}
242
// mmap opens the underlying memory-mapped file and initializes the meta references.
// minsz is the minimum size that the new mmap can be.
//
// The mmap write lock is held for the whole call. Any existing mapping is
// unmapped before the new one is created. An error is returned if the file
// cannot be stat'ed, is smaller than two pages, the computed size is
// rejected by mmapSize, unmapping/mapping fails, or both meta pages fail
// validation.
func (db *DB) mmap(minsz int) error {
	db.mmaplock.Lock()
	defer db.mmaplock.Unlock()

	info, err := db.file.Stat()
	if err != nil {
		return fmt.Errorf("mmap stat error: %s", err)
	} else if int(info.Size()) < db.pageSize*2 {
		// The file must hold at least the two meta pages.
		return fmt.Errorf("file size too small")
	}

	// Ensure the size is at least the minimum size.
	var size = int(info.Size())
	if size < minsz {
		size = minsz
	}
	size, err = db.mmapSize(size)
	if err != nil {
		return err
	}

	// Dereference all mmap references before unmapping.
	if db.rwtx != nil {
		db.rwtx.root.dereference()
	}

	// Unmap existing data before continuing.
	if err := db.munmap(); err != nil {
		return err
	}

	// Memory-map the data file as a byte slice.
	if err := mmap(db, size); err != nil {
		return err
	}

	// Save references to the meta pages.
	db.meta0 = db.page(0).meta()
	db.meta1 = db.page(1).meta()

	// Validate the meta pages. We only return an error if both meta pages fail
	// validation, since meta0 failing validation means that it wasn't saved
	// properly -- but we can recover using meta1. And vice-versa.
	err0 := db.meta0.validate()
	err1 := db.meta1.validate()
	if err0 != nil && err1 != nil {
		return err0
	}

	return nil
}
296
297// munmap unmaps the data file from memory.
298func (db *DB) munmap() error {
299 if err := munmap(db); err != nil {
300 return fmt.Errorf("unmap error: " + err.Error())
301 }
302 return nil
303}
304
305// mmapSize determines the appropriate size for the mmap given the current size
306// of the database. The minimum size is 32KB and doubles until it reaches 1GB.
307// Returns an error if the new mmap size is greater than the max allowed.
308func (db *DB) mmapSize(size int) (int, error) {
309 // Double the size from 32KB until 1GB.
310 for i := uint(15); i <= 30; i++ {
311 if size <= 1<<i {
312 return 1 << i, nil
313 }
314 }
315
316 // Verify the requested size is not above the maximum allowed.
317 if size > maxMapSize {
318 return 0, fmt.Errorf("mmap too large")
319 }
320
321 // If larger than 1GB then grow by 1GB at a time.
322 sz := int64(size)
323 if remainder := sz % int64(maxMmapStep); remainder > 0 {
324 sz += int64(maxMmapStep) - remainder
325 }
326
327 // Ensure that the mmap size is a multiple of the page size.
328 // This should always be true since we're incrementing in MBs.
329 pageSize := int64(db.pageSize)
330 if (sz % pageSize) != 0 {
331 sz = ((sz / pageSize) + 1) * pageSize
332 }
333
334 // If we've exceeded the max size then only grow up to the max size.
335 if sz > maxMapSize {
336 sz = maxMapSize
337 }
338
339 return int(sz), nil
340}
341
342// init creates a new database file and initializes its meta pages.
343func (db *DB) init() error {
344 // Set the page size to the OS page size.
345 db.pageSize = os.Getpagesize()
346
347 // Create two meta pages on a buffer.
348 buf := make([]byte, db.pageSize*4)
349 for i := 0; i < 2; i++ {
350 p := db.pageInBuffer(buf[:], pgid(i))
351 p.id = pgid(i)
352 p.flags = metaPageFlag
353
354 // Initialize the meta page.
355 m := p.meta()
356 m.magic = magic
357 m.version = version
358 m.pageSize = uint32(db.pageSize)
359 m.freelist = 2
360 m.root = bucket{root: 3}
361 m.pgid = 4
362 m.txid = txid(i)
363 m.checksum = m.sum64()
364 }
365
366 // Write an empty freelist at page 3.
367 p := db.pageInBuffer(buf[:], pgid(2))
368 p.id = pgid(2)
369 p.flags = freelistPageFlag
370 p.count = 0
371
372 // Write an empty leaf page at page 4.
373 p = db.pageInBuffer(buf[:], pgid(3))
374 p.id = pgid(3)
375 p.flags = leafPageFlag
376 p.count = 0
377
378 // Write the buffer to our data file.
379 if _, err := db.ops.writeAt(buf, 0); err != nil {
380 return err
381 }
382 if err := fdatasync(db); err != nil {
383 return err
384 }
385
386 return nil
387}
388
// Close releases all database resources.
// All transactions must be closed before closing the database.
//
// Close acquires the writer lock, then the meta lock, then a read lock on
// the mmap before delegating to close(); beginRWTx and beginTx take these
// locks in the same relative order. Because a writable transaction holds
// the writer lock until it finishes, Close blocks while one is open.
func (db *DB) Close() error {
	db.rwlock.Lock()
	defer db.rwlock.Unlock()

	db.metalock.Lock()
	defer db.metalock.Unlock()

	db.mmaplock.RLock()
	defer db.mmaplock.RUnlock()

	return db.close()
}
403
404func (db *DB) close() error {
405 if !db.opened {
406 return nil
407 }
408
409 db.opened = false
410
411 db.freelist = nil
412
413 // Clear ops.
414 db.ops.writeAt = nil
415
416 // Close the mmap.
417 if err := db.munmap(); err != nil {
418 return err
419 }
420
421 // Close file handles.
422 if db.file != nil {
423 // No need to unlock read-only file.
424 if !db.readOnly {
425 // Unlock the file.
426 if err := funlock(db); err != nil {
427 log.Printf("bolt.Close(): funlock error: %s", err)
428 }
429 }
430
431 // Close the file descriptor.
432 if err := db.file.Close(); err != nil {
433 return fmt.Errorf("db file close: %s", err)
434 }
435 db.file = nil
436 }
437
438 db.path = ""
439 return nil
440}
441
442// Begin starts a new transaction.
443// Multiple read-only transactions can be used concurrently but only one
444// write transaction can be used at a time. Starting multiple write transactions
445// will cause the calls to block and be serialized until the current write
446// transaction finishes.
447//
448// Transactions should not be dependent on one another. Opening a read
449// transaction and a write transaction in the same goroutine can cause the
450// writer to deadlock because the database periodically needs to re-mmap itself
451// as it grows and it cannot do that while a read transaction is open.
452//
453// If a long running read transaction (for example, a snapshot transaction) is
454// needed, you might want to set DB.InitialMmapSize to a large enough value
455// to avoid potential blocking of write transaction.
456//
457// IMPORTANT: You must close read-only transactions after you are finished or
458// else the database will not reclaim old pages.
459func (db *DB) Begin(writable bool) (*Tx, error) {
460 if writable {
461 return db.beginRWTx()
462 }
463 return db.beginTx()
464}
465
// beginTx starts a read-only transaction.
//
// On success the transaction is registered in db.txs and the mmap read lock
// taken here is still held when the function returns; removeTx releases it.
// Returns ErrDatabaseNotOpen (with both locks released) if the database is
// not open.
func (db *DB) beginTx() (*Tx, error) {
	// Lock the meta pages while we initialize the transaction. We obtain
	// the meta lock before the mmap lock because that's the order that the
	// write transaction will obtain them.
	db.metalock.Lock()

	// Obtain a read-only lock on the mmap. When the mmap is remapped it will
	// obtain a write lock so all transactions must finish before it can be
	// remapped.
	db.mmaplock.RLock()

	// Exit if the database is not open yet.
	if !db.opened {
		db.mmaplock.RUnlock()
		db.metalock.Unlock()
		return nil, ErrDatabaseNotOpen
	}

	// Create a transaction associated with the database.
	t := &Tx{}
	t.init(db)

	// Keep track of transaction until it closes.
	db.txs = append(db.txs, t)
	n := len(db.txs)

	// Unlock the meta pages.
	db.metalock.Unlock()

	// Update the transaction stats.
	db.statlock.Lock()
	db.stats.TxN++
	db.stats.OpenTxN = n
	db.statlock.Unlock()

	return t, nil
}
503
// beginRWTx starts a writable transaction.
//
// Returns ErrDatabaseReadOnly if the database was opened read-only and
// ErrDatabaseNotOpen if it is not open. On success the writer lock taken
// here remains held when the function returns, and the new transaction is
// recorded in db.rwtx.
func (db *DB) beginRWTx() (*Tx, error) {
	// If the database was opened with Options.ReadOnly, return an error.
	if db.readOnly {
		return nil, ErrDatabaseReadOnly
	}

	// Obtain writer lock. This is released by the transaction when it closes.
	// This enforces only one writer transaction at a time.
	db.rwlock.Lock()

	// Once we have the writer lock then we can lock the meta pages so that
	// we can set up the transaction.
	db.metalock.Lock()
	defer db.metalock.Unlock()

	// Exit if the database is not open yet.
	if !db.opened {
		db.rwlock.Unlock()
		return nil, ErrDatabaseNotOpen
	}

	// Create a transaction associated with the database.
	t := &Tx{writable: true}
	t.init(db)
	db.rwtx = t

	// Free any pages associated with closed read-only transactions.
	// minid becomes the smallest txid among the open read-only transactions,
	// or stays at the maximum value when there are none.
	var minid txid = 0xFFFFFFFFFFFFFFFF
	for _, t := range db.txs {
		if t.meta.txid < minid {
			minid = t.meta.txid
		}
	}
	if minid > 0 {
		db.freelist.release(minid - 1)
	}

	return t, nil
}
543
544// removeTx removes a transaction from the database.
545func (db *DB) removeTx(tx *Tx) {
546 // Release the read lock on the mmap.
547 db.mmaplock.RUnlock()
548
549 // Use the meta lock to restrict access to the DB object.
550 db.metalock.Lock()
551
552 // Remove the transaction.
553 for i, t := range db.txs {
554 if t == tx {
555 db.txs = append(db.txs[:i], db.txs[i+1:]...)
556 break
557 }
558 }
559 n := len(db.txs)
560
561 // Unlock the meta pages.
562 db.metalock.Unlock()
563
564 // Merge statistics.
565 db.statlock.Lock()
566 db.stats.OpenTxN = n
567 db.stats.TxStats.add(&tx.stats)
568 db.statlock.Unlock()
569}
570
571// Update executes a function within the context of a read-write managed transaction.
572// If no error is returned from the function then the transaction is committed.
573// If an error is returned then the entire transaction is rolled back.
574// Any error that is returned from the function or returned from the commit is
575// returned from the Update() method.
576//
577// Attempting to manually commit or rollback within the function will cause a panic.
578func (db *DB) Update(fn func(*Tx) error) error {
579 t, err := db.Begin(true)
580 if err != nil {
581 return err
582 }
583
584 // Make sure the transaction rolls back in the event of a panic.
585 defer func() {
586 if t.db != nil {
587 t.rollback()
588 }
589 }()
590
591 // Mark as a managed tx so that the inner function cannot manually commit.
592 t.managed = true
593
594 // If an error is returned from the function then rollback and return error.
595 err = fn(t)
596 t.managed = false
597 if err != nil {
598 _ = t.Rollback()
599 return err
600 }
601
602 return t.Commit()
603}
604
605// View executes a function within the context of a managed read-only transaction.
606// Any error that is returned from the function is returned from the View() method.
607//
608// Attempting to manually rollback within the function will cause a panic.
609func (db *DB) View(fn func(*Tx) error) error {
610 t, err := db.Begin(false)
611 if err != nil {
612 return err
613 }
614
615 // Make sure the transaction rolls back in the event of a panic.
616 defer func() {
617 if t.db != nil {
618 t.rollback()
619 }
620 }()
621
622 // Mark as a managed tx so that the inner function cannot manually rollback.
623 t.managed = true
624
625 // If an error is returned from the function then pass it through.
626 err = fn(t)
627 t.managed = false
628 if err != nil {
629 _ = t.Rollback()
630 return err
631 }
632
633 if err := t.Rollback(); err != nil {
634 return err
635 }
636
637 return nil
638}
639
640// Batch calls fn as part of a batch. It behaves similar to Update,
641// except:
642//
643// 1. concurrent Batch calls can be combined into a single Bolt
644// transaction.
645//
646// 2. the function passed to Batch may be called multiple times,
647// regardless of whether it returns error or not.
648//
649// This means that Batch function side effects must be idempotent and
650// take permanent effect only after a successful return is seen in
651// caller.
652//
653// The maximum batch size and delay can be adjusted with DB.MaxBatchSize
654// and DB.MaxBatchDelay, respectively.
655//
656// Batch is only useful when there are multiple goroutines calling it.
657func (db *DB) Batch(fn func(*Tx) error) error {
658 errCh := make(chan error, 1)
659
660 db.batchMu.Lock()
661 if (db.batch == nil) || (db.batch != nil && len(db.batch.calls) >= db.MaxBatchSize) {
662 // There is no existing batch, or the existing batch is full; start a new one.
663 db.batch = &batch{
664 db: db,
665 }
666 db.batch.timer = time.AfterFunc(db.MaxBatchDelay, db.batch.trigger)
667 }
668 db.batch.calls = append(db.batch.calls, call{fn: fn, err: errCh})
669 if len(db.batch.calls) >= db.MaxBatchSize {
670 // wake up batch, it's ready to run
671 go db.batch.trigger()
672 }
673 db.batchMu.Unlock()
674
675 err := <-errCh
676 if err == trySolo {
677 err = db.Update(fn)
678 }
679 return err
680}
681
// call is one function submitted through DB.Batch together with the
// channel on which its result is reported back to the submitter.
type call struct {
	fn  func(*Tx) error // function to run inside the batch transaction
	err chan<- error    // receives the outcome (or trySolo) for this call
}
686
// batch collects the calls submitted through DB.Batch that will be run
// together in one Update transaction.
type batch struct {
	db    *DB
	timer *time.Timer // fires trigger after DB.MaxBatchDelay
	start sync.Once   // ensures run executes at most once
	calls []call      // pending calls, appended under DB.batchMu
}
693
// trigger runs the batch if it hasn't already been run.
// It may be invoked both by the batch timer and by DB.Batch when the batch
// fills up; the sync.Once guarantees run executes only once.
func (b *batch) trigger() {
	b.start.Do(b.run)
}
698
// run performs the transactions in the batch and communicates results
// back to DB.Batch.
//
// All pending calls are executed inside a single Update. If one of them
// fails (returns an error or panics), it is removed from the batch, its
// submitter is told to retry on its own via trySolo, and the remaining
// calls are retried in a new Update. Once an Update runs every remaining
// call without a failure, its result is sent to all of them.
func (b *batch) run() {
	b.db.batchMu.Lock()
	b.timer.Stop()
	// Make sure no new work is added to this batch, but don't break
	// other batches.
	if b.db.batch == b {
		b.db.batch = nil
	}
	b.db.batchMu.Unlock()

retry:
	for len(b.calls) > 0 {
		// Index of the call that failed during this attempt, or -1.
		var failIdx = -1
		err := b.db.Update(func(tx *Tx) error {
			for i, c := range b.calls {
				if err := safelyCall(c.fn, tx); err != nil {
					failIdx = i
					return err
				}
			}
			return nil
		})

		if failIdx >= 0 {
			// take the failing transaction out of the batch. it's
			// safe to shorten b.calls here because db.batch no longer
			// points to us, so DB.Batch cannot append to this batch
			// anymore (batchMu itself was released above).
			c := b.calls[failIdx]
			// Swap-remove: the last call takes the failed call's slot.
			b.calls[failIdx], b.calls = b.calls[len(b.calls)-1], b.calls[:len(b.calls)-1]
			// tell the submitter re-run it solo, continue with the rest of the batch
			c.err <- trySolo
			continue retry
		}

		// pass success, or bolt internal errors, to all callers
		for _, c := range b.calls {
			if c.err != nil {
				c.err <- err
			}
		}
		break retry
	}
}
744
// trySolo is a special sentinel error value used for signaling that a
// transaction function should be re-run. It should never be seen by
// callers: DB.Batch intercepts it and re-runs the function through
// DB.Update instead of returning it.
var trySolo = errors.New("batch function returned an error and should be re-run solo")
749
// panicked wraps a value recovered from a panic so that it can be passed
// around as an error.
type panicked struct {
	reason interface{}
}

// Error implements the error interface. When the recovered value is itself
// an error its message is returned unchanged; any other value is formatted
// as "panic: <value>".
func (p panicked) Error() string {
	switch r := p.reason.(type) {
	case error:
		return r.Error()
	default:
		return fmt.Sprintf("panic: %v", r)
	}
}
760
761func safelyCall(fn func(*Tx) error, tx *Tx) (err error) {
762 defer func() {
763 if p := recover(); p != nil {
764 err = panicked{p}
765 }
766 }()
767 return fn(tx)
768}
769
// Sync executes fdatasync() against the database file handle.
//
// This is not necessary under normal operation, however, if you use NoSync
// then it allows you to force the database file to sync against the disk.
// The error from fdatasync, if any, is returned as-is.
func (db *DB) Sync() error { return fdatasync(db) }
775
776// Stats retrieves ongoing performance stats for the database.
777// This is only updated when a transaction closes.
778func (db *DB) Stats() Stats {
779 db.statlock.RLock()
780 defer db.statlock.RUnlock()
781 return db.stats
782}
783
784// This is for internal access to the raw data bytes from the C cursor, use
785// carefully, or not at all.
786func (db *DB) Info() *Info {
787 return &Info{uintptr(unsafe.Pointer(&db.data[0])), db.pageSize}
788}
789
790// page retrieves a page reference from the mmap based on the current page size.
791func (db *DB) page(id pgid) *page {
792 pos := id * pgid(db.pageSize)
793 return (*page)(unsafe.Pointer(&db.data[pos]))
794}
795
796// pageInBuffer retrieves a page reference from a given byte array based on the current page size.
797func (db *DB) pageInBuffer(b []byte, id pgid) *page {
798 return (*page)(unsafe.Pointer(&b[id*pgid(db.pageSize)]))
799}
800
801// meta retrieves the current meta page reference.
802func (db *DB) meta() *meta {
803 // We have to return the meta with the highest txid which doesn't fail
804 // validation. Otherwise, we can cause errors when in fact the database is
805 // in a consistent state. metaA is the one with the higher txid.
806 metaA := db.meta0
807 metaB := db.meta1
808 if db.meta1.txid > db.meta0.txid {
809 metaA = db.meta1
810 metaB = db.meta0
811 }
812
813 // Use higher meta page if valid. Otherwise fallback to previous, if valid.
814 if err := metaA.validate(); err == nil {
815 return metaA
816 } else if err := metaB.validate(); err == nil {
817 return metaB
818 }
819
820 // This should never be reached, because both meta1 and meta0 were validated
821 // on mmap() and we do fsync() on every write.
822 panic("bolt.DB.meta(): invalid meta pages")
823}
824
825// allocate returns a contiguous block of memory starting at a given page.
826func (db *DB) allocate(count int) (*page, error) {
827 // Allocate a temporary buffer for the page.
828 var buf []byte
829 if count == 1 {
830 buf = db.pagePool.Get().([]byte)
831 } else {
832 buf = make([]byte, count*db.pageSize)
833 }
834 p := (*page)(unsafe.Pointer(&buf[0]))
835 p.overflow = uint32(count - 1)
836
837 // Use pages from the freelist if they are available.
838 if p.id = db.freelist.allocate(count); p.id != 0 {
839 return p, nil
840 }
841
842 // Resize mmap() if we're at the end.
843 p.id = db.rwtx.meta.pgid
844 var minsz = int((p.id+pgid(count))+1) * db.pageSize
845 if minsz >= db.datasz {
846 if err := db.mmap(minsz); err != nil {
847 return nil, fmt.Errorf("mmap allocate error: %s", err)
848 }
849 }
850
851 // Move the page id high water mark.
852 db.rwtx.meta.pgid += pgid(count)
853
854 return p, nil
855}
856
857// grow grows the size of the database to the given sz.
858func (db *DB) grow(sz int) error {
859 // Ignore if the new size is less than available file size.
860 if sz <= db.filesz {
861 return nil
862 }
863
864 // If the data is smaller than the alloc size then only allocate what's needed.
865 // Once it goes over the allocation size then allocate in chunks.
866 if db.datasz < db.AllocSize {
867 sz = db.datasz
868 } else {
869 sz += db.AllocSize
870 }
871
872 // Truncate and fsync to ensure file size metadata is flushed.
873 // https://github.com/boltdb/bolt/issues/284
874 if !db.NoGrowSync && !db.readOnly {
875 if runtime.GOOS != "windows" {
876 if err := db.file.Truncate(int64(sz)); err != nil {
877 return fmt.Errorf("file resize error: %s", err)
878 }
879 }
880 if err := db.file.Sync(); err != nil {
881 return fmt.Errorf("file sync error: %s", err)
882 }
883 }
884
885 db.filesz = sz
886 return nil
887}
888
// IsReadOnly reports whether the database is in read-only mode, which Open
// enables when Options.ReadOnly is set.
func (db *DB) IsReadOnly() bool {
	return db.readOnly
}
892
// Options represents the options that can be set when opening a database.
type Options struct {
	// Timeout is the amount of time to wait to obtain a file lock.
	// When set to zero it will wait indefinitely. This option is only
	// available on Darwin and Linux.
	Timeout time.Duration

	// Sets the DB.NoGrowSync flag before memory mapping the file.
	NoGrowSync bool

	// Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to
	// grab a shared lock (UNIX).
	ReadOnly bool

	// Sets the DB.MmapFlags flag before memory mapping the file.
	MmapFlags int

	// InitialMmapSize is the initial mmap size of the database
	// in bytes. Read transactions won't block write transaction
	// if the InitialMmapSize is large enough to hold database mmap
	// size. (See DB.Begin for more information)
	//
	// If <=0, the initial map size is 0.
	// If InitialMmapSize is smaller than the previous database size,
	// it takes no effect.
	InitialMmapSize int
}
920
// DefaultOptions represent the options used if nil options are passed into Open().
// No timeout is used which will cause Bolt to wait indefinitely for a lock.
// Fields not listed here keep their zero values.
var DefaultOptions = &Options{
	Timeout:    0,
	NoGrowSync: false,
}
927
// Stats represents statistics about the database.
type Stats struct {
	// Freelist stats
	FreePageN     int // total number of free pages on the freelist
	PendingPageN  int // total number of pending pages on the freelist
	FreeAlloc     int // total bytes allocated in free pages
	FreelistInuse int // total bytes used by the freelist

	// Transaction stats
	TxN     int // total number of started read transactions
	OpenTxN int // number of currently open read transactions

	TxStats TxStats // global, ongoing stats.
}
942
943// Sub calculates and returns the difference between two sets of database stats.
944// This is useful when obtaining stats at two different points and time and
945// you need the performance counters that occurred within that time span.
946func (s *Stats) Sub(other *Stats) Stats {
947 if other == nil {
948 return *s
949 }
950 var diff Stats
951 diff.FreePageN = s.FreePageN
952 diff.PendingPageN = s.PendingPageN
953 diff.FreeAlloc = s.FreeAlloc
954 diff.FreelistInuse = s.FreelistInuse
955 diff.TxN = other.TxN - s.TxN
956 diff.TxStats = s.TxStats.Sub(&other.TxStats)
957 return diff
958}
959
// add merges the transaction statistics of other into s. Only TxStats is
// accumulated; every other field of s is left untouched.
func (s *Stats) add(other *Stats) {
	s.TxStats.add(&other.TxStats)
}
963
// Info is the value returned by DB.Info: the address of the mapped data
// and the database page size.
type Info struct {
	Data     uintptr // address of the first byte of the mmap'ed data
	PageSize int     // page size in bytes
}
968
// meta is the content of a meta page. The checksum covers every field that
// precedes it in the struct (see sum64), so the field order must not be
// changed.
type meta struct {
	magic    uint32 // must equal the package magic constant
	version  uint32 // must equal the package version constant
	pageSize uint32 // page size the database was created with
	flags    uint32
	root     bucket // root bucket; its root page id must be below pgid
	freelist pgid   // page id of the freelist page; must be below pgid
	pgid     pgid   // high water mark: first page id not yet allocated
	txid     txid   // transaction id; its parity selects meta page 0 or 1 in write
	checksum uint64 // FNV-1a hash of the preceding fields; 0 skips the check in validate
}
980
981// validate checks the marker bytes and version of the meta page to ensure it matches this binary.
982func (m *meta) validate() error {
983 if m.magic != magic {
984 return ErrInvalid
985 } else if m.version != version {
986 return ErrVersionMismatch
987 } else if m.checksum != 0 && m.checksum != m.sum64() {
988 return ErrChecksum
989 }
990 return nil
991}
992
// copy copies one meta object to another: every field of m is assigned to
// dest by value.
func (m *meta) copy(dest *meta) {
	*dest = *m
}
997
998// write writes the meta onto a page.
999func (m *meta) write(p *page) {
1000 if m.root.root >= m.pgid {
1001 panic(fmt.Sprintf("root bucket pgid (%d) above high water mark (%d)", m.root.root, m.pgid))
1002 } else if m.freelist >= m.pgid {
1003 panic(fmt.Sprintf("freelist pgid (%d) above high water mark (%d)", m.freelist, m.pgid))
1004 }
1005
1006 // Page id is either going to be 0 or 1 which we can determine by the transaction ID.
1007 p.id = pgid(m.txid % 2)
1008 p.flags |= metaPageFlag
1009
1010 // Calculate the checksum.
1011 m.checksum = m.sum64()
1012
1013 m.copy(p.meta())
1014}
1015
1016// generates the checksum for the meta.
1017func (m *meta) sum64() uint64 {
1018 var h = fnv.New64a()
1019 _, _ = h.Write((*[unsafe.Offsetof(meta{}.checksum)]byte)(unsafe.Pointer(m))[:])
1020 return h.Sum64()
1021}
1022
// _assert panics when condition is false. The panic value is the string
// "assertion failed: " followed by msg formatted with v.
func _assert(condition bool, msg string, v ...interface{}) {
	if condition {
		return
	}
	panic(fmt.Sprintf("assertion failed: "+msg, v...))
}
1029
// warn writes its arguments to standard error, formatted like
// fmt.Println.
func warn(v ...interface{}) {
	fmt.Fprintln(os.Stderr, v...)
}
// warnf writes a formatted message followed by a newline to standard
// error.
func warnf(msg string, v ...interface{}) {
	fmt.Fprintf(os.Stderr, msg+"\n", v...)
}
1032
// printstack writes the current goroutine's stack trace to standard error,
// omitting the first two lines of the debug.Stack output.
func printstack() {
	lines := strings.Split(string(debug.Stack()), "\n")
	fmt.Fprintln(os.Stderr, strings.Join(lines[2:], "\n"))
}