2020-01-30 14:03:24 +01:00
package fs
import (
"flag"
"fmt"
"os"
2023-03-26 00:33:09 +01:00
"sync/atomic"
2020-01-30 14:03:24 +01:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/metrics"
)
2020-05-12 19:18:57 +02:00
// is32BitPtr reports whether pointers are 32 bits wide on the target arch:
// shifting an all-ones uintptr right by 32 bits leaves zero only in that case.
//
// It is used as the default value for -fs.disableMmap, so mmap is disabled
// on such arches in order to be able to work with files exceeding 2^32 bytes.
const is32BitPtr = (^uintptr(0) >> 32) == 0

// disableMmap is the -fs.disableMmap command-line flag.
//
// When it is set, MustOpenReaderAt doesn't mmap the opened file.
var disableMmap = flag.Bool("fs.disableMmap", is32BitPtr,
	"Whether to use pread() instead of mmap() for reading data files. "+
		"By default mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. "+
		"mmap() is usually faster for reading small data chunks than pread()")
2020-01-30 14:03:24 +01:00
// MustReadAtCloser is the interface for random-access readers,
// which must be closed when they are no longer needed.
type MustReadAtCloser interface {
	// MustReadAt must fill p with len(p) bytes read starting from the offset off.
	MustReadAt(p []byte, off int64)

	// MustClose must close the reader.
	MustClose()
}
// ReaderAt is a random-access reader for a single file.
//
// Data is served either from the mmap'ed file contents or from the file itself.
type ReaderAt struct {
	// readCalls and readBytes accumulate per-reader stats when useLocalStats is set.
	// They are updated via sync/atomic in MustReadAt.
	//
	// NOTE(review): presumably these fields go first in order to guarantee
	// 64-bit alignment for atomic operations on 32-bit arches - confirm before reordering.
	readCalls uint64
	readBytes uint64

	// f is the opened file to read from.
	f *os.File

	// mmapData holds the mmap'ed contents of f. It is empty when mmap isn't used.
	mmapData []byte

	// useLocalStats selects per-reader stats collection instead of the global counters.
	useLocalStats bool
}
// MustReadAt reads len(p) bytes at off from r.
func ( r * ReaderAt ) MustReadAt ( p [ ] byte , off int64 ) {
if len ( p ) == 0 {
return
}
2020-06-05 18:07:57 +02:00
if off < 0 {
logger . Panicf ( "off=%d cannot be negative" , off )
}
2021-02-09 15:49:01 +01:00
if len ( r . mmapData ) == 0 {
2020-01-30 14:03:24 +01:00
n , err := r . f . ReadAt ( p , off )
if err != nil {
logger . Panicf ( "FATAL: cannot read %d bytes at offset %d of file %q: %s" , len ( p ) , off , r . f . Name ( ) , err )
}
if n != len ( p ) {
logger . Panicf ( "FATAL: unexpected number of bytes read; got %d; want %d" , n , len ( p ) )
}
} else {
2020-06-05 18:07:57 +02:00
if off > int64 ( len ( r . mmapData ) - len ( p ) ) {
2020-01-30 14:03:24 +01:00
logger . Panicf ( "off=%d is out of allowed range [0...%d] for len(p)=%d" , off , len ( r . mmapData ) - len ( p ) , len ( p ) )
}
2020-06-05 18:07:57 +02:00
src := r . mmapData [ off : ]
2021-02-09 15:49:01 +01:00
// The copy() below may result in thread block as described at https://valyala.medium.com/mmap-in-go-considered-harmful-d92a25cb161d .
// But production workload proved this is OK in most cases, so use it without fear :)
copy ( p , src )
2020-01-30 14:03:24 +01:00
}
2023-03-26 00:33:09 +01:00
if r . useLocalStats {
atomic . AddUint64 ( & r . readCalls , 1 )
atomic . AddUint64 ( & r . readBytes , uint64 ( len ( p ) ) )
} else {
readCalls . Inc ( )
readBytes . Add ( len ( p ) )
}
2020-01-30 14:03:24 +01:00
}
// MustClose closes r.
func ( r * ReaderAt ) MustClose ( ) {
fname := r . f . Name ( )
if len ( r . mmapData ) > 0 {
2021-02-26 23:37:07 +01:00
if err := mUnmap ( r . mmapData [ : cap ( r . mmapData ) ] ) ; err != nil {
2020-01-30 14:03:24 +01:00
logger . Panicf ( "FATAL: cannot unmap data for file %q: %s" , fname , err )
}
r . mmapData = nil
}
MustClose ( r . f )
r . f = nil
2023-03-26 00:33:09 +01:00
if r . useLocalStats {
readCalls . Add ( int ( r . readCalls ) )
readBytes . Add ( int ( r . readBytes ) )
r . readCalls = 0
r . readBytes = 0
r . useLocalStats = false
}
2020-01-30 14:03:24 +01:00
readersCount . Dec ( )
}
2023-03-26 00:33:09 +01:00
// SetUseLocalStats makes r collect read stats locally instead of updating the global stats.
//
// It must be called before the first MustReadAt() call.
//
// Local stats collection may improve performance on systems with many CPU cores,
// because the collected stats is pushed to the global stats only once, at MustClose() call,
// instead of being pushed at every MustReadAt() call.
func (r *ReaderAt) SetUseLocalStats() {
	r.useLocalStats = true
}
2020-01-30 14:03:24 +01:00
// MustFadviseSequentialRead hints the OS that the file behind r is read mostly sequentially.
//
// If prefetch is set, then the OS is additionally hinted to prefetch the file data.
//
// logger.Panicf is called if the hint cannot be applied.
func (r *ReaderAt) MustFadviseSequentialRead(prefetch bool) {
	err := fadviseSequentialRead(r.f, prefetch)
	if err == nil {
		return
	}
	logger.Panicf("FATAL: error in fadviseSequentialRead(%q, %v): %s", r.f.Name(), prefetch, err)
}
2020-11-23 08:55:38 +01:00
// MustOpenReaderAt opens ReaderAt for reading from filename.
2020-01-30 14:03:24 +01:00
//
// MustClose must be called on the returned ReaderAt when it is no longer needed.
2020-11-23 08:55:38 +01:00
func MustOpenReaderAt ( path string ) * ReaderAt {
2020-01-30 14:03:24 +01:00
f , err := os . Open ( path )
if err != nil {
2022-12-04 07:00:20 +01:00
logger . Panicf ( "FATAL: cannot open file for reading: %s" , err )
2020-01-30 14:03:24 +01:00
}
var r ReaderAt
r . f = f
if ! * disableMmap {
2020-06-05 18:07:57 +02:00
fi , err := f . Stat ( )
if err != nil {
2020-11-23 08:55:38 +01:00
MustClose ( f )
logger . Panicf ( "FATAL: error in fstat(%q): %s" , path , err )
2020-06-05 18:07:57 +02:00
}
size := fi . Size ( )
data , err := mmapFile ( f , size )
2020-01-30 14:03:24 +01:00
if err != nil {
MustClose ( f )
2020-11-23 08:55:38 +01:00
logger . Panicf ( "FATAL: cannot mmap %q: %s" , path , err )
2020-01-30 14:03:24 +01:00
}
r . mmapData = data
}
readersCount . Inc ( )
2020-11-23 08:55:38 +01:00
return & r
2020-01-30 14:03:24 +01:00
}
// Global stats for all the ReaderAt instances.
var (
// readCalls counts MustReadAt calls. Readers with local stats push their counts here at MustClose.
readCalls = metrics . NewCounter ( ` vm_fs_read_calls_total ` )
// readBytes counts the bytes read by MustReadAt. Readers with local stats push their counts here at MustClose.
readBytes = metrics . NewCounter ( ` vm_fs_read_bytes_total ` )
// readersCount is incremented in MustOpenReaderAt and decremented in ReaderAt.MustClose.
readersCount = metrics . NewCounter ( ` vm_fs_readers ` )
)
2020-06-05 18:07:57 +02:00
func mmapFile ( f * os . File , size int64 ) ( [ ] byte , error ) {
2020-01-30 14:03:24 +01:00
if size == 0 {
return nil , nil
}
if size < 0 {
return nil , fmt . Errorf ( "got negative file size: %d bytes" , size )
}
if int64 ( int ( size ) ) != size {
return nil , fmt . Errorf ( "file is too big to be mmap'ed: %d bytes" , size )
}
2020-06-23 12:39:12 +02:00
// Round size to multiple of 4KB pages as `man 2 mmap` recommends.
// This may help preventing SIGBUS panic at https://github.com/VictoriaMetrics/VictoriaMetrics/issues/581
// The SIGBUS could occur if standard copy(dst, src) function may read beyond src bounds.
sizeOrig := size
if size % 4096 != 0 {
size += 4096 - size % 4096
}
2021-02-27 00:01:47 +01:00
data , err := mmap ( int ( f . Fd ( ) ) , int ( size ) )
2020-01-30 14:03:24 +01:00
if err != nil {
2020-06-30 21:58:18 +02:00
return nil , fmt . Errorf ( "cannot mmap file with size %d: %w" , size , err )
2020-01-30 14:03:24 +01:00
}
2020-06-23 12:39:12 +02:00
return data [ : sizeOrig ] , nil
2020-01-30 14:03:24 +01:00
}