2020-01-30 14:03:24 +01:00
package fs
import (
"flag"
"fmt"
"os"
2024-02-06 19:40:10 +01:00
"sync"
2023-03-26 00:33:09 +01:00
"sync/atomic"
2020-01-30 14:03:24 +01:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/metrics"
)
2020-05-12 19:18:57 +02:00
var disableMmap = flag . Bool ( "fs.disableMmap" , is32BitPtr , "Whether to use pread() instead of mmap() for reading data files. " +
2023-05-10 09:50:41 +02:00
"By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. " +
2020-07-06 13:28:28 +02:00
"mmap() is usually faster for reading small data chunks than pread()" )
2020-05-12 19:18:57 +02:00
2021-02-09 15:49:01 +01:00
// is32BitPtr is true when uintptr is at most 32 bits wide.
//
// It is used as the default for disabling mmap, so that files exceeding
// 2^32 bytes can still be read on architectures with 32-bit pointers.
const is32BitPtr = ^uintptr(0)>>32 == 0
2020-01-30 14:03:24 +01:00
// MustReadAtCloser is a random-access read interface.
type MustReadAtCloser interface {
	// Path must return the path for the reader
	// (e.g. file path, url or in-memory reference).
	Path() string

	// MustReadAt must read len(p) bytes from offset off to p.
	MustReadAt(p []byte, off int64)

	// MustClose must close the reader.
	MustClose()
}
// ReaderAt implements rand-access reader.
type ReaderAt struct {
2024-02-24 01:07:51 +01:00
readCalls atomic . Int64
readBytes atomic . Int64
2023-03-26 00:33:09 +01:00
2024-02-06 19:40:10 +01:00
// path contains the path to the file for reading
path string
// mr is used for lazy opening of the file at path on the first access.
mr atomic . Pointer [ mmapReader ]
mrLock sync . Mutex
2023-03-26 00:33:09 +01:00
useLocalStats bool
2020-01-30 14:03:24 +01:00
}
2023-06-20 07:42:25 +02:00
// Path returns path to r.
func ( r * ReaderAt ) Path ( ) string {
2024-02-06 19:40:10 +01:00
return r . path
2023-06-20 07:42:25 +02:00
}
2020-01-30 14:03:24 +01:00
// MustReadAt reads len(p) bytes at off from r.
func ( r * ReaderAt ) MustReadAt ( p [ ] byte , off int64 ) {
if len ( p ) == 0 {
return
}
2020-06-05 18:07:57 +02:00
if off < 0 {
2023-04-14 23:51:03 +02:00
logger . Panicf ( "BUG: off=%d cannot be negative" , off )
2020-06-05 18:07:57 +02:00
}
2024-02-06 19:40:10 +01:00
// Lazily open the file at r.path on the first access
mr := r . getMmapReader ( )
// Read len(p) bytes at offset off to p.
if len ( mr . mmapData ) == 0 {
n , err := mr . f . ReadAt ( p , off )
2020-01-30 14:03:24 +01:00
if err != nil {
2024-02-06 19:40:10 +01:00
logger . Panicf ( "FATAL: cannot read %d bytes at offset %d of file %q: %s" , len ( p ) , off , r . path , err )
2020-01-30 14:03:24 +01:00
}
if n != len ( p ) {
2024-02-06 19:40:10 +01:00
logger . Panicf ( "FATAL: unexpected number of bytes read from file %q; got %d; want %d" , r . path , n , len ( p ) )
2020-01-30 14:03:24 +01:00
}
} else {
2024-02-06 19:40:10 +01:00
if off > int64 ( len ( mr . mmapData ) - len ( p ) ) {
logger . Panicf ( "BUG: off=%d is out of allowed range [0...%d] for len(p)=%d" , off , len ( mr . mmapData ) - len ( p ) , len ( p ) )
2020-01-30 14:03:24 +01:00
}
2024-02-06 19:40:10 +01:00
src := mr . mmapData [ off : ]
2021-02-09 15:49:01 +01:00
// The copy() below may result in thread block as described at https://valyala.medium.com/mmap-in-go-considered-harmful-d92a25cb161d .
// But production workload proved this is OK in most cases, so use it without fear :)
copy ( p , src )
2020-01-30 14:03:24 +01:00
}
2023-03-26 00:33:09 +01:00
if r . useLocalStats {
2024-02-24 01:07:51 +01:00
r . readCalls . Add ( 1 )
r . readBytes . Add ( int64 ( len ( p ) ) )
2023-03-26 00:33:09 +01:00
} else {
readCalls . Inc ( )
readBytes . Add ( len ( p ) )
}
2020-01-30 14:03:24 +01:00
}
2024-02-06 19:40:10 +01:00
// getMmapReader returns the mmapReader for r, opening the file at r.path
// on the first call.
func (r *ReaderAt) getMmapReader() *mmapReader {
	// Fast path: the reader has been already initialized.
	if cur := r.mr.Load(); cur != nil {
		return cur
	}

	// Slow path: initialize the reader under the lock and re-check r.mr there,
	// since another goroutine could have stored it in the meantime.
	r.mrLock.Lock()
	mr := r.mr.Load()
	if mr == nil {
		mr = newMmapReaderFromPath(r.path)
		r.mr.Store(mr)
	}
	r.mrLock.Unlock()
	return mr
}
// Global metrics for file readers.
//
// Metric names must not contain leading or trailing whitespace,
// otherwise they aren't valid metric identifiers.
var (
	// readCalls counts MustReadAt calls accounted in global stats.
	readCalls = metrics.NewCounter(`vm_fs_read_calls_total`)

	// readBytes counts bytes read via MustReadAt accounted in global stats.
	readBytes = metrics.NewCounter(`vm_fs_read_bytes_total`)

	// readersCount is incremented when a mmapReader is created
	// and decremented when it is closed.
	readersCount = metrics.NewCounter(`vm_fs_readers`)
)
2020-01-30 14:03:24 +01:00
// MustClose closes r.
func ( r * ReaderAt ) MustClose ( ) {
2024-02-06 19:40:10 +01:00
mr := r . mr . Load ( )
if mr != nil {
mr . mustClose ( )
r . mr . Store ( nil )
2020-01-30 14:03:24 +01:00
}
2023-03-26 00:33:09 +01:00
if r . useLocalStats {
2024-02-24 01:07:51 +01:00
readCalls . AddInt64 ( r . readCalls . Load ( ) )
readBytes . AddInt64 ( r . readBytes . Load ( ) )
r . readCalls . Store ( 0 )
r . readBytes . Store ( 0 )
2023-03-26 00:33:09 +01:00
r . useLocalStats = false
}
2020-01-30 14:03:24 +01:00
}
2023-03-26 00:33:09 +01:00
// SetUseLocalStats makes r collect read stats locally instead of updating global stats.
//
// It must be called before the first call to MustReadAt().
//
// Local stats collection may improve performance on systems with many CPU cores,
// because the collected stats is pushed to global stats only once at MustClose() call
// rather than at every MustReadAt call.
func (r *ReaderAt) SetUseLocalStats() {
	r.useLocalStats = true
}
2020-01-30 14:03:24 +01:00
// MustFadviseSequentialRead hints the OS that f is read mostly sequentially.
//
// if prefetch is set, then the OS is hinted to prefetch f data.
func ( r * ReaderAt ) MustFadviseSequentialRead ( prefetch bool ) {
2024-02-06 19:40:10 +01:00
mr := r . getMmapReader ( )
if err := fadviseSequentialRead ( mr . f , prefetch ) ; err != nil {
logger . Panicf ( "FATAL: error in fadviseSequentialRead(%q, %v): %s" , r . path , prefetch , err )
2020-01-30 14:03:24 +01:00
}
}
2024-02-01 18:09:03 +01:00
// MustOpenReaderAt opens ReaderAt for reading from the file located at path.
2020-01-30 14:03:24 +01:00
//
// MustClose must be called on the returned ReaderAt when it is no longer needed.
2020-11-23 08:55:38 +01:00
func MustOpenReaderAt ( path string ) * ReaderAt {
2024-02-06 19:40:10 +01:00
var r ReaderAt
r . path = path
return & r
2024-02-01 18:09:03 +01:00
}
// NewReaderAt returns ReaderAt for reading from f.
//
// NewReaderAt takes ownership for f, so it shouldn't be closed by the caller.
//
// MustClose must be called on the returned ReaderAt when it is no longer needed.
func NewReaderAt ( f * os . File ) * ReaderAt {
2024-02-06 19:40:10 +01:00
mr := newMmapReaderFromFile ( f )
2020-01-30 14:03:24 +01:00
var r ReaderAt
2024-02-06 19:40:10 +01:00
r . path = f . Name ( )
r . mr . Store ( mr )
return & r
}
// mmapReader holds an opened file together with its optional mmap'ed contents.
type mmapReader struct {
	// f is the opened file.
	f *os.File

	// mmapData is the mmap'ed contents of f.
	// It is empty when mmap is disabled or when the file is empty.
	mmapData []byte
}
// newMmapReaderFromPath opens the file at path and returns mmapReader for it.
//
// It panics if the file cannot be opened.
func newMmapReaderFromPath(path string) *mmapReader {
	file, openErr := os.Open(path)
	if openErr != nil {
		logger.Panicf("FATAL: cannot open file for reading: %s", openErr)
	}
	return newMmapReaderFromFile(file)
}
func newMmapReaderFromFile ( f * os . File ) * mmapReader {
var mmapData [ ] byte
2020-01-30 14:03:24 +01:00
if ! * disableMmap {
2020-06-05 18:07:57 +02:00
fi , err := f . Stat ( )
if err != nil {
2024-02-01 18:09:03 +01:00
path := f . Name ( )
2020-11-23 08:55:38 +01:00
MustClose ( f )
logger . Panicf ( "FATAL: error in fstat(%q): %s" , path , err )
2020-06-05 18:07:57 +02:00
}
size := fi . Size ( )
data , err := mmapFile ( f , size )
2020-01-30 14:03:24 +01:00
if err != nil {
2024-02-01 18:09:03 +01:00
path := f . Name ( )
2020-01-30 14:03:24 +01:00
MustClose ( f )
2020-11-23 08:55:38 +01:00
logger . Panicf ( "FATAL: cannot mmap %q: %s" , path , err )
2020-01-30 14:03:24 +01:00
}
2024-02-06 19:40:10 +01:00
mmapData = data
2020-01-30 14:03:24 +01:00
}
readersCount . Inc ( )
2024-02-06 19:40:10 +01:00
return & mmapReader {
f : f ,
mmapData : mmapData ,
}
2020-01-30 14:03:24 +01:00
}
2024-02-06 19:40:10 +01:00
// mustClose unmaps the mmap'ed data (if any) and closes the underlying file.
//
// It panics if the data cannot be unmapped.
func (mr *mmapReader) mustClose() {
	path := mr.f.Name()
	if data := mr.mmapData; len(data) > 0 {
		// Unmap the whole mapped region, which may be bigger than len(data).
		region := data[:cap(data)]
		if err := mUnmap(region); err != nil {
			logger.Panicf("FATAL: cannot unmap data for file %q: %s", path, err)
		}
		mr.mmapData = nil
	}

	MustClose(mr.f)
	mr.f = nil

	readersCount.Dec()
}
2020-01-30 14:03:24 +01:00
2020-06-05 18:07:57 +02:00
func mmapFile ( f * os . File , size int64 ) ( [ ] byte , error ) {
2020-01-30 14:03:24 +01:00
if size == 0 {
return nil , nil
}
if size < 0 {
return nil , fmt . Errorf ( "got negative file size: %d bytes" , size )
}
if int64 ( int ( size ) ) != size {
return nil , fmt . Errorf ( "file is too big to be mmap'ed: %d bytes" , size )
}
2020-06-23 12:39:12 +02:00
// Round size to multiple of 4KB pages as `man 2 mmap` recommends.
// This may help preventing SIGBUS panic at https://github.com/VictoriaMetrics/VictoriaMetrics/issues/581
// The SIGBUS could occur if standard copy(dst, src) function may read beyond src bounds.
sizeOrig := size
if size % 4096 != 0 {
size += 4096 - size % 4096
}
2021-02-27 00:01:47 +01:00
data , err := mmap ( int ( f . Fd ( ) ) , int ( size ) )
2020-01-30 14:03:24 +01:00
if err != nil {
2020-06-30 21:58:18 +02:00
return nil , fmt . Errorf ( "cannot mmap file with size %d: %w" , size , err )
2020-01-30 14:03:24 +01:00
}
2020-06-23 12:39:12 +02:00
return data [ : sizeOrig ] , nil
2020-01-30 14:03:24 +01:00
}