package ebpf

import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"math/rand"
	"os"
	"path/filepath"
	"reflect"
	"slices"
	"strings"
	"sync"
	"time"
	"unsafe"

	"github.com/cilium/ebpf/btf"
	"github.com/cilium/ebpf/internal"
	"github.com/cilium/ebpf/internal/sys"
	"github.com/cilium/ebpf/internal/sysenc"
	"github.com/cilium/ebpf/internal/unix"
)

// Errors returned by Map and MapIterator methods.
var (
	ErrKeyNotExist      = errors.New("key does not exist")
	ErrKeyExist         = errors.New("key already exists")
	ErrIterationAborted = errors.New("iteration aborted")
	ErrMapIncompatible  = errors.New("map spec is incompatible with existing map")
	errMapNoBTFValue    = errors.New("map spec does not contain a BTF Value")

	// pre-allocating these errors here since they may get called in hot code paths
	// and cause unnecessary memory allocations
	errMapLookupKeyNotExist = fmt.Errorf("lookup: %w", sysErrKeyNotExist)
)

// MapOptions control loading a map into the kernel.
type MapOptions struct {
	// The base path to pin maps in if requested via PinByName.
	// Existing maps will be re-used if they are compatible, otherwise an
	// error is returned.
	PinPath        string
	LoadPinOptions LoadPinOptions
}

// MapID represents the unique ID of an eBPF map
type MapID uint32

// MapSpec defines a Map.
type MapSpec struct {
	// Name is passed to the kernel as a debug aid. Must only contain
	// alpha numeric and '_' characters.
	Name       string
	Type       MapType
	KeySize    uint32
	ValueSize  uint32
	MaxEntries uint32

	// Flags is passed to the kernel and specifies additional map
	// creation attributes.
	Flags uint32

	// Automatically pin and load a map from MapOptions.PinPath.
	// Generates an error if an existing pinned map is incompatible with the MapSpec.
	Pinning PinType

	// Specify numa node during map creation
	// (effective only if sys.BPF_F_NUMA_NODE flag is set,
	// which can be imported from golang.org/x/sys/unix)
	NumaNode uint32

	// The initial contents of the map. May be nil.
	Contents []MapKV

	// InnerMap is used as a template for ArrayOfMaps and HashOfMaps
	InnerMap *MapSpec

	// Extra trailing bytes found in the ELF map definition when using structs
	// larger than libbpf's bpf_map_def. nil if no trailing bytes were present.
	// Must be nil or empty before instantiating the MapSpec into a Map.
	Extra *bytes.Reader

	// The key and value type of this map. May be nil.
	Key, Value btf.Type
}

func (ms *MapSpec) String() string {
	return fmt.Sprintf("%s(keySize=%d, valueSize=%d, maxEntries=%d, flags=%d)", ms.Type, ms.KeySize, ms.ValueSize, ms.MaxEntries, ms.Flags)
}

// Copy returns a copy of the spec.
//
// MapSpec.Contents is a shallow copy.
func (ms *MapSpec) Copy() *MapSpec {
	if ms == nil {
		return nil
	}

	cpy := *ms
	cpy.Contents = slices.Clone(cpy.Contents)
	cpy.Key = btf.Copy(cpy.Key)
	cpy.Value = btf.Copy(cpy.Value)

	if cpy.InnerMap == ms {
		cpy.InnerMap = &cpy
	} else {
		cpy.InnerMap = ms.InnerMap.Copy()
	}

	if cpy.Extra != nil {
		extra := *cpy.Extra
		cpy.Extra = &extra
	}

	return &cpy
}

// fixupMagicFields fills fields of MapSpec which are usually
// left empty in ELF or which depend on runtime information.
//
// The method doesn't modify Spec, instead returning a copy.
// The copy is only performed if fixups are necessary, so callers mustn't mutate
// the returned spec.
func (spec *MapSpec) fixupMagicFields() (*MapSpec, error) {
	switch spec.Type {
	case ArrayOfMaps, HashOfMaps:
		if spec.ValueSize != 0 && spec.ValueSize != 4 {
			return nil, errors.New("ValueSize must be zero or four for map of map")
		}

		spec = spec.Copy()
		spec.ValueSize = 4

	case PerfEventArray:
		if spec.KeySize != 0 && spec.KeySize != 4 {
			return nil, errors.New("KeySize must be zero or four for perf event array")
		}

		if spec.ValueSize != 0 && spec.ValueSize != 4 {
			return nil, errors.New("ValueSize must be zero or four for perf event array")
		}

		spec = spec.Copy()
		spec.KeySize = 4
		spec.ValueSize = 4

		n, err := PossibleCPU()
		if err != nil {
			return nil, fmt.Errorf("fixup perf event array: %w", err)
		}

		if n := uint32(n); spec.MaxEntries == 0 || spec.MaxEntries > n {
			// MaxEntries should be zero most of the time, but there is code
			// out there which hardcodes large constants. Clamp the number
			// of entries to the number of CPUs at most. Allow creating maps with
			// less than n items since some kernel selftests relied on this
			// behaviour in the past.
			spec.MaxEntries = n
		}

	case CPUMap:
		n, err := PossibleCPU()
		if err != nil {
			return nil, fmt.Errorf("fixup cpu map: %w", err)
		}

		if n := uint32(n); spec.MaxEntries == 0 || spec.MaxEntries > n {
			// Perform clamping similar to PerfEventArray.
			spec.MaxEntries = n
		}
	}

	return spec, nil
}

// dataSection returns the contents and BTF Datasec descriptor of the spec.
func (ms *MapSpec) dataSection() ([]byte, *btf.Datasec, error) {
	if ms.Value == nil {
		return nil, nil, errMapNoBTFValue
	}

	ds, ok := ms.Value.(*btf.Datasec)
	if !ok {
		return nil, nil, fmt.Errorf("map value BTF is a %T, not a *btf.Datasec", ms.Value)
	}

	if n := len(ms.Contents); n != 1 {
		return nil, nil, fmt.Errorf("expected one key, found %d", n)
	}

	kv := ms.Contents[0]
	value, ok := kv.Value.([]byte)
	if !ok {
		return nil, nil, fmt.Errorf("value at first map key is %T, not []byte", kv.Value)
	}

	return value, ds, nil
}

func (ms *MapSpec) readOnly() bool {
	return (ms.Flags & sys.BPF_F_RDONLY_PROG) > 0
}

func (ms *MapSpec) writeOnly() bool {
	return (ms.Flags & sys.BPF_F_WRONLY_PROG) > 0
}

// MapKV is used to initialize the contents of a Map.
type MapKV struct {
	Key   interface{}
	Value interface{}
}

// Compatible returns nil if an existing map may be used instead of creating
// one from the spec.
//
// Returns an error wrapping [ErrMapIncompatible] otherwise.
func (ms *MapSpec) Compatible(m *Map) error {
	ms, err := ms.fixupMagicFields()
	if err != nil {
		return err
	}

	diffs := []string{}
	if m.typ != ms.Type {
		diffs = append(diffs, fmt.Sprintf("Type: %s changed to %s", m.typ, ms.Type))
	}
	if m.keySize != ms.KeySize {
		diffs = append(diffs, fmt.Sprintf("KeySize: %d changed to %d", m.keySize, ms.KeySize))
	}
	if m.valueSize != ms.ValueSize {
		diffs = append(diffs, fmt.Sprintf("ValueSize: %d changed to %d", m.valueSize, ms.ValueSize))
	}
	if m.maxEntries != ms.MaxEntries {
		diffs = append(diffs, fmt.Sprintf("MaxEntries: %d changed to %d", m.maxEntries, ms.MaxEntries))
	}

	// BPF_F_RDONLY_PROG is set unconditionally for devmaps. Explicitly allow this
	// mismatch.
	if !((ms.Type == DevMap || ms.Type == DevMapHash) && m.flags^ms.Flags == sys.BPF_F_RDONLY_PROG) &&
		m.flags != ms.Flags {
		diffs = append(diffs, fmt.Sprintf("Flags: %d changed to %d", m.flags, ms.Flags))
	}

	if len(diffs) == 0 {
		return nil
	}

	return fmt.Errorf("%s: %w", strings.Join(diffs, ", "), ErrMapIncompatible)
}

// Map represents a Map file descriptor.
//
// It is not safe to close a map which is used by other goroutines.
//
// Methods which take interface{} arguments by default encode
// them using binary.Read/Write in the machine's native endianness.
//
// Implement encoding.BinaryMarshaler or encoding.BinaryUnmarshaler
// if you require custom encoding.
type Map struct {
	name       string
	fd         *sys.FD
	typ        MapType
	keySize    uint32
	valueSize  uint32
	maxEntries uint32
	flags      uint32
	pinnedPath string
	// Per CPU maps return values larger than the size in the spec
	fullValueSize int

	memory *Memory
}

// NewMapFromFD creates a map from a raw fd.
//
// You should not use fd after calling this function.
func NewMapFromFD(fd int) (*Map, error) {
	f, err := sys.NewFD(fd)
	if err != nil {
		return nil, err
	}

	return newMapFromFD(f)
}

func newMapFromFD(fd *sys.FD) (*Map, error) {
	info, err := newMapInfoFromFd(fd)
	if err != nil {
		fd.Close()
		return nil, fmt.Errorf("get map info: %w", err)
	}

	return newMap(fd, info.Name, info.Type, info.KeySize, info.ValueSize, info.MaxEntries, info.Flags)
}

// NewMap creates a new Map.
//
// It's equivalent to calling NewMapWithOptions with default options.
func NewMap(spec *MapSpec) (*Map, error) {
	return NewMapWithOptions(spec, MapOptions{})
}

// NewMapWithOptions creates a new Map.
//
// Creating a map for the first time will perform feature detection
// by creating small, temporary maps.
//
// The caller is responsible for ensuring the process' rlimit is set
// sufficiently high for locking memory during map creation. This can be done
// by calling rlimit.RemoveMemlock() prior to calling NewMapWithOptions.
//
// May return an error wrapping ErrMapIncompatible.
func NewMapWithOptions(spec *MapSpec, opts MapOptions) (*Map, error) {
	m, err := newMapWithOptions(spec, opts)
	if err != nil {
		return nil, fmt.Errorf("creating map: %w", err)
	}

	if err := m.finalize(spec); err != nil {
		m.Close()
		return nil, fmt.Errorf("populating map: %w", err)
	}

	return m, nil
}

func newMapWithOptions(spec *MapSpec, opts MapOptions) (_ *Map, err error) {
	closeOnError := func(c io.Closer) {
		if err != nil {
			c.Close()
		}
	}

	switch spec.Pinning {
	case PinByName:
		if spec.Name == "" {
			return nil, fmt.Errorf("pin by name: missing Name")
		}

		if opts.PinPath == "" {
			return nil, fmt.Errorf("pin by name: missing MapOptions.PinPath")
		}

		path := filepath.Join(opts.PinPath, spec.Name)
		m, err := LoadPinnedMap(path, &opts.LoadPinOptions)
		if errors.Is(err, unix.ENOENT) {
			break
		}
		if err != nil {
			return nil, fmt.Errorf("load pinned map: %w", err)
		}
		defer closeOnError(m)

		if err := spec.Compatible(m); err != nil {
			return nil, fmt.Errorf("use pinned map %s: %w", spec.Name, err)
		}

		return m, nil

	case PinNone:
		// Nothing to do here

	default:
		return nil, fmt.Errorf("pin type %d: %w", int(spec.Pinning), ErrNotSupported)
	}

	var innerFd *sys.FD
	if spec.Type == ArrayOfMaps || spec.Type == HashOfMaps {
		if spec.InnerMap == nil {
			return nil, fmt.Errorf("%s requires InnerMap", spec.Type)
		}

		if spec.InnerMap.Pinning != PinNone {
			return nil, errors.New("inner maps cannot be pinned")
		}

		template, err := spec.InnerMap.createMap(nil)
		if err != nil {
			return nil, fmt.Errorf("inner map: %w", err)
		}
		defer template.Close()

		// Intentionally skip populating and freezing (finalizing)
		// the inner map template since it will be removed shortly.

		innerFd = template.fd
	}

	m, err := spec.createMap(innerFd)
	if err != nil {
		return nil, err
	}
	defer closeOnError(m)

	if spec.Pinning == PinByName {
		path := filepath.Join(opts.PinPath, spec.Name)
		if err := m.Pin(path); err != nil {
			return nil, fmt.Errorf("pin map to %s: %w", path, err)
		}
	}

	return m, nil
}

// Memory returns a memory-mapped region for the Map. The Map must have been
// created with the BPF_F_MMAPABLE flag. Repeated calls to Memory return the
// same mapping. Callers are responsible for coordinating access to Memory.
func (m *Map) Memory() (*Memory, error) {
	if m.memory != nil {
		return m.memory, nil
	}

	if m.flags&sys.BPF_F_MMAPABLE == 0 {
		return nil, fmt.Errorf("Map was not created with the BPF_F_MMAPABLE flag: %w", ErrNotSupported)
	}

	size, err := m.memorySize()
	if err != nil {
		return nil, err
	}

	mm, err := newMemory(m.FD(), size)
	if err != nil {
		return nil, fmt.Errorf("creating new Memory: %w", err)
	}

	m.memory = mm

	return mm, nil
}

func (m *Map) memorySize() (int, error) {
	switch m.Type() {
	case Array:
		// In Arrays, values are always laid out on 8-byte boundaries regardless of
		// architecture. Multiply by MaxEntries and align the result to the host's
		// page size.
		size := int(internal.Align(m.ValueSize(), 8) * m.MaxEntries())
		size = internal.Align(size, os.Getpagesize())
		return size, nil
	case Arena:
		// For Arenas, MaxEntries denotes the maximum number of pages available to
		// the arena.
		return int(m.MaxEntries()) * os.Getpagesize(), nil
	}

	return 0, fmt.Errorf("determine memory size of map type %s: %w", m.Type(), ErrNotSupported)
}

// createMap validates the spec's properties and creates the map in the kernel
// using the given opts. It does not populate or freeze the map.
func (spec *MapSpec) createMap(inner *sys.FD) (_ *Map, err error) {
	closeOnError := func(closer io.Closer) {
		if err != nil {
			closer.Close()
		}
	}

	// Kernels 4.13 through 5.4 used a struct bpf_map_def that contained
	// additional 'inner_map_idx' and later 'numa_node' fields.
	// In order to support loading these definitions, tolerate the presence of
	// extra bytes, but require them to be zeroes.
	if spec.Extra != nil {
		if _, err := io.Copy(internal.DiscardZeroes{}, spec.Extra); err != nil {
			return nil, errors.New("extra contains unhandled non-zero bytes, drain before creating map")
		}
	}

	spec, err = spec.fixupMagicFields()
	if err != nil {
		return nil, err
	}

	attr := sys.MapCreateAttr{
		MapType:    sys.MapType(spec.Type),
		KeySize:    spec.KeySize,
		ValueSize:  spec.ValueSize,
		MaxEntries: spec.MaxEntries,
		MapFlags:   spec.Flags,
		NumaNode:   spec.NumaNode,
	}

	if inner != nil {
		attr.InnerMapFd = inner.Uint()
	}

	if haveObjName() == nil {
		attr.MapName = sys.NewObjName(spec.Name)
	}

	if spec.Key != nil || spec.Value != nil {
		handle, keyTypeID, valueTypeID, err := btf.MarshalMapKV(spec.Key, spec.Value)
		if err != nil && !errors.Is(err, btf.ErrNotSupported) {
			return nil, fmt.Errorf("load BTF: %w", err)
		}

		if handle != nil {
			defer handle.Close()

			// Use BTF k/v during map creation.
			attr.BtfFd = uint32(handle.FD())
			attr.BtfKeyTypeId = keyTypeID
			attr.BtfValueTypeId = valueTypeID
		}
	}

	fd, err := sys.MapCreate(&attr)

	// Some map types don't support BTF k/v in earlier kernel versions.
	// Remove BTF metadata and retry map creation.
	if (errors.Is(err, sys.ENOTSUPP) || errors.Is(err, unix.EINVAL)) && attr.BtfFd != 0 {
		attr.BtfFd, attr.BtfKeyTypeId, attr.BtfValueTypeId = 0, 0, 0
		fd, err = sys.MapCreate(&attr)
	}
	if err != nil {
		return nil, handleMapCreateError(attr, spec, err)
	}

	defer closeOnError(fd)
	m, err := newMap(fd, spec.Name, spec.Type, spec.KeySize, spec.ValueSize, spec.MaxEntries, spec.Flags)
	if err != nil {
		return nil, fmt.Errorf("map create: %w", err)
	}
	return m, nil
}

func handleMapCreateError(attr sys.MapCreateAttr, spec *MapSpec, err error) error {
	if errors.Is(err, unix.EPERM) {
		return fmt.Errorf("map create: %w (MEMLOCK may be too low, consider rlimit.RemoveMemlock)", err)
	}
	if errors.Is(err, unix.EINVAL) && spec.MaxEntries == 0 {
		return fmt.Errorf("map create: %w (MaxEntries may be incorrectly set to zero)", err)
	}
	if errors.Is(err, unix.EINVAL) && spec.Type == UnspecifiedMap {
		return fmt.Errorf("map create: cannot use type %s", UnspecifiedMap)
	}
	if errors.Is(err, unix.EINVAL) && spec.Flags&sys.BPF_F_NO_PREALLOC > 0 {
		return fmt.Errorf("map create: %w (noPrealloc flag may be incompatible with map type %s)", err, spec.Type)
	}

	if spec.Type.canStoreMap() {
		if haveFeatErr := haveNestedMaps(); haveFeatErr != nil {
			return fmt.Errorf("map create: %w", haveFeatErr)
		}
	}

	if spec.readOnly() || spec.writeOnly() {
		if haveFeatErr := haveMapMutabilityModifiers(); haveFeatErr != nil {
			return fmt.Errorf("map create: %w", haveFeatErr)
		}
	}
	if spec.Flags&sys.BPF_F_MMAPABLE > 0 {
		if haveFeatErr := haveMmapableMaps(); haveFeatErr != nil {
			return fmt.Errorf("map create: %w", haveFeatErr)
		}
	}
	if spec.Flags&sys.BPF_F_INNER_MAP > 0 {
		if haveFeatErr := haveInnerMaps(); haveFeatErr != nil {
			return fmt.Errorf("map create: %w", haveFeatErr)
		}
	}
	if spec.Flags&sys.BPF_F_NO_PREALLOC > 0 {
		if haveFeatErr := haveNoPreallocMaps(); haveFeatErr != nil {
			return fmt.Errorf("map create: %w", haveFeatErr)
		}
	}
	// BPF_MAP_TYPE_RINGBUF's max_entries must be a power-of-2 multiple of kernel's page size.
	if errors.Is(err, unix.EINVAL) &&
		(attr.MapType == sys.BPF_MAP_TYPE_RINGBUF || attr.MapType == sys.BPF_MAP_TYPE_USER_RINGBUF) {
		pageSize := uint32(os.Getpagesize())
		maxEntries := attr.MaxEntries
		if maxEntries%pageSize != 0 || !internal.IsPow(maxEntries) {
			return fmt.Errorf("map create: %w (ring map size %d not a multiple of page size %d)", err, maxEntries, pageSize)
		}
	}

	return fmt.Errorf("map create: %w", err)
}

// newMap allocates and returns a new Map structure.
// Sets the fullValueSize on per-CPU maps.
func newMap(fd *sys.FD, name string, typ MapType, keySize, valueSize, maxEntries, flags uint32) (*Map, error) {
	m := &Map{
		name,
		fd,
		typ,
		keySize,
		valueSize,
		maxEntries,
		flags,
		"",
		int(valueSize),
		nil,
	}

	if !typ.hasPerCPUValue() {
		return m, nil
	}

	possibleCPUs, err := PossibleCPU()
	if err != nil {
		return nil, err
	}

	m.fullValueSize = int(internal.Align(valueSize, 8)) * possibleCPUs
	return m, nil
}

func (m *Map) String() string {
	if m.name != "" {
		return fmt.Sprintf("%s(%s)#%v", m.typ, m.name, m.fd)
	}
	return fmt.Sprintf("%s#%v", m.typ, m.fd)
}

// Type returns the underlying type of the map.
func (m *Map) Type() MapType {
	return m.typ
}

// KeySize returns the size of the map key in bytes.
func (m *Map) KeySize() uint32 {
	return m.keySize
}

// ValueSize returns the size of the map value in bytes.
func (m *Map) ValueSize() uint32 {
	return m.valueSize
}

// MaxEntries returns the maximum number of elements the map can hold.
func (m *Map) MaxEntries() uint32 {
	return m.maxEntries
}

// Flags returns the flags of the map.
func (m *Map) Flags() uint32 {
	return m.flags
}

// Info returns metadata about the map. This was first introduced in Linux 4.5,
// but newer kernels support more MapInfo fields with the introduction of more
// features. See [MapInfo] and its methods for more details.
//
// Returns an error wrapping ErrNotSupported if the kernel supports neither
// BPF_OBJ_GET_INFO_BY_FD nor reading map information from /proc/self/fdinfo.
func (m *Map) Info() (*MapInfo, error) {
	return newMapInfoFromFd(m.fd)
}

// Handle returns a reference to the Map's type information in the kernel.
//
// Returns ErrNotSupported if the kernel has no BTF support, or if there is no
// BTF associated with the Map.
func (m *Map) Handle() (*btf.Handle, error) {
	info, err := m.Info()
	if err != nil {
		return nil, err
	}

	id, ok := info.BTFID()
	if !ok {
		return nil, fmt.Errorf("map %s: retrieve BTF ID: %w", m, ErrNotSupported)
	}

	return btf.NewHandleFromID(id)
}

// MapLookupFlags controls the behaviour of the map lookup calls.
type MapLookupFlags uint64

// LookupLock look up the value of a spin-locked map.
const LookupLock MapLookupFlags = sys.BPF_F_LOCK

// Lookup retrieves a value from a Map.
//
// Calls Close() on valueOut if it is of type **Map or **Program,
// and *valueOut is not nil.
//
// Returns an error if the key doesn't exist, see ErrKeyNotExist.
func (m *Map) Lookup(key, valueOut interface{}) error {
	return m.LookupWithFlags(key, valueOut, 0)
}

// LookupWithFlags retrieves a value from a Map with flags.
//
// Passing LookupLock flag will look up the value of a spin-locked
// map without returning the lock. This must be specified if the
// elements contain a spinlock.
//
// Calls Close() on valueOut if it is of type **Map or **Program,
// and *valueOut is not nil.
//
// Returns an error if the key doesn't exist, see ErrKeyNotExist.
func (m *Map) LookupWithFlags(key, valueOut interface{}, flags MapLookupFlags) error {
	if m.typ.hasPerCPUValue() {
		return m.lookupPerCPU(key, valueOut, flags)
	}

	valueBytes := makeMapSyscallOutput(valueOut, m.fullValueSize)
	if err := m.lookup(key, valueBytes.Pointer(), flags); err != nil {
		return err
	}

	return m.unmarshalValue(valueOut, valueBytes)
}

// LookupAndDelete retrieves and deletes a value from a Map.
//
// Returns ErrKeyNotExist if the key doesn't exist.
func (m *Map) LookupAndDelete(key, valueOut interface{}) error {
	return m.LookupAndDeleteWithFlags(key, valueOut, 0)
}

// LookupAndDeleteWithFlags retrieves and deletes a value from a Map.
//
// Passing LookupLock flag will look up and delete the value of a spin-locked
// map without returning the lock. This must be specified if the elements
// contain a spinlock.
//
// Returns ErrKeyNotExist if the key doesn't exist.
func (m *Map) LookupAndDeleteWithFlags(key, valueOut interface{}, flags MapLookupFlags) error {
	if m.typ.hasPerCPUValue() {
		return m.lookupAndDeletePerCPU(key, valueOut, flags)
	}

	valueBytes := makeMapSyscallOutput(valueOut, m.fullValueSize)
	if err := m.lookupAndDelete(key, valueBytes.Pointer(), flags); err != nil {
		return err
	}
	return m.unmarshalValue(valueOut, valueBytes)
}

// LookupBytes gets a value from Map.
//
// Returns a nil value if a key doesn't exist.
func (m *Map) LookupBytes(key interface{}) ([]byte, error) {
	valueBytes := make([]byte, m.fullValueSize)
	valuePtr := sys.NewSlicePointer(valueBytes)

	err := m.lookup(key, valuePtr, 0)
	if errors.Is(err, ErrKeyNotExist) {
		return nil, nil
	}

	return valueBytes, err
}

func (m *Map) lookupPerCPU(key, valueOut any, flags MapLookupFlags) error {
	slice, err := ensurePerCPUSlice(valueOut)
	if err != nil {
		return err
	}
	valueBytes := make([]byte, m.fullValueSize)
	if err := m.lookup(key, sys.NewSlicePointer(valueBytes), flags); err != nil {
		return err
	}
	return unmarshalPerCPUValue(slice, int(m.valueSize), valueBytes)
}

func (m *Map) lookup(key interface{}, valueOut sys.Pointer, flags MapLookupFlags) error {
	keyPtr, err := m.marshalKey(key)
	if err != nil {
		return fmt.Errorf("can't marshal key: %w", err)
	}

	attr := sys.MapLookupElemAttr{
		MapFd: m.fd.Uint(),
		Key:   keyPtr,
		Value: valueOut,
		Flags: uint64(flags),
	}

	if err = sys.MapLookupElem(&attr); err != nil {
		if errors.Is(err, unix.ENOENT) {
			return errMapLookupKeyNotExist
		}
		return fmt.Errorf("lookup: %w", wrapMapError(err))
	}
	return nil
}

func (m *Map) lookupAndDeletePerCPU(key, valueOut any, flags MapLookupFlags) error {
	slice, err := ensurePerCPUSlice(valueOut)
	if err != nil {
		return err
	}
	valueBytes := make([]byte, m.fullValueSize)
	if err := m.lookupAndDelete(key, sys.NewSlicePointer(valueBytes), flags); err != nil {
		return err
	}
	return unmarshalPerCPUValue(slice, int(m.valueSize), valueBytes)
}

// ensurePerCPUSlice allocates a slice for a per-CPU value if necessary.
func ensurePerCPUSlice(sliceOrPtr any) (any, error) {
	sliceOrPtrType := reflect.TypeOf(sliceOrPtr)
	if sliceOrPtrType.Kind() == reflect.Slice {
		// The target is a slice, the caller is responsible for ensuring that
		// size is correct.
		return sliceOrPtr, nil
	}

	slicePtrType := sliceOrPtrType
	if slicePtrType.Kind() != reflect.Ptr || slicePtrType.Elem().Kind() != reflect.Slice {
		return nil, fmt.Errorf("per-cpu value requires a slice or a pointer to slice")
	}

	possibleCPUs, err := PossibleCPU()
	if err != nil {
		return nil, err
	}

	sliceType := slicePtrType.Elem()
	slice := reflect.MakeSlice(sliceType, possibleCPUs, possibleCPUs)

	sliceElemType := sliceType.Elem()
	sliceElemIsPointer := sliceElemType.Kind() == reflect.Ptr
	reflect.ValueOf(sliceOrPtr).Elem().Set(slice)
	if !sliceElemIsPointer {
		return slice.Interface(), nil
	}
	sliceElemType = sliceElemType.Elem()

	for i := 0; i < possibleCPUs; i++ {
		newElem := reflect.New(sliceElemType)
		slice.Index(i).Set(newElem)
	}

	return slice.Interface(), nil
}

func (m *Map) lookupAndDelete(key any, valuePtr sys.Pointer, flags MapLookupFlags) error {
	keyPtr, err := m.marshalKey(key)
	if err != nil {
		return fmt.Errorf("can't marshal key: %w", err)
	}

	attr := sys.MapLookupAndDeleteElemAttr{
		MapFd: m.fd.Uint(),
		Key:   keyPtr,
		Value: valuePtr,
		Flags: uint64(flags),
	}

	if err := sys.MapLookupAndDeleteElem(&attr); err != nil {
		return fmt.Errorf("lookup and delete: %w", wrapMapError(err))
	}

	return nil
}

// MapUpdateFlags controls the behaviour of the Map.Update call.
//
// The exact semantics depend on the specific MapType.
type MapUpdateFlags uint64

const (
	// UpdateAny creates a new element or update an existing one.
	UpdateAny MapUpdateFlags = iota
	// UpdateNoExist creates a new element.
	UpdateNoExist MapUpdateFlags = 1 << (iota - 1)
	// UpdateExist updates an existing element.
	UpdateExist
	// UpdateLock updates elements under bpf_spin_lock.
	UpdateLock
)

// Put replaces or creates a value in map.
//
// It is equivalent to calling Update with UpdateAny.
func (m *Map) Put(key, value interface{}) error {
	return m.Update(key, value, UpdateAny)
}

// Update changes the value of a key.
func (m *Map) Update(key, value any, flags MapUpdateFlags) error {
	if m.typ.hasPerCPUValue() {
		return m.updatePerCPU(key, value, flags)
	}

	valuePtr, err := m.marshalValue(value)
	if err != nil {
		return fmt.Errorf("marshal value: %w", err)
	}

	return m.update(key, valuePtr, flags)
}

func (m *Map) updatePerCPU(key, value any, flags MapUpdateFlags) error {
	valuePtr, err := marshalPerCPUValue(value, int(m.valueSize))
	if err != nil {
		return fmt.Errorf("marshal value: %w", err)
	}

	return m.update(key, valuePtr, flags)
}

func (m *Map) update(key any, valuePtr sys.Pointer, flags MapUpdateFlags) error {
	keyPtr, err := m.marshalKey(key)
	if err != nil {
		return fmt.Errorf("marshal key: %w", err)
	}

	attr := sys.MapUpdateElemAttr{
		MapFd: m.fd.Uint(),
		Key:   keyPtr,
		Value: valuePtr,
		Flags: uint64(flags),
	}

	if err = sys.MapUpdateElem(&attr); err != nil {
		return fmt.Errorf("update: %w", wrapMapError(err))
	}

	return nil
}

// Delete removes a value.
//
// Returns ErrKeyNotExist if the key does not exist.
func (m *Map) Delete(key interface{}) error {
	keyPtr, err := m.marshalKey(key)
	if err != nil {
		return fmt.Errorf("can't marshal key: %w", err)
	}

	attr := sys.MapDeleteElemAttr{
		MapFd: m.fd.Uint(),
		Key:   keyPtr,
	}

	if err = sys.MapDeleteElem(&attr); err != nil {
		return fmt.Errorf("delete: %w", wrapMapError(err))
	}
	return nil
}

// NextKey finds the key following an initial key.
//
// See NextKeyBytes for details.
//
// Returns ErrKeyNotExist if there is no next key.
func (m *Map) NextKey(key, nextKeyOut interface{}) error {
	nextKeyBytes := makeMapSyscallOutput(nextKeyOut, int(m.keySize))

	if err := m.nextKey(key, nextKeyBytes.Pointer()); err != nil {
		return err
	}

	if err := nextKeyBytes.Unmarshal(nextKeyOut); err != nil {
		return fmt.Errorf("can't unmarshal next key: %w", err)
	}
	return nil
}

// NextKeyBytes returns the key following an initial key as a byte slice.
//
// Passing nil will return the first key.
//
// Use Iterate if you want to traverse all entries in the map.
//
// Returns nil if there are no more keys.
func (m *Map) NextKeyBytes(key interface{}) ([]byte, error) {
	nextKey := make([]byte, m.keySize)
	nextKeyPtr := sys.NewSlicePointer(nextKey)

	err := m.nextKey(key, nextKeyPtr)
	if errors.Is(err, ErrKeyNotExist) {
		return nil, nil
	}

	return nextKey, err
}

func (m *Map) nextKey(key interface{}, nextKeyOut sys.Pointer) error {
	var (
		keyPtr sys.Pointer
		err    error
	)

	if key != nil {
		keyPtr, err = m.marshalKey(key)
		if err != nil {
			return fmt.Errorf("can't marshal key: %w", err)
		}
	}

	attr := sys.MapGetNextKeyAttr{
		MapFd:   m.fd.Uint(),
		Key:     keyPtr,
		NextKey: nextKeyOut,
	}

	if err = sys.MapGetNextKey(&attr); err != nil {
		// Kernels 4.4.131 and earlier return EFAULT instead of a pointer to the
		// first map element when a nil key pointer is specified.
		if key == nil && errors.Is(err, unix.EFAULT) {
			var guessKey []byte
			guessKey, err = m.guessNonExistentKey()
			if err != nil {
				return err
			}

			// Retry the syscall with a valid non-existing key.
			attr.Key = sys.NewSlicePointer(guessKey)
			if err = sys.MapGetNextKey(&attr); err == nil {
				return nil
			}
		}

		return fmt.Errorf("next key: %w", wrapMapError(err))
	}

	return nil
}

var mmapProtectedPage = sync.OnceValues(func() ([]byte, error) {
	return unix.Mmap(-1, 0, os.Getpagesize(), unix.PROT_NONE, unix.MAP_ANON|unix.MAP_SHARED)
})

// guessNonExistentKey attempts to perform a map lookup that returns ENOENT.
// This is necessary on kernels before 4.4.132, since those don't support
// iterating maps from the start by providing an invalid key pointer.
func (m *Map) guessNonExistentKey() ([]byte, error) {
	// Map a protected page and use that as the value pointer. This saves some
	// work copying out the value, which we're not interested in.
	page, err := mmapProtectedPage()
	if err != nil {
		return nil, err
	}
	valuePtr := sys.NewSlicePointer(page)

	randKey := make([]byte, int(m.keySize))

	for i := 0; i < 4; i++ {
		switch i {
		// For hash maps, the 0 key is less likely to be occupied. They're often
		// used for storing data related to pointers, and their access pattern is
		// generally scattered across the keyspace.
		case 0:
		// An all-0xff key is guaranteed to be out of bounds of any array, since
		// those have a fixed key size of 4 bytes. The only corner case being
		// arrays with 2^32 max entries, but those are prohibitively expensive
		// in many environments.
		case 1:
			for r := range randKey {
				randKey[r] = 0xff
			}
		// Inspired by BCC, 0x55 is an alternating binary pattern (0101), so
		// is unlikely to be taken.
		case 2:
			for r := range randKey {
				randKey[r] = 0x55
			}
		// Last ditch effort, generate a random key.
		case 3:
			rand.New(rand.NewSource(time.Now().UnixNano())).Read(randKey)
		}

		err := m.lookup(randKey, valuePtr, 0)
		if errors.Is(err, ErrKeyNotExist) {
			return randKey, nil
		}
	}

	return nil, errors.New("couldn't find non-existing key")
}

// BatchLookup looks up many elements in a map at once.
//
// "keysOut" and "valuesOut" must be of type slice, a pointer
// to a slice or buffer will not work.
// "cursor" is an pointer to an opaque handle. It must be non-nil. Pass
// "cursor" to subsequent calls of this function to continue the batching
// operation in the case of chunking.
//
// Warning: This API is not very safe to use as the kernel implementation for
// batching relies on the user to be aware of subtle details with regarding to
// different map type implementations.
//
// ErrKeyNotExist is returned when the batch lookup has reached
// the end of all possible results, even when partial results
// are returned. It should be used to evaluate when lookup is "done".
func (m *Map) BatchLookup(cursor *MapBatchCursor, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
	n, err := m.batchLookup(sys.BPF_MAP_LOOKUP_BATCH, cursor, keysOut, valuesOut, opts)
	if err != nil {
		return n, fmt.Errorf("map batch lookup: %w", err)
	}
	return n, nil
}

// BatchLookupAndDelete looks up many elements in a map at once,
//
// It then deletes all those elements.
// "keysOut" and "valuesOut" must be of type slice, a pointer
// to a slice or buffer will not work.
// "cursor" is an pointer to an opaque handle. It must be non-nil. Pass
// "cursor" to subsequent calls of this function to continue the batching
// operation in the case of chunking.
//
// Warning: This API is not very safe to use as the kernel implementation for
// batching relies on the user to be aware of subtle details with regarding to
// different map type implementations.
//
// ErrKeyNotExist is returned when the batch lookup has reached
// the end of all possible results, even when partial results
// are returned. It should be used to evaluate when lookup is "done".
func (m *Map) BatchLookupAndDelete(cursor *MapBatchCursor, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
	n, err := m.batchLookup(sys.BPF_MAP_LOOKUP_AND_DELETE_BATCH, cursor, keysOut, valuesOut, opts)
	if err != nil {
		return n, fmt.Errorf("map batch lookup and delete: %w", err)
	}
	return n, nil
}

// MapBatchCursor represents a starting point for a batch operation.
type MapBatchCursor struct {
	m      *Map
	opaque []byte
}

func (m *Map) batchLookup(cmd sys.Cmd, cursor *MapBatchCursor, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
	if m.typ.hasPerCPUValue() {
		return m.batchLookupPerCPU(cmd, cursor, keysOut, valuesOut, opts)
	}

	count, err := batchCount(keysOut, valuesOut)
	if err != nil {
		return 0, err
	}

	valueBuf := sysenc.SyscallOutput(valuesOut, count*int(m.fullValueSize))

	n, err := m.batchLookupCmd(cmd, cursor, count, keysOut, valueBuf.Pointer(), opts)
	if errors.Is(err, unix.ENOSPC) {
		// Hash tables return ENOSPC when the size of the batch is smaller than
		// any bucket.
		return n, fmt.Errorf("%w (batch size too small?)", err)
	} else if err != nil {
		return n, err
	}

	err = valueBuf.Unmarshal(valuesOut)
	if err != nil {
		return 0, err
	}

	return n, nil
}

func (m *Map) batchLookupPerCPU(cmd sys.Cmd, cursor *MapBatchCursor, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
	count, err := sliceLen(keysOut)
	if err != nil {
		return 0, fmt.Errorf("keys: %w", err)
	}

	valueBuf := make([]byte, count*int(m.fullValueSize))
	valuePtr := sys.NewSlicePointer(valueBuf)

	n, sysErr := m.batchLookupCmd(cmd, cursor, count, keysOut, valuePtr, opts)
	if sysErr != nil && !errors.Is(sysErr, unix.ENOENT) {
		return 0, err
	}

	err = unmarshalBatchPerCPUValue(valuesOut, count, int(m.valueSize), valueBuf)
	if err != nil {
		return 0, err
	}

	return n, sysErr
}

func (m *Map) batchLookupCmd(cmd sys.Cmd, cursor *MapBatchCursor, count int, keysOut any, valuePtr sys.Pointer, opts *BatchOptions) (int, error) {
	cursorLen := int(m.keySize)
	if cursorLen < 4 {
		// * generic_map_lookup_batch requires that batch_out is key_size bytes.
		//   This is used by array and LPM maps.
		//
		// * __htab_map_lookup_and_delete_batch requires u32. This is used by the
		//   various hash maps.
		//
		// Use a minimum of 4 bytes to avoid having to distinguish between the two.
		cursorLen = 4
	}

	inBatch := cursor.opaque
	if inBatch == nil {
		// This is the first lookup, allocate a buffer to hold the cursor.
		cursor.opaque = make([]byte, cursorLen)
		cursor.m = m
	} else if cursor.m != m {
		// Prevent reuse of a cursor across maps. First, it's unlikely to work.
		// Second, the maps may require different cursorLen and cursor.opaque
		// may therefore be too short. This could lead to the kernel clobbering
		// user space memory.
		return 0, errors.New("a cursor may not be reused across maps")
	}

	if err := haveBatchAPI(); err != nil {
		return 0, err
	}

	keyBuf := sysenc.SyscallOutput(keysOut, count*int(m.keySize))

	attr := sys.MapLookupBatchAttr{
		MapFd:    m.fd.Uint(),
		Keys:     keyBuf.Pointer(),
		Values:   valuePtr,
		Count:    uint32(count),
		InBatch:  sys.NewSlicePointer(inBatch),
		OutBatch: sys.NewSlicePointer(cursor.opaque),
	}

	if opts != nil {
		attr.ElemFlags = opts.ElemFlags
		attr.Flags = opts.Flags
	}

	_, sysErr := sys.BPF(cmd, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
	sysErr = wrapMapError(sysErr)
	if sysErr != nil && !errors.Is(sysErr, unix.ENOENT) {
		return 0, sysErr
	}

	if err := keyBuf.Unmarshal(keysOut); err != nil {
		return 0, err
	}

	return int(attr.Count), sysErr
}

// BatchUpdate updates the map with multiple keys and values
// simultaneously.
// "keys" and "values" must be of type slice, a pointer
// to a slice or buffer will not work.
func (m *Map) BatchUpdate(keys, values interface{}, opts *BatchOptions) (int, error) {
	if m.typ.hasPerCPUValue() {
		return m.batchUpdatePerCPU(keys, values, opts)
	}

	count, err := batchCount(keys, values)
	if err != nil {
		return 0, err
	}

	valuePtr, err := marshalMapSyscallInput(values, count*int(m.valueSize))
	if err != nil {
		return 0, err
	}

	return m.batchUpdate(count, keys, valuePtr, opts)
}

func (m *Map) batchUpdate(count int, keys any, valuePtr sys.Pointer, opts *BatchOptions) (int, error) {
	keyPtr, err := marshalMapSyscallInput(keys, count*int(m.keySize))
	if err != nil {
		return 0, err
	}

	attr := sys.MapUpdateBatchAttr{
		MapFd:  m.fd.Uint(),
		Keys:   keyPtr,
		Values: valuePtr,
		Count:  uint32(count),
	}
	if opts != nil {
		attr.ElemFlags = opts.ElemFlags
		attr.Flags = opts.Flags
	}

	err = sys.MapUpdateBatch(&attr)
	if err != nil {
		if haveFeatErr := haveBatchAPI(); haveFeatErr != nil {
			return 0, haveFeatErr
		}
		return int(attr.Count), fmt.Errorf("batch update: %w", wrapMapError(err))
	}

	return int(attr.Count), nil
}

func (m *Map) batchUpdatePerCPU(keys, values any, opts *BatchOptions) (int, error) {
	count, err := sliceLen(keys)
	if err != nil {
		return 0, fmt.Errorf("keys: %w", err)
	}

	valueBuf, err := marshalBatchPerCPUValue(values, count, int(m.valueSize))
	if err != nil {
		return 0, err
	}

	return m.batchUpdate(count, keys, sys.NewSlicePointer(valueBuf), opts)
}

// BatchDelete batch deletes entries in the map by keys.
// "keys" must be of type slice, a pointer to a slice or buffer will not work.
func (m *Map) BatchDelete(keys interface{}, opts *BatchOptions) (int, error) {
	count, err := sliceLen(keys)
	if err != nil {
		return 0, fmt.Errorf("keys: %w", err)
	}

	keyPtr, err := marshalMapSyscallInput(keys, count*int(m.keySize))
	if err != nil {
		return 0, fmt.Errorf("cannot marshal keys: %v", err)
	}

	attr := sys.MapDeleteBatchAttr{
		MapFd: m.fd.Uint(),
		Keys:  keyPtr,
		Count: uint32(count),
	}

	if opts != nil {
		attr.ElemFlags = opts.ElemFlags
		attr.Flags = opts.Flags
	}

	if err = sys.MapDeleteBatch(&attr); err != nil {
		if haveFeatErr := haveBatchAPI(); haveFeatErr != nil {
			return 0, haveFeatErr
		}
		return int(attr.Count), fmt.Errorf("batch delete: %w", wrapMapError(err))
	}

	return int(attr.Count), nil
}

func batchCount(keys, values any) (int, error) {
	keysLen, err := sliceLen(keys)
	if err != nil {
		return 0, fmt.Errorf("keys: %w", err)
	}

	valuesLen, err := sliceLen(values)
	if err != nil {
		return 0, fmt.Errorf("values: %w", err)
	}

	if keysLen != valuesLen {
		return 0, fmt.Errorf("keys and values must have the same length")
	}

	return keysLen, nil
}

// Iterate traverses a map.
//
// It's safe to create multiple iterators at the same time.
//
// It's not possible to guarantee that all keys in a map will be
// returned if there are concurrent modifications to the map.
func (m *Map) Iterate() *MapIterator {
	return newMapIterator(m)
}

// Close the Map's underlying file descriptor, which could unload the
// Map from the kernel if it is not pinned or in use by a loaded Program.
func (m *Map) Close() error {
	if m == nil {
		// This makes it easier to clean up when iterating maps
		// of maps / programs.
		return nil
	}

	return m.fd.Close()
}

// FD gets the file descriptor of the Map.
//
// Calling this function is invalid after Close has been called.
func (m *Map) FD() int {
	return m.fd.Int()
}

// Clone creates a duplicate of the Map.
//
// Closing the duplicate does not affect the original, and vice versa.
// Changes made to the map are reflected by both instances however.
// If the original map was pinned, the cloned map will not be pinned by default.
//
// Cloning a nil Map returns nil.
func (m *Map) Clone() (*Map, error) {
	if m == nil {
		return nil, nil
	}

	dup, err := m.fd.Dup()
	if err != nil {
		return nil, fmt.Errorf("can't clone map: %w", err)
	}

	return &Map{
		m.name,
		dup,
		m.typ,
		m.keySize,
		m.valueSize,
		m.maxEntries,
		m.flags,
		"",
		m.fullValueSize,
		nil,
	}, nil
}

// Pin persists the map on the BPF virtual file system past the lifetime of
// the process that created it .
//
// Calling Pin on a previously pinned map will overwrite the path, except when
// the new path already exists. Re-pinning across filesystems is not supported.
// You can Clone a map to pin it to a different path.
//
// This requires bpffs to be mounted above fileName.
// See https://docs.cilium.io/en/stable/network/kubernetes/configuration/#mounting-bpffs-with-systemd
func (m *Map) Pin(fileName string) error {
	if err := sys.Pin(m.pinnedPath, fileName, m.fd); err != nil {
		return err
	}
	m.pinnedPath = fileName
	return nil
}

// Unpin removes the persisted state for the map from the BPF virtual filesystem.
//
// Failed calls to Unpin will not alter the state returned by IsPinned.
//
// Unpinning an unpinned Map returns nil.
func (m *Map) Unpin() error {
	if err := sys.Unpin(m.pinnedPath); err != nil {
		return err
	}
	m.pinnedPath = ""
	return nil
}

// IsPinned returns true if the map has a non-empty pinned path.
func (m *Map) IsPinned() bool {
	return m.pinnedPath != ""
}

// Freeze prevents a map to be modified from user space.
//
// It makes no changes to kernel-side restrictions.
func (m *Map) Freeze() error {
	attr := sys.MapFreezeAttr{
		MapFd: m.fd.Uint(),
	}

	if err := sys.MapFreeze(&attr); err != nil {
		if haveFeatErr := haveMapMutabilityModifiers(); haveFeatErr != nil {
			return fmt.Errorf("can't freeze map: %w", haveFeatErr)
		}
		return fmt.Errorf("can't freeze map: %w", err)
	}
	return nil
}

// finalize populates the Map according to the Contents specified
// in spec and freezes the Map if requested by spec.
func (m *Map) finalize(spec *MapSpec) error {
	for _, kv := range spec.Contents {
		if err := m.Put(kv.Key, kv.Value); err != nil {
			return fmt.Errorf("putting value: key %v: %w", kv.Key, err)
		}
	}

	if isConstantDataSection(spec.Name) || isKconfigSection(spec.Name) {
		if err := m.Freeze(); err != nil {
			return fmt.Errorf("freezing map: %w", err)
		}
	}

	return nil
}

func (m *Map) marshalKey(data interface{}) (sys.Pointer, error) {
	if data == nil {
		if m.keySize == 0 {
			// Queues have a key length of zero, so passing nil here is valid.
			return sys.NewPointer(nil), nil
		}
		return sys.Pointer{}, errors.New("can't use nil as key of map")
	}

	return marshalMapSyscallInput(data, int(m.keySize))
}

func (m *Map) marshalValue(data interface{}) (sys.Pointer, error) {
	var (
		buf []byte
		err error
	)

	switch value := data.(type) {
	case *Map:
		if !m.typ.canStoreMap() {
			return sys.Pointer{}, fmt.Errorf("can't store map in %s", m.typ)
		}
		buf, err = marshalMap(value, int(m.valueSize))

	case *Program:
		if !m.typ.canStoreProgram() {
			return sys.Pointer{}, fmt.Errorf("can't store program in %s", m.typ)
		}
		buf, err = marshalProgram(value, int(m.valueSize))

	default:
		return marshalMapSyscallInput(data, int(m.valueSize))
	}

	if err != nil {
		return sys.Pointer{}, err
	}

	return sys.NewSlicePointer(buf), nil
}

func (m *Map) unmarshalValue(value any, buf sysenc.Buffer) error {
	switch value := value.(type) {
	case **Map:
		if !m.typ.canStoreMap() {
			return fmt.Errorf("can't read a map from %s", m.typ)
		}

		other, err := unmarshalMap(buf)
		if err != nil {
			return err
		}

		// The caller might close the map externally, so ignore errors.
		_ = (*value).Close()

		*value = other
		return nil

	case *Map:
		if !m.typ.canStoreMap() {
			return fmt.Errorf("can't read a map from %s", m.typ)
		}
		return errors.New("require pointer to *Map")

	case **Program:
		if !m.typ.canStoreProgram() {
			return fmt.Errorf("can't read a program from %s", m.typ)
		}

		other, err := unmarshalProgram(buf)
		if err != nil {
			return err
		}

		// The caller might close the program externally, so ignore errors.
		_ = (*value).Close()

		*value = other
		return nil

	case *Program:
		if !m.typ.canStoreProgram() {
			return fmt.Errorf("can't read a program from %s", m.typ)
		}
		return errors.New("require pointer to *Program")
	}

	return buf.Unmarshal(value)
}

// LoadPinnedMap opens a Map from a pin (file) on the BPF virtual filesystem.
//
// Requires at least Linux 4.5.
func LoadPinnedMap(fileName string, opts *LoadPinOptions) (*Map, error) {
	fd, typ, err := sys.ObjGetTyped(&sys.ObjGetAttr{
		Pathname:  sys.NewStringPointer(fileName),
		FileFlags: opts.Marshal(),
	})
	if err != nil {
		return nil, err
	}

	if typ != sys.BPF_TYPE_MAP {
		_ = fd.Close()
		return nil, fmt.Errorf("%s is not a Map", fileName)
	}

	m, err := newMapFromFD(fd)
	if err == nil {
		m.pinnedPath = fileName
	}

	return m, err
}

// unmarshalMap creates a map from a map ID encoded in host endianness.
func unmarshalMap(buf sysenc.Buffer) (*Map, error) {
	var id uint32
	if err := buf.Unmarshal(&id); err != nil {
		return nil, err
	}
	return NewMapFromID(MapID(id))
}

// marshalMap marshals the fd of a map into a buffer in host endianness.
func marshalMap(m *Map, length int) ([]byte, error) {
	if m == nil {
		return nil, errors.New("can't marshal a nil Map")
	}

	if length != 4 {
		return nil, fmt.Errorf("can't marshal map to %d bytes", length)
	}

	buf := make([]byte, 4)
	internal.NativeEndian.PutUint32(buf, m.fd.Uint())
	return buf, nil
}

// MapIterator iterates a Map.
//
// See Map.Iterate.
type MapIterator struct {
	target *Map
	// Temporary storage to avoid allocations in Next(). This is any instead
	// of []byte to avoid allocations.
	cursor            any
	count, maxEntries uint32
	done              bool
	err               error
}

func newMapIterator(target *Map) *MapIterator {
	return &MapIterator{
		target:     target,
		maxEntries: target.maxEntries,
	}
}

// Next decodes the next key and value.
//
// Iterating a hash map from which keys are being deleted is not
// safe. You may see the same key multiple times. Iteration may
// also abort with an error, see IsIterationAborted.
//
// Returns false if there are no more entries. You must check
// the result of Err afterwards.
//
// See Map.Get for further caveats around valueOut.
func (mi *MapIterator) Next(keyOut, valueOut interface{}) bool {
	if mi.err != nil || mi.done {
		return false
	}

	// For array-like maps NextKey returns nil only after maxEntries
	// iterations.
	for mi.count <= mi.maxEntries {
		if mi.cursor == nil {
			// Pass nil interface to NextKey to make sure the Map's first key
			// is returned. If we pass an uninitialized []byte instead, it'll see a
			// non-nil interface and try to marshal it.
			mi.cursor = make([]byte, mi.target.keySize)
			mi.err = mi.target.NextKey(nil, mi.cursor)
		} else {
			mi.err = mi.target.NextKey(mi.cursor, mi.cursor)
		}

		if errors.Is(mi.err, ErrKeyNotExist) {
			mi.done = true
			mi.err = nil
			return false
		} else if mi.err != nil {
			mi.err = fmt.Errorf("get next key: %w", mi.err)
			return false
		}

		mi.count++
		mi.err = mi.target.Lookup(mi.cursor, valueOut)
		if errors.Is(mi.err, ErrKeyNotExist) {
			// Even though the key should be valid, we couldn't look up
			// its value. If we're iterating a hash map this is probably
			// because a concurrent delete removed the value before we
			// could get it. This means that the next call to NextKeyBytes
			// is very likely to restart iteration.
			// If we're iterating one of the fd maps like
			// ProgramArray it means that a given slot doesn't have
			// a valid fd associated. It's OK to continue to the next slot.
			continue
		}
		if mi.err != nil {
			mi.err = fmt.Errorf("look up next key: %w", mi.err)
			return false
		}

		buf := mi.cursor.([]byte)
		if ptr, ok := keyOut.(unsafe.Pointer); ok {
			copy(unsafe.Slice((*byte)(ptr), len(buf)), buf)
		} else {
			mi.err = sysenc.Unmarshal(keyOut, buf)
		}

		return mi.err == nil
	}

	mi.err = fmt.Errorf("%w", ErrIterationAborted)
	return false
}

// Err returns any encountered error.
//
// The method must be called after Next returns nil.
//
// Returns ErrIterationAborted if it wasn't possible to do a full iteration.
func (mi *MapIterator) Err() error {
	return mi.err
}

// MapGetNextID returns the ID of the next eBPF map.
//
// Returns ErrNotExist, if there is no next eBPF map.
func MapGetNextID(startID MapID) (MapID, error) {
	attr := &sys.MapGetNextIdAttr{Id: uint32(startID)}
	return MapID(attr.NextId), sys.MapGetNextId(attr)
}

// NewMapFromID returns the map for a given id.
//
// Returns ErrNotExist, if there is no eBPF map with the given id.
func NewMapFromID(id MapID) (*Map, error) {
	fd, err := sys.MapGetFdById(&sys.MapGetFdByIdAttr{
		Id: uint32(id),
	})
	if err != nil {
		return nil, err
	}

	return newMapFromFD(fd)
}

// sliceLen returns the length if the value is a slice or an error otherwise.
func sliceLen(slice any) (int, error) {
	sliceValue := reflect.ValueOf(slice)
	if sliceValue.Kind() != reflect.Slice {
		return 0, fmt.Errorf("%T is not a slice", slice)
	}
	return sliceValue.Len(), nil
}
