관리-도구
편집 파일: plugin_scheduler.py
# code for cores isolation was inspired by Tuna implementation # perf code was borrowed from kernel/tools/perf/python/twatch.py # thanks to Arnaldo Carvalho de Melo <acme@redhat.com> from . import base from .decorators import * import tuned.logs import re from subprocess import * import threading # perf is optional try: import perf except ImportError: # if perf is unavailable, it will be disabled later pass import select import tuned.consts as consts import procfs from tuned.utils.commands import commands import errno import os import collections import math # Check existence of scheduler API in os module try: os.SCHED_FIFO except AttributeError: import schedutils log = tuned.logs.get() class SchedulerParams(object): def __init__(self, cmd, cmdline = None, scheduler = None, priority = None, affinity = None, cgroup = None): self._cmd = cmd self.cmdline = cmdline self.scheduler = scheduler self.priority = priority self.affinity = affinity self.cgroup = cgroup @property def affinity(self): if self._affinity is None: return None else: return self._cmd.bitmask2cpulist(self._affinity) @affinity.setter def affinity(self, value): if value is None: self._affinity = None else: self._affinity = self._cmd.cpulist2bitmask(value) class IRQAffinities(object): def __init__(self): self.irqs = {} self.default = None # IRQs that don't support changing CPU affinity: self.unchangeable = [] class SchedulerUtils(object): """ Class encapsulating scheduler implementation in os module """ _dict_schedcfg2schedconst = { "f": "SCHED_FIFO", "b": "SCHED_BATCH", "r": "SCHED_RR", "o": "SCHED_OTHER", "i": "SCHED_IDLE", } def __init__(self): # {"f": os.SCHED_FIFO...} self._dict_schedcfg2num = dict((k, getattr(os, name)) for k, name in self._dict_schedcfg2schedconst.items()) # { os.SCHED_FIFO: "SCHED_FIFO"... } self._dict_num2schedconst = dict((getattr(os, name), name) for name in self._dict_schedcfg2schedconst.values()) def sched_cfg_to_num(self, str_scheduler): return self._dict_schedcfg2num.get(str_scheduler) # Reimplementation of schedstr from schedutils for logging purposes def sched_num_to_const(self, scheduler): return self._dict_num2schedconst.get(scheduler) def get_scheduler(self, pid): return os.sched_getscheduler(pid) def set_scheduler(self, pid, sched, prio): os.sched_setscheduler(pid, sched, os.sched_param(prio)) def get_affinity(self, pid): return os.sched_getaffinity(pid) def set_affinity(self, pid, affinity): os.sched_setaffinity(pid, affinity) def get_priority(self, pid): return os.sched_getparam(pid).sched_priority def get_priority_min(self, sched): return os.sched_get_priority_min(sched) def get_priority_max(self, sched): return os.sched_get_priority_max(sched) class SchedulerUtilsSchedutils(SchedulerUtils): """ Class encapsulating scheduler implementation in schedutils module """ def __init__(self): # { "f": schedutils.SCHED_FIFO... } self._dict_schedcfg2num = dict((k, getattr(schedutils, name)) for k, name in self._dict_schedcfg2schedconst.items()) # { schedutils.SCHED_FIFO: "SCHED_FIFO"... } self._dict_num2schedconst = dict((getattr(schedutils, name), name) for name in self._dict_schedcfg2schedconst.values()) def get_scheduler(self, pid): return schedutils.get_scheduler(pid) def set_scheduler(self, pid, sched, prio): schedutils.set_scheduler(pid, sched, prio) def get_affinity(self, pid): return schedutils.get_affinity(pid) def set_affinity(self, pid, affinity): schedutils.set_affinity(pid, affinity) def get_priority(self, pid): return schedutils.get_priority(pid) def get_priority_min(self, sched): return schedutils.get_priority_min(sched) def get_priority_max(self, sched): return schedutils.get_priority_max(sched) class SchedulerPlugin(base.Plugin): r""" Allows tuning of scheduling priorities, process/thread/IRQ affinities, and CPU isolation. To prevent processes/threads/IRQs from using certain CPUs, use the [option]`isolated_cores` option. It changes process/thread affinities, IRQs affinities and it sets `default_smp_affinity` for IRQs. The CPU affinity mask is adjusted for all processes and threads matching [option]`ps_whitelist` option subject to success of the `sched_setaffinity()` system call. The default setting of the [option]`ps_whitelist` regular expression is `.*` to match all processes and thread names. To exclude certain processes and threads use [option]`ps_blacklist` option. The value of this option is also interpreted as a regular expression and process/thread names (`ps -eo cmd`) are matched against that expression. Profile rollback allows all matching processes and threads to run on all CPUs and restores the IRQ settings prior to the profile application. Multiple regular expressions for [option]`ps_whitelist` and [option]`ps_blacklist` options are allowed and separated by `;`. Quoted semicolon `\;` is taken literally. .Isolate CPUs 2-4 ==== ---- [scheduler] isolated_cores=2-4 ps_blacklist=.*pmd.*;.*PMD.*;^DPDK;.*qemu-kvm.* ---- Isolate CPUs 2-4 while ignoring processes and threads matching `ps_blacklist` regular expressions. ==== The [option]`irq_process` option controls whether the scheduler plugin applies the `isolated_cores` parameter to IRQ affinities. The default value is `true`, which means that the scheduler plugin will move all possible IRQs away from the isolated cores. When `irq_process` is set to `false`, the plugin will not change any IRQ affinities. The [option]`default_irq_smp_affinity` option controls the values *TuneD* writes to `/proc/irq/default_smp_affinity`. The file specifies default affinity mask that applies to all non-active IRQs. Once an IRQ is allocated/activated its affinity bitmask will be set to the default mask. The following values are supported: * `calc` + The content of `/proc/irq/default_smp_affinity` will be calculated from the `isolated_cores` parameter. Non-isolated cores are calculated as an inversion of the `isolated_cores`. Then the intersection of the non-isolated cores and the previous content of `/proc/irq/default_smp_affinity` is written to `/proc/irq/default_smp_affinity`. If the intersection is an empty set, then just the non-isolated cores are written to `/proc/irq/default_smp_affinity`. This behavior is the default if the parameter `default_irq_smp_affinity` is omitted. * `ignore` + *TuneD* will not touch `/proc/irq/default_smp_affinity`. * an explicit cpulist + The cpulist (such as `1,3-4`) is unpacked and written directly to `/proc/irq/default_smp_affinity`. .An explicit CPU list to set the default IRQ smp affinity to CPUs 0 and 2 ==== ---- [scheduler] isolated_cores=1,3 default_irq_smp_affinity=0,2 ---- ==== To adjust scheduling policy, priority and affinity for a group of processes/threads, use the following syntax. [subs="quotes"] ---- group.__groupname__=__rule_prio__:__sched__:__prio__:__affinity__:__regex__ ---- Here, `__rule_prio__` defines internal *TuneD* priority of the rule. Rules are sorted based on priority. This is needed for inheritence to be able to reorder previously defined rules. Equal `__rule_prio__` rules should be processed in the order they were defined. However, this is Python interpreter dependant. To disable an inherited rule for `__groupname__` use: [subs="quotes"] ---- group.__groupname__= ---- `__sched__` must be one of: *`f`* for FIFO, *`b`* for batch, *`r`* for round robin, *`o`* for other, *`*`* do not change. `__affinity__` is CPU affinity in hexadecimal. Use `*` for no change. `__prio__` scheduling priority (see `chrt -m`). `__regex__` is Python regular expression. It is matched against the output of: [subs="quotes"] ---- ps -eo cmd ---- Any given process name may match more than one group. In such a case, the priority and scheduling policy are taken from the last matching `__regex__`. .Setting scheduling policy and priorities to kernel threads and watchdog ==== ---- [scheduler] group.kthreads=0:*:1:*:\[.*\]$ group.watchdog=0:f:99:*:\[watchdog.*\] ---- ==== The scheduler plug-in uses perf event loop to catch newly created processes. By default it listens to `perf.RECORD_COMM` and `perf.RECORD_EXIT` events. By setting [option]`perf_process_fork` option to `true`, `perf.RECORD_FORK` events will be also listened to. In other words, child processes created by the `fork()` system call will be processed. Since child processes inherit CPU affinity from their parents, the scheduler plug-in usually does not need to explicitly process these events. As processing perf events can pose a significant CPU overhead, the [option]`perf_process_fork` option parameter is set to `false` by default. Due to this, child processes are not processed by the scheduler plug-in. The CPU overhead of the scheduler plugin can be mitigated by using the scheduler [option]`runtime` option and setting it to `0`. This will completely disable the dynamic scheduler functionality and the perf events will not be monitored and acted upon. The disadvantage ot this approach is the procees/thread tuning will be done only at profile application. .Disabling the scheduler dynamic functionality ==== ---- [scheduler] runtime=0 isolated_cores=1,3 ---- ==== NOTE: For perf events, memory mapped buffer is used. Under heavy load the buffer may overflow. In such cases the `scheduler` plug-in may start missing events and failing to process some newly created processes. Increasing the buffer size may help. The buffer size can be set with the [option]`perf_mmap_pages` option. The value of this parameter has to expressed in powers of 2. If it is not the power of 2, the nearest higher power of 2 value is calculated from it and this calculated value used. If the [option]`perf_mmap_pages` option is omitted, the default kernel value is used. The scheduler plug-in supports process/thread confinement using cgroups v1. [option]`cgroup_mount_point` option specifies the path to mount the cgroup filesystem or where *TuneD* expects it to be mounted. If unset, `/sys/fs/cgroup/cpuset` is expected. If [option]`cgroup_groups_init` option is set to `1` *TuneD* will create (and remove) all cgroups defined with the `cgroup*` options. This is the default behavior. If it is set to `0` the cgroups need to be preset by other means. If [option]`cgroup_mount_point_init` option is set to `1`, *TuneD* will create (and remove) the cgroup mountpoint. It implies `cgroup_groups_init = 1`. If set to `0` the cgroups mount point needs to be preset by other means. This is the default behavior. The [option]`cgroup_for_isolated_cores` option is the cgroup name used for the [option]`isolated_cores` option functionality. For example, if a system has 4 CPUs, `isolated_cores=1` means that all processes/threads will be moved to CPUs 0,2-3. The scheduler plug-in will isolate the specified core by writing the calculated CPU affinity to the `cpuset.cpus` control file of the specified cgroup and move all the matching processes/threads to this group. If this option is unset, classic cpuset affinity using `sched_setaffinity()` will be used. [option]`cgroup.__cgroup_name__` option defines affinities for arbitrary cgroups. Even hierarchic cgroups can be used, but the hieararchy needs to be specified in the correct order. Also *TuneD* does not do any sanity checks here, with the exception that it forces the cgroup to be under [option]`cgroup_mount_point`. The syntax of the scheduler option starting with `group.` has been augmented to use `cgroup.__cgroup_name__` instead of the hexadecimal `__affinity__`. The matching processes will be moved to the cgroup `__cgroup_name__`. It is also possible to use cgroups which have not been defined by the [option]`cgroup.` option as described above, i.e. cgroups not managed by *TuneD*. All cgroup names are sanitized by replacing all all dots (`.`) with slashes (`/`). This is to prevent the plug-in from writing outside [option]`cgroup_mount_point`. .Using cgroups v1 with the scheduler plug-in ==== ---- [scheduler] cgroup_mount_point=/sys/fs/cgroup/cpuset cgroup_mount_point_init=1 cgroup_groups_init=1 cgroup_for_isolated_cores=group cgroup.group1=2 cgroup.group2=0,2 group.ksoftirqd=0:f:2:cgroup.group1:ksoftirqd.* ps_blacklist=ksoftirqd.*;rcuc.*;rcub.*;ktimersoftd.* isolated_cores=1 ---- Cgroup `group1` has the affinity set to CPU 2 and the cgroup `group2` to CPUs 0,2. Given a 4 CPU setup, the [option]`isolated_cores=1` option causes all processes/threads to be moved to CPU cores 0,2-3. Processes/threads that are blacklisted by the [option]`ps_blacklist` regular expression will not be moved. The scheduler plug-in will isolate the specified core by writing the CPU affinity 0,2-3 to the `cpuset.cpus` control file of the `group` and move all the matching processes/threads to this cgroup. ==== Option [option]`cgroup_ps_blacklist` allows excluding processes which belong to the blacklisted cgroups. The regular expression specified by this option is matched against cgroup hierarchies from `/proc/PID/cgroups`. Cgroups v1 hierarchies from `/proc/PID/cgroups` are separated by commas ',' prior to regular expression matching. The following is an example of content against which the regular expression is matched against: `10:hugetlb:/,9:perf_event:/,8:blkio:/` Multiple regular expressions can be separated by semicolon ';'. The semicolon represents a logical 'or' operator. .Cgroup-based exclusion of processes from the scheduler ==== ---- [scheduler] isolated_cores=1 cgroup_ps_blacklist=:/daemons\b ---- The scheduler plug-in will move all processes away from core 1 except processes which belong to cgroup '/daemons'. The '\b' is a regular expression metacharacter that matches a word boundary. ---- [scheduler] isolated_cores=1 cgroup_ps_blacklist=\b8:blkio: ---- The scheduler plug-in will exclude all processes which belong to a cgroup with hierarchy-ID 8 and controller-list blkio. ==== Kernels 5.13 and newer moved some `sched_` and `numa_balancing_` kernel run-time parameters from `/proc/sys/kernel`, managed by the `sysctl` utility, to `debugfs`, typically mounted under `/sys/kernel/debug`. TuneD provides an abstraction mechanism for the following parameters via the scheduler plug-in: [option]`sched_min_granularity_ns`, [option]`sched_latency_ns`, [option]`sched_wakeup_granularity_ns`, [option]`sched_tunable_scaling`, [option]`sched_migration_cost_ns`, [option]`sched_nr_migrate`, [option]`numa_balancing_scan_delay_ms`, [option]`numa_balancing_scan_period_min_ms`, [option]`numa_balancing_scan_period_max_ms` and [option]`numa_balancing_scan_size_mb`. Moreover in kernel 6.6 and newer support for the `sched_wakeup_granularity_ns` and `sched_latency_ns` were removed. The `sched_min_granularity_ns` was renamed to `sched_base_slice_ns`. Based on the kernel used, TuneD will write the specified value to the correct location or ignore it. For the compatibility the alias [option]`sched_base_slice_ns` was added, but the [option]`sched_min_granularity_ns` can be still used instead. .Set tasks' "cache hot" value for migration decisions. ==== ---- [scheduler] sched_migration_cost_ns=500000 ---- On the old kernels, this is equivalent to: ---- [sysctl] kernel.sched_migration_cost_ns=500000 ---- that is, value `500000` will be written to `/proc/sys/kernel/sched_migration_cost_ns`. However, on more recent kernels, the value `500000` will be written to `/sys/kernel/debug/sched/migration_cost_ns`. ==== """ _dict_sched_knob_map = { "wakeup_granularity_ns": "", "min_granularity_ns": "base_slice_ns", "latency_ns": "", } def __init__(self, monitor_repository, storage_factory, hardware_inventory, device_matcher, device_matcher_udev, plugin_instance_factory, global_cfg, variables): super(SchedulerPlugin, self).__init__(monitor_repository, storage_factory, hardware_inventory, device_matcher, device_matcher_udev, plugin_instance_factory, global_cfg, variables) self._has_dynamic_options = True self._daemon = consts.CFG_DEF_DAEMON self._sleep_interval = int(consts.CFG_DEF_SLEEP_INTERVAL) if global_cfg is not None: self._daemon = global_cfg.get_bool(consts.CFG_DAEMON, consts.CFG_DEF_DAEMON) self._sleep_interval = int(global_cfg.get(consts.CFG_SLEEP_INTERVAL, consts.CFG_DEF_SLEEP_INTERVAL)) self._cmd = commands() # helper variable utilized for showing hint only once that the error may be caused by Secure Boot self._secure_boot_hint = None # paths cache for sched_ and numa_ tunings self._sched_knob_paths_cache = {} # default is to whitelist all and blacklist none self._ps_whitelist = ".*" self._ps_blacklist = "" self._kthread_process = True self._cgroup_ps_blacklist_re = "" # perf is optional, if unavailable, it will be disabled later try: self._cpus = perf.cpu_map() except (NameError, AttributeError): cpus = self._cmd.read_file(consts.SYSFS_CPUS_PRESENT_PATH) # it's different type than perf.cpu_map(), but without perf we use it as iterable # which should be compatible, fallback to single core CPU if sysfs is unavailable self._cpus = self._cmd.cpulist_unpack(cpus) if cpus else [ 0 ] self._scheduler_storage_key = self._storage_key( command_name = "scheduler") self._irq_process = True self._irq_storage_key = self._storage_key( command_name = "irq") self._evlist = None try: self._scheduler_utils = SchedulerUtils() except AttributeError: self._scheduler_utils = SchedulerUtilsSchedutils() def _calc_mmap_pages(self, mmap_pages): if mmap_pages is None: return None try: mp = int(mmap_pages) except ValueError: return 0 if mp <= 0: return 0 # round up to the nearest power of two value return int(2 ** math.ceil(math.log(mp, 2))) def _instance_init(self, instance): instance._evlist = None instance._has_dynamic_tuning = False instance._has_static_tuning = True # this is hack, runtime_tuning should be covered by dynamic_tuning configuration # TODO: add per plugin dynamic tuning configuration and use dynamic_tuning configuration # instead of runtime_tuning instance._runtime_tuning = True # FIXME: do we want to do this here? # recover original values in case of crash self._scheduler_original = self._storage.get( self._scheduler_storage_key, {}) if len(self._scheduler_original) > 0: log.info("recovering scheduling settings from previous run") self._restore_ps_affinity() self._scheduler_original = {} self._storage.unset(self._scheduler_storage_key) self._cgroups_original_affinity = dict() # calculated by isolated_cores setter self._affinity = None self._cgroup_affinity_initialized = False self._cgroup = None self._cgroups = collections.OrderedDict([(self._sanitize_cgroup_path(option[7:]), self._variables.expand(affinity)) for option, affinity in instance.options.items() if option[:7] == "cgroup." and len(option) > 7]) instance._scheduler = instance.options perf_mmap_pages_raw = self._variables.expand(instance.options["perf_mmap_pages"]) perf_mmap_pages = self._calc_mmap_pages(perf_mmap_pages_raw) if perf_mmap_pages == 0: log.error("Invalid 'perf_mmap_pages' value specified: '%s', using default kernel value" % perf_mmap_pages_raw) perf_mmap_pages = None if perf_mmap_pages is not None and str(perf_mmap_pages) != perf_mmap_pages_raw: log.info("'perf_mmap_pages' value has to be power of two, specified: '%s', using: '%d'" % (perf_mmap_pages_raw, perf_mmap_pages)) for k in instance._scheduler: instance._scheduler[k] = self._variables.expand(instance._scheduler[k]) if self._cmd.get_bool(instance._scheduler.get("runtime", 1)) == "0": instance._runtime_tuning = False instance._terminate = threading.Event() if self._daemon and instance._runtime_tuning: try: instance._threads = perf.thread_map() evsel = perf.evsel(type = perf.TYPE_SOFTWARE, config = perf.COUNT_SW_DUMMY, task = 1, comm = 1, mmap = 0, freq = 0, wakeup_events = 1, watermark = 1, sample_type = perf.SAMPLE_TID | perf.SAMPLE_CPU) evsel.open(cpus = self._cpus, threads = instance._threads) instance._evlist = perf.evlist(self._cpus, instance._threads) instance._evlist.add(evsel) if perf_mmap_pages is None: instance._evlist.mmap() else: instance._evlist.mmap(pages = perf_mmap_pages) # no perf except: log.warning("python-perf unavailable, disabling perf support and " \ "runtime tuning, you can try to (re)install python(3)-perf package") instance._runtime_tuning = False def _instance_cleanup(self, instance): if instance._evlist: for fd in instance._evlist.get_pollfd(): os.close(fd.name) @classmethod def _get_config_options(cls): return { "isolated_cores": None, "cgroup_mount_point": consts.DEF_CGROUP_MOUNT_POINT, "cgroup_mount_point_init": False, "cgroup_groups_init": True, "cgroup_for_isolated_cores": None, "cgroup_ps_blacklist": None, "ps_whitelist": None, "ps_blacklist": None, "kthread_process": True, "irq_process": True, "default_irq_smp_affinity": "calc", "perf_mmap_pages": None, "perf_process_fork": "false", "sched_min_granularity_ns": None, "sched_base_slice_ns": None, "sched_latency_ns": None, "sched_wakeup_granularity_ns": None, "sched_tunable_scaling": None, "sched_migration_cost_ns": None, "sched_nr_migrate": None, "numa_balancing_scan_delay_ms": None, "numa_balancing_scan_period_min_ms": None, "numa_balancing_scan_period_max_ms": None, "numa_balancing_scan_size_mb": None } def _sanitize_cgroup_path(self, value): return str(value).replace(".", "/") if value is not None else None # Raises OSError, IOError def _get_cmdline(self, process): if not isinstance(process, procfs.process): pid = process process = procfs.process(pid) cmdline = procfs.process_cmdline(process) if self._is_kthread(process): cmdline = "[" + cmdline + "]" return cmdline # Raises OSError, IOError def get_processes(self): ps = procfs.pidstats() ps.reload_threads() processes = {} for proc in ps.values(): try: if not self._kthread_process and self._is_kthread(proc): continue cmd = self._get_cmdline(proc) pid = proc["pid"] processes[pid] = cmd if "threads" in proc: for pid in proc["threads"].keys(): cmd = self._get_cmdline(proc) processes[pid] = cmd except (OSError, IOError) as e: if e.errno == errno.ENOENT \ or e.errno == errno.ESRCH: continue else: raise return processes # Raises OSError # Raises SystemError with old (pre-0.4) python-schedutils # instead of OSError # If PID doesn't exist, errno == ESRCH def _get_rt(self, pid): scheduler = self._scheduler_utils.get_scheduler(pid) sched_str = self._scheduler_utils.sched_num_to_const(scheduler) priority = self._scheduler_utils.get_priority(pid) log.debug("Read scheduler policy '%s' and priority '%d' of PID '%d'" % (sched_str, priority, pid)) return (scheduler, priority) def _set_rt(self, pid, sched, prio): sched_str = self._scheduler_utils.sched_num_to_const(sched) log.debug("Setting scheduler policy to '%s' and priority to '%d' of PID '%d'." % (sched_str, prio, pid)) try: prio_min = self._scheduler_utils.get_priority_min(sched) prio_max = self._scheduler_utils.get_priority_max(sched) if prio < prio_min or prio > prio_max: log.error("Priority for %s must be in range %d - %d. '%d' was given." % (sched_str, prio_min, prio_max, prio)) # Workaround for old (pre-0.4) python-schedutils which raised # SystemError instead of OSError except (SystemError, OSError) as e: log.error("Failed to get allowed priority range: %s" % e) try: self._scheduler_utils.set_scheduler(pid, sched, prio) except (SystemError, OSError) as e: if hasattr(e, "errno") and e.errno == errno.ESRCH: log.debug("Failed to set scheduling parameters of PID %d, the task vanished." % pid) else: log.error("Failed to set scheduling parameters of PID %d: %s" % (pid, e)) # process is a procfs.process object # Raises OSError, IOError def _is_kthread(self, process): return process["stat"]["flags"] & procfs.pidstat.PF_KTHREAD != 0 def _process_in_blacklisted_cgroup(self, process): if self._cgroup_ps_blacklist_re == "": return False return re.search(self._cgroup_ps_blacklist_re, self._get_stat_cgroup(process)) is not None # Returns True if we can ignore a failed affinity change of # a process with the given PID and therefore not report it as an error. def _ignore_set_affinity_error(self, process): pid = process.pid try: if process["stat"]["state"] == "Z": log.debug("Affinity of zombie task with PID %d could not be changed." % pid) return True if self._process_in_blacklisted_cgroup(process): log.debug("Affinity of task with PID %d could not be changed, the task was moved into a blacklisted cgroup." % pid) return True if process["stat"].is_bound_to_cpu(): if self._is_kthread(process): log.debug("Affinity of kernel thread with PID %d cannot be changed, the task's affinity mask is fixed." % pid) else: log.warning("Affinity of task with PID %d cannot be changed, the task's affinity mask is fixed." % pid) return True log.info("Task %d cmdline: %s" % (pid, self._get_cmdline(process))) log.info("Task %d cgroup: %s" % (pid, self._get_stat_cgroup(process))) log.info("Task %d affinity: %s" % (pid, list(self._scheduler_utils.get_affinity(pid)))) except (OSError, IOError) as e: if e.errno == errno.ENOENT or e.errno == errno.ESRCH: log.debug("Failed to get task info for PID %d, the task vanished." % pid) return True log.error("Failed to get task info for PID %d: %s" % (pid, e)) except (AttributeError, KeyError) as e: log.error("Failed to get task info for PID %d: %s" % (pid, e)) return False def _store_orig_process_rt(self, pid, scheduler, priority): try: params = self._scheduler_original[pid] except KeyError: params = SchedulerParams(self._cmd) self._scheduler_original[pid] = params if params.scheduler is None and params.priority is None: params.scheduler = scheduler params.priority = priority def _tune_process_rt(self, pid, sched, prio): cont = True if sched is None and prio is None: return cont try: (prev_sched, prev_prio) = self._get_rt(pid) if sched is None: sched = prev_sched self._set_rt(pid, sched, prio) self._store_orig_process_rt(pid, prev_sched, prev_prio) except (SystemError, OSError) as e: if hasattr(e, "errno") and e.errno == errno.ESRCH: log.debug("Failed to read scheduler policy of PID %d, the task vanished." % pid) if pid in self._scheduler_original: del self._scheduler_original[pid] cont = False else: log.error("Refusing to set scheduler and priority of PID %d, reading original scheduling parameters failed: %s" % (pid, e)) return cont def _is_cgroup_affinity(self, affinity): return str(affinity)[:7] == "cgroup." def _store_orig_process_affinity(self, pid, affinity, is_cgroup = False): try: params = self._scheduler_original[pid] except KeyError: params = SchedulerParams(self._cmd) self._scheduler_original[pid] = params if params.affinity is None and params.cgroup is None: if is_cgroup: params.cgroup = affinity else: params.affinity = affinity def _get_cgroup_affinity(self, pid): # we cannot use procfs, because it uses comma ',' delimiter which # can be ambiguous for l in self._cmd.read_file("%s/%s/%s" % (consts.PROCFS_MOUNT_POINT, str(pid), "cgroup"), no_error = True).split("\n"): try: cgroup = l.split(":cpuset:")[1][1:] return cgroup if cgroup != "" else "/" except IndexError: pass return "/" # it can be arbitrary cgroup even cgroup we didn't set, but it needs to be # under "cgroup_mount_point" def _set_cgroup(self, pid, cgroup): cgroup = self._sanitize_cgroup_path(cgroup) path = self._cgroup_mount_point if cgroup != "/": path = "%s/%s" % (path, cgroup) self._cmd.write_to_file("%s/tasks" % path, str(pid), no_error = True) def _parse_cgroup_affinity(self, cgroup): # "cgroup.CGROUP" cgroup = cgroup[7:] # this should be faster than string comparison is_cgroup = not isinstance(cgroup, list) and len(cgroup) > 0 return is_cgroup, cgroup def _tune_process_affinity(self, pid, affinity, intersect = False): cont = True if affinity is None: return cont try: (is_cgroup, cgroup) = self._parse_cgroup_affinity(affinity) if is_cgroup: prev_affinity = self._get_cgroup_affinity(pid) self._set_cgroup(pid, cgroup) else: prev_affinity = self._get_affinity(pid) if intersect: affinity = self._get_intersect_affinity( prev_affinity, affinity, affinity) self._set_affinity(pid, affinity) self._store_orig_process_affinity(pid, prev_affinity, is_cgroup) except (SystemError, OSError) as e: if hasattr(e, "errno") and e.errno == errno.ESRCH: log.debug("Failed to read affinity of PID %d, the task vanished." % pid) if pid in self._scheduler_original: del self._scheduler_original[pid] cont = False else: log.error("Refusing to set CPU affinity of PID %d, reading original affinity failed: %s" % (pid, e)) return cont #tune process and store previous values def _tune_process(self, pid, cmd, sched, prio, affinity): cont = self._tune_process_rt(pid, sched, prio) if not cont: return cont = self._tune_process_affinity(pid, affinity) if not cont or pid not in self._scheduler_original: return self._scheduler_original[pid].cmdline = cmd def _convert_sched_params(self, str_scheduler, str_priority): scheduler = self._scheduler_utils.sched_cfg_to_num(str_scheduler) if scheduler is None and str_scheduler != "*": log.error("Invalid scheduler: %s. Scheduler and priority will be ignored." % str_scheduler) return (None, None) else: try: priority = int(str_priority) except ValueError: log.error("Invalid priority: %s. Scheduler and priority will be ignored." % str_priority) return (None, None) return (scheduler, priority) def _convert_affinity(self, str_affinity): if str_affinity == "*": affinity = None elif self._is_cgroup_affinity(str_affinity): affinity = str_affinity else: affinity = self._cmd.hex2cpulist(str_affinity) if not affinity: log.error("Invalid affinity: %s. It will be ignored." % str_affinity) affinity = None return affinity def _convert_sched_cfg(self, vals): (rule_prio, scheduler, priority, affinity, regex) = vals (scheduler, priority) = self._convert_sched_params( scheduler, priority) affinity = self._convert_affinity(affinity) return (rule_prio, scheduler, priority, affinity, regex) def _cgroup_create_group(self, cgroup): path = "%s/%s" % (self._cgroup_mount_point, cgroup) try: os.mkdir(path, consts.DEF_CGROUP_MODE) except OSError as e: log.error("Unable to create cgroup '%s': %s" % (path, e)) if (not self._cmd.write_to_file("%s/%s" % (path, "cpuset.mems"), self._cmd.read_file("%s/%s" % (self._cgroup_mount_point, "cpuset.mems"), no_error = True), no_error = True)): log.error("Unable to initialize 'cpuset.mems ' for cgroup '%s'" % path) def _cgroup_initialize_groups(self): if self._cgroup is not None and not self._cgroup in self._cgroups: self._cgroup_create_group(self._cgroup) for cg in self._cgroups: self._cgroup_create_group(cg) def _cgroup_initialize(self): log.debug("Initializing cgroups settings") try: os.makedirs(self._cgroup_mount_point, consts.DEF_CGROUP_MODE) except OSError as e: log.error("Unable to create cgroup mount point: %s" % e) (ret, out) = self._cmd.execute(["mount", "-t", "cgroup", "-o", "cpuset", "cpuset", self._cgroup_mount_point]) if ret != 0: log.error("Unable to mount '%s'" % self._cgroup_mount_point) def _remove_dir(self, cgroup): try: os.rmdir(cgroup) except OSError as e: log.error("Unable to remove directory '%s': %s" % (cgroup, e)) def _cgroup_finalize_groups(self): for cg in reversed(self._cgroups): self._remove_dir("%s/%s" % (self._cgroup_mount_point, cg)) if self._cgroup is not None and not self._cgroup in self._cgroups: self._remove_dir("%s/%s" % (self._cgroup_mount_point, self._cgroup)) def _cgroup_finalize(self): log.debug("Removing cgroups settings") (ret, out) = self._cmd.execute(["umount", self._cgroup_mount_point]) if ret != 0: log.error("Unable to umount '%s'" % self._cgroup_mount_point) return False self._remove_dir(self._cgroup_mount_point) d = os.path.dirname(self._cgroup_mount_point) if (d != "/"): self._remove_dir(d) def _cgroup_set_affinity_one(self, cgroup, affinity, backup = False): if affinity != "": log.debug("Setting cgroup '%s' affinity to '%s'" % (cgroup, affinity)) else: log.debug("Skipping cgroup '%s', empty affinity requested" % cgroup) return path = "%s/%s/%s" % (self._cgroup_mount_point, cgroup, "cpuset.cpus") if backup: orig_affinity = self._cmd.read_file(path, err_ret = "ERR", no_error = True).strip() if orig_affinity != "ERR": self._cgroups_original_affinity[cgroup] = orig_affinity else: log.error("Refusing to set affinity of cgroup '%s', reading original affinity failed" % cgroup) return if not self._cmd.write_to_file(path, affinity, no_error = True): log.error("Unable to set affinity '%s' for cgroup '%s'" % (affinity, cgroup)) def _cgroup_set_affinity(self): if self._cgroup_affinity_initialized: return log.debug("Setting cgroups affinities") if self._affinity is not None and self._cgroup is not None and not self._cgroup in self._cgroups: self._cgroup_set_affinity_one(self._cgroup, self._affinity, backup = True) for cg in self._cgroups.items(): self._cgroup_set_affinity_one(cg[0], cg[1], backup = True) self._cgroup_affinity_initialized = True def _cgroup_restore_affinity(self): log.debug("Restoring cgroups affinities") for cg in self._cgroups_original_affinity.items(): self._cgroup_set_affinity_one(cg[0], cg[1]) def _instance_apply_static(self, instance): # need to get "cgroup_mount_point_init", "cgroup_mount_point", "cgroup_groups_init", # "cgroup", and initialize mount point and cgroups before super class implementation call self._cgroup_mount_point = self._variables.expand(instance.options["cgroup_mount_point"]) self._cgroup_mount_point_init = self._cmd.get_bool(self._variables.expand( instance.options["cgroup_mount_point_init"])) == "1" self._cgroup_groups_init = self._cmd.get_bool(self._variables.expand( instance.options["cgroup_groups_init"])) == "1" self._cgroup = self._sanitize_cgroup_path(self._variables.expand( instance.options["cgroup_for_isolated_cores"])) if self._cgroup_mount_point_init: self._cgroup_initialize() if self._cgroup_groups_init or self._cgroup_mount_point_init: self._cgroup_initialize_groups() super(SchedulerPlugin, self)._instance_apply_static(instance) self._cgroup_set_affinity() try: ps = self.get_processes() except (OSError, IOError) as e: log.error("error applying tuning, cannot get information about running processes: %s" % e) return sched_cfg = [(option, str(value).split(":", 4)) for option, value in instance._scheduler.items()] buf = [(option, self._convert_sched_cfg(vals)) for option, vals in sched_cfg if re.match(r"group\.", option) and len(vals) == 5] sched_cfg = sorted(buf, key=lambda option_vals: option_vals[1][0]) sched_all = dict() # for runtime tuning instance._sched_lookup = {} for option, (rule_prio, scheduler, priority, affinity, regex) \ in sched_cfg: try: r = re.compile(regex) except re.error as e: log.error("error compiling regular expression: '%s'" % str(regex)) continue processes = [(pid, cmd) for pid, cmd in ps.items() if re.search(r, cmd) is not None] #cmd - process name, option - group name sched = dict([(pid, (cmd, option, scheduler, priority, affinity, regex)) for pid, cmd in processes]) sched_all.update(sched) # make any contained regexes non-capturing: replace "(" with "(?:", # unless the "(" is preceded by "\" or followed by "?" regex = re.sub(r"(?<!\\)\((?!\?)", "(?:", str(regex)) instance._sched_lookup[regex] = [scheduler, priority, affinity] for pid, (cmd, option, scheduler, priority, affinity, regex) \ in sched_all.items(): self._tune_process(pid, cmd, scheduler, priority, affinity) self._storage.set(self._scheduler_storage_key, self._scheduler_original) if self._daemon and instance._runtime_tuning: instance._thread = threading.Thread(target = self._thread_code, args = [instance]) instance._thread.start() def _restore_ps_affinity(self): try: ps = self.get_processes() except (OSError, IOError) as e: log.error("error unapplying tuning, cannot get information about running processes: %s" % e) return for pid, orig_params in self._scheduler_original.items(): # if command line for the pid didn't change, it's very probably the same process if pid not in ps or ps[pid] != orig_params.cmdline: continue if orig_params.scheduler is not None \ and orig_params.priority is not None: self._set_rt(pid, orig_params.scheduler, orig_params.priority) if orig_params.cgroup is not None: self._set_cgroup(pid, orig_params.cgroup) elif orig_params.affinity is not None: self._set_affinity(pid, orig_params.affinity) self._scheduler_original = {} self._storage.unset(self._scheduler_storage_key) def _cgroup_cleanup_tasks_one(self, cgroup): cnt = int(consts.CGROUP_CLEANUP_TASKS_RETRY) data = " " while data != "" and cnt > 0: data = self._cmd.read_file("%s/%s/%s" % (self._cgroup_mount_point, cgroup, "tasks"), err_ret = " ", no_error = True) if data not in ["", " "]: for l in data.split("\n"): self._cmd.write_to_file("%s/%s" % (self._cgroup_mount_point, "tasks"), l, no_error = True) cnt -= 1 if cnt == 0: log.warning("Unable to cleanup tasks from cgroup '%s'" % cgroup) def _cgroup_cleanup_tasks(self): if self._cgroup is not None and not self._cgroup in self._cgroups: self._cgroup_cleanup_tasks_one(self._cgroup) for cg in self._cgroups: self._cgroup_cleanup_tasks_one(cg) def _instance_unapply_static(self, instance, rollback = consts.ROLLBACK_SOFT): super(SchedulerPlugin, self)._instance_unapply_static(instance, rollback) if self._daemon and instance._runtime_tuning: instance._terminate.set() instance._thread.join() self._restore_ps_affinity() self._cgroup_restore_affinity() self._cgroup_cleanup_tasks() if self._cgroup_groups_init or self._cgroup_mount_point_init: self._cgroup_finalize_groups() if self._cgroup_mount_point_init: self._cgroup_finalize() def _cgroup_verify_affinity_one(self, cgroup, affinity): log.debug("Verifying cgroup '%s' affinity" % cgroup) path = "%s/%s/%s" % (self._cgroup_mount_point, cgroup, "cpuset.cpus") current_affinity = self._cmd.read_file(path, err_ret = "ERR", no_error = True) if current_affinity == "ERR": return True current_affinity = self._cmd.cpulist2string(self._cmd.cpulist_pack(current_affinity)) affinity = self._cmd.cpulist2string(self._cmd.cpulist_pack(affinity)) affinity_description = "cgroup '%s' affinity" % cgroup if current_affinity == affinity: log.info(consts.STR_VERIFY_PROFILE_VALUE_OK % (affinity_description, current_affinity)) return True else: log.error(consts.STR_VERIFY_PROFILE_VALUE_FAIL % (affinity_description, current_affinity, affinity)) return False def _cgroup_verify_affinity(self): log.debug("Veryfying cgroups affinities") ret = True if self._affinity is not None and self._cgroup is not None and not self._cgroup in self._cgroups: ret = ret and self._cgroup_verify_affinity_one(self._cgroup, self._affinity) for cg in self._cgroups.items(): ret = ret and self._cgroup_verify_affinity_one(cg[0], cg[1]) return ret def _instance_verify_static(self, instance, ignore_missing, devices): ret1 = super(SchedulerPlugin, self)._instance_verify_static(instance, ignore_missing, devices) ret2 = self._cgroup_verify_affinity() return ret1 and ret2 def _add_pid(self, instance, pid, r): try: proc = procfs.process(pid) if not self._kthread_process and self._is_kthread(proc): return cmd = self._get_cmdline(pid) except (OSError, IOError) as e: if e.errno == errno.ENOENT \ or e.errno == errno.ESRCH: log.debug("Failed to get cmdline of PID %d, the task vanished." % pid) else: log.error("Failed to get cmdline of PID %d: %s" % (pid, e)) return v = self._cmd.re_lookup(instance._sched_lookup, cmd, r) if v is not None and not pid in self._scheduler_original: log.debug("tuning new process '%s' with PID '%d' by '%s'" % (cmd, pid, str(v))) (sched, prio, affinity) = v self._tune_process(pid, cmd, sched, prio, affinity) self._storage.set(self._scheduler_storage_key, self._scheduler_original) def _remove_pid(self, instance, pid): if pid in self._scheduler_original: del self._scheduler_original[pid] log.debug("removed PID %d from the rollback database" % pid) self._storage.set(self._scheduler_storage_key, self._scheduler_original) def _thread_code(self, instance): r = self._cmd.re_lookup_compile(instance._sched_lookup) poll = select.poll() # Store the file objects in a local variable so that they don't # go out of scope too soon. This is a workaround for # python3-perf bug rhbz#1659445. fds = instance._evlist.get_pollfd() for fd in fds: poll.register(fd) while not instance._terminate.is_set(): # timeout to poll in milliseconds if len(poll.poll(self._sleep_interval * 1000)) > 0 and not instance._terminate.is_set(): read_events = True while read_events: read_events = False for cpu in self._cpus: event = instance._evlist.read_on_cpu(cpu) if event: read_events = True if isinstance(event, perf.comm_event) or ( self._perf_process_fork_value and isinstance(event, perf.task_event) and event.type == perf.RECORD_FORK ): self._add_pid(instance, int(event.tid), r) elif isinstance(event, perf.task_event) and event.type == perf.RECORD_EXIT: self._remove_pid(instance, int(event.tid)) @command_custom("cgroup_ps_blacklist", per_device = False) def _cgroup_ps_blacklist(self, enabling, value, verify, ignore_missing, instance): # currently unsupported if verify: return None if enabling and value is not None: self._cgroup_ps_blacklist_re = "|".join(["(%s)" % v for v in re.split(r"(?<!\\);", str(value))]) @command_custom("ps_whitelist", per_device = False) def _ps_whitelist(self, enabling, value, verify, ignore_missing, instance): # currently unsupported if verify: return None if enabling and value is not None: self._ps_whitelist = "|".join(["(%s)" % v for v in re.split(r"(?<!\\);", str(value))]) @command_custom("ps_blacklist", per_device = False) def _ps_blacklist(self, enabling, value, verify, ignore_missing, instance): # currently unsupported if verify: return None if enabling and value is not None: self._ps_blacklist = "|".join(["(%s)" % v for v in re.split(r"(?<!\\);", str(value))]) @command_custom("kthread_process", per_device = False) def _kthread_process(self, enabling, value, verify, ignore_missing, instance): # currently unsupported if verify: return None if enabling and value is not None: self._kthread_process = self._cmd.get_bool(value) == "1" @command_custom("irq_process", per_device = False) def _irq_process(self, enabling, value, verify, ignore_missing, instance): # currently unsupported if verify: return None if enabling and value is not None: self._irq_process = self._cmd.get_bool(value) == "1" @command_custom("default_irq_smp_affinity", per_device = False) def _default_irq_smp_affinity(self, enabling, value, verify, ignore_missing, instance): # currently unsupported if verify: return None if enabling and value is not None: if value in ["calc", "ignore"]: self._default_irq_smp_affinity_value = value else: self._default_irq_smp_affinity_value = self._cmd.cpulist_unpack(value) @command_custom("perf_process_fork", per_device = False) def _perf_process_fork(self, enabling, value, verify, ignore_missing, instance): # currently unsupported if verify: return None if enabling and value is not None: self._perf_process_fork_value = self._cmd.get_bool(value) == "1" # Raises OSError # Raises SystemError with old (pre-0.4) python-schedutils # instead of OSError # If PID doesn't exist, errno == ESRCH def _get_affinity(self, pid): res = self._scheduler_utils.get_affinity(pid) log.debug("Read affinity '%s' of PID %d" % (res, pid)) return res def _set_affinity(self, pid, affinity): process = procfs.process(pid) if self._process_in_blacklisted_cgroup(process): log.debug("Not setting CPU affinity of PID %d, the task belongs to a blacklisted cgroup." % pid) return log.debug("Setting CPU affinity of PID %d to '%s'." % (pid, affinity)) try: self._scheduler_utils.set_affinity(pid, affinity) # Workaround for old python-schedutils (pre-0.4) which # incorrectly raised SystemError instead of OSError except (SystemError, OSError) as e: if not self._ignore_set_affinity_error(process): log.error("Failed to set affinity of PID %d to '%s': %s" % (pid, affinity, e)) # returns intersection of affinity1 with affinity2, if intersection is empty it returns affinity3 def _get_intersect_affinity(self, affinity1, affinity2, affinity3): aff = set(affinity1).intersection(set(affinity2)) if aff: return list(aff) return affinity3 def _set_all_obj_affinity(self, objs, affinity, threads = False): psl = objs if not self._kthread_process: psl = [v for v in psl if not self._is_kthread(v)] psl = [v for v in psl if re.search(self._ps_whitelist, self._get_stat_comm(v)) is not None] if self._ps_blacklist != "": psl = [v for v in psl if re.search(self._ps_blacklist, self._get_stat_comm(v)) is None] psd = dict([(v.pid, v) for v in psl]) for pid in psd: try: cmd = self._get_cmdline(psd[pid]) except (OSError, IOError) as e: if e.errno == errno.ENOENT \ or e.errno == errno.ESRCH: log.debug("Failed to get cmdline of PID %d, the task vanished." % pid) else: log.error("Refusing to set affinity of PID %d, failed to get its cmdline: %s" % (pid, e)) continue cont = self._tune_process_affinity(pid, affinity, intersect = True) if not cont: continue if pid in self._scheduler_original: self._scheduler_original[pid].cmdline = cmd # process threads if not threads and "threads" in psd[pid]: self._set_all_obj_affinity( psd[pid]["threads"].values(), affinity, True) def _get_stat_cgroup(self, o): try: return o["cgroups"] except (OSError, IOError, KeyError): return "" def _get_stat_comm(self, o): try: return o["stat"]["comm"] except (OSError, IOError, KeyError): return "" def _set_ps_affinity(self, affinity): try: ps = procfs.pidstats() ps.reload_threads() self._set_all_obj_affinity(ps.values(), affinity, False) except (OSError, IOError) as e: log.error("error applying tuning, cannot get information about running processes: %s" % e) # Returns 0 on success, -2 if changing the affinity is not # supported, -1 if some other error occurs. def _set_irq_affinity(self, irq, affinity, restoring): try: affinity_hex = self._cmd.cpulist2hex(affinity) log.debug("Setting SMP affinity of IRQ %s to '%s'" % (irq, affinity_hex)) filename = "/proc/irq/%s/smp_affinity" % irq with open(filename, "w") as f: f.write(affinity_hex) return 0 except (OSError, IOError) as e: # EIO is returned by # kernel/irq/proc.c:write_irq_affinity() if changing # the affinity is not supported # (at least on kernels 3.10 and 4.18) if hasattr(e, "errno") and e.errno == errno.EIO \ and not restoring: log.debug("Setting SMP affinity of IRQ %s is not supported" % irq) return -2 else: log.error("Failed to set SMP affinity of IRQ %s to '%s': %s" % (irq, affinity_hex, e)) return -1 def _set_default_irq_affinity(self, affinity): try: affinity_hex = self._cmd.cpulist2hex(affinity) log.debug("Setting default SMP IRQ affinity to '%s'" % affinity_hex) with open("/proc/irq/default_smp_affinity", "w") as f: f.write(affinity_hex) except (OSError, IOError) as e: log.error("Failed to set default SMP IRQ affinity to '%s': %s" % (affinity_hex, e)) def _set_all_irq_affinity(self, affinity): irq_original = IRQAffinities() irqs = procfs.interrupts() for irq in irqs.keys(): try: prev_affinity = irqs[irq]["affinity"] log.debug("Read affinity of IRQ '%s': '%s'" % (irq, prev_affinity)) except KeyError: continue _affinity = self._get_intersect_affinity(prev_affinity, affinity, affinity) if set(_affinity) == set(prev_affinity): continue res = self._set_irq_affinity(irq, _affinity, False) if res == 0: irq_original.irqs[irq] = prev_affinity elif res == -2: irq_original.unchangeable.append(irq) # default affinity prev_affinity_hex = self._cmd.read_file("/proc/irq/default_smp_affinity") prev_affinity = self._cmd.hex2cpulist(prev_affinity_hex) if self._default_irq_smp_affinity_value == "calc": _affinity = self._get_intersect_affinity(prev_affinity, affinity, affinity) elif self._default_irq_smp_affinity_value != "ignore": _affinity = self._default_irq_smp_affinity_value if self._default_irq_smp_affinity_value != "ignore": self._set_default_irq_affinity(_affinity) irq_original.default = prev_affinity self._storage.set(self._irq_storage_key, irq_original) def _restore_all_irq_affinity(self): irq_original = self._storage.get(self._irq_storage_key, None) if irq_original is None: return for irq, affinity in irq_original.irqs.items(): self._set_irq_affinity(irq, affinity, True) if self._default_irq_smp_affinity_value != "ignore": affinity = irq_original.default self._set_default_irq_affinity(affinity) self._storage.unset(self._irq_storage_key) def _verify_irq_affinity(self, irq_description, correct_affinity, current_affinity): res = set(current_affinity).issubset(set(correct_affinity)) if res: log.info(consts.STR_VERIFY_PROFILE_VALUE_OK % (irq_description, current_affinity)) else: log.error(consts.STR_VERIFY_PROFILE_VALUE_FAIL % (irq_description, current_affinity, correct_affinity)) return res def _verify_all_irq_affinity(self, correct_affinity, ignore_missing): irq_original = self._storage.get(self._irq_storage_key, None) irqs = procfs.interrupts() res = True for irq in irqs.keys(): if irq in irq_original.unchangeable and ignore_missing: description = "IRQ %s does not support changing SMP affinity" % irq log.info(consts.STR_VERIFY_PROFILE_VALUE_MISSING % description) continue try: current_affinity = irqs[irq]["affinity"] log.debug("Read SMP affinity of IRQ '%s': '%s'" % (irq, current_affinity)) irq_description = "SMP affinity of IRQ %s" % irq if not self._verify_irq_affinity( irq_description, correct_affinity, current_affinity): res = False except KeyError: continue current_affinity_hex = self._cmd.read_file( "/proc/irq/default_smp_affinity") current_affinity = self._cmd.hex2cpulist(current_affinity_hex) if self._default_irq_smp_affinity_value != "ignore" and not self._verify_irq_affinity("default IRQ SMP affinity", current_affinity, correct_affinity if self._default_irq_smp_affinity_value == "calc" else self._default_irq_smp_affinity_value): res = False return res @command_custom("isolated_cores", per_device = False, priority = 10) def _isolated_cores(self, enabling, value, verify, ignore_missing, instance): affinity = None self._affinity = None if value is not None: isolated = set(self._cmd.cpulist_unpack(value)) present = set(self._cpus) if isolated.issubset(present): affinity = list(present - isolated) self._affinity = self._cmd.cpulist2string(affinity) else: str_cpus = self._cmd.cpulist2string(self._cpus) log.error("Invalid isolated_cores specified, '%s' does not match available cores '%s'" % (value, str_cpus)) if (enabling or verify) and affinity is None: return None # currently only IRQ affinity verification is supported if verify: if self._irq_process: return self._verify_all_irq_affinity(affinity, ignore_missing) return True elif enabling: if self._cgroup: self._cgroup_set_affinity() ps_affinity = "cgroup.%s" % self._cgroup else: ps_affinity = affinity self._set_ps_affinity(ps_affinity) if self._irq_process: self._set_all_irq_affinity(affinity) else: # Restoring processes' affinity is done in # _instance_unapply_static() if self._irq_process: self._restore_all_irq_affinity() return True def _sched_assembly_path(self, prefix, namespace, knob): if prefix == "": path = "%s/%s" % (namespace, knob) else: path = "%s/%s/%s" % (prefix, namespace, knob) return "/sys/kernel/debug/%s" % path # map to kernel 6.6 paths, "" means that knob was dropped def _sched_assembly_path2(self, path, prefix, namespace, knob): lpath = path if namespace == "sched": lknob = self._dict_sched_knob_map.get(knob) if lknob is not None: if lknob: lpath = self._sched_assembly_path(prefix, namespace, lknob) else: lpath = "" return lpath def _get_sched_knob_path(self, prefix, namespace, knob): key = "%s_%s_%s" % (prefix, namespace, knob) path = self._sched_knob_paths_cache.get(key) if path or path == "": return path path = "/proc/sys/kernel/%s_%s" % (namespace, knob) if not os.path.exists(path): path = self._sched_assembly_path(prefix, namespace, knob) # kernel 6.6 drops and renames some knobs if not os.path.exists(path): path = self._sched_assembly_path2(path, prefix, namespace, knob) if path != "" and self._secure_boot_hint is None: self._secure_boot_hint = True self._sched_knob_paths_cache[key] = path return path def _get_sched_knob(self, prefix, namespace, knob): data = None path = self._get_sched_knob_path(prefix, namespace, knob) if path != "": data = self._cmd.read_file(path, err_ret = None) if data is None: log.error("Error reading '%s'" % knob) if self._secure_boot_hint: log.error("This may not work with Secure Boot or kernel_lockdown (this hint is logged only once)") self._secure_boot_hint = False return data def _set_sched_knob(self, prefix, namespace, knob, value, sim, remove = False): if value is None: return None path = self._get_sched_knob_path(prefix, namespace, knob) if not path: log.debug("knob '%s' ignored, unsupported by kernel" % knob) return None if not sim: if not self._cmd.write_to_file(path, value, \ no_error = [errno.ENOENT] if remove else False): log.error("Error writing value '%s' to '%s'" % (value, knob)) return value @command_get("sched_min_granularity_ns") def _get_sched_min_granularity_ns(self, instance): return self._get_sched_knob("", "sched", "min_granularity_ns") @command_set("sched_min_granularity_ns") def _set_sched_min_granularity_ns(self, value, instance, sim, remove): return self._set_sched_knob("", "sched", "min_granularity_ns", value, sim, remove) @command_get("sched_base_slice_ns") def _get_sched_base_slice_ns(self, instance): return self._get_sched_min_granularity_ns(instance) @command_set("sched_base_slice_ns") def _set_sched_base_slice_ns(self, value, instance, sim, remove): return self._set_sched_min_granularity_ns(value, instance, sim, remove) @command_get("sched_latency_ns") def _get_sched_latency_ns(self, instance): return self._get_sched_knob("", "sched", "latency_ns") @command_set("sched_latency_ns") def _set_sched_latency_ns(self, value, instance, sim, remove): return self._set_sched_knob("", "sched", "latency_ns", value, sim, remove) @command_get("sched_wakeup_granularity_ns") def _get_sched_wakeup_granularity_ns(self, instance): return self._get_sched_knob("", "sched", "wakeup_granularity_ns") @command_set("sched_wakeup_granularity_ns") def _set_sched_wakeup_granularity_ns(self, value, instance, sim, remove): return self._set_sched_knob("", "sched", "wakeup_granularity_ns", value, sim, remove) @command_get("sched_tunable_scaling") def _get_sched_tunable_scaling(self, instance): return self._get_sched_knob("", "sched", "tunable_scaling") @command_set("sched_tunable_scaling") def _set_sched_tunable_scaling(self, value, instance, sim, remove): return self._set_sched_knob("", "sched", "tunable_scaling", value, sim, remove) @command_get("sched_migration_cost_ns") def _get_sched_migration_cost_ns(self, instance): return self._get_sched_knob("", "sched", "migration_cost_ns") @command_set("sched_migration_cost_ns") def _set_sched_migration_cost_ns(self, value, instance, sim, remove): return self._set_sched_knob("", "sched", "migration_cost_ns", value, sim, remove) @command_get("sched_nr_migrate") def _get_sched_nr_migrate(self, instance): return self._get_sched_knob("", "sched", "nr_migrate") @command_set("sched_nr_migrate") def _set_sched_nr_migrate(self, value, instance, sim, remove): return self._set_sched_knob("", "sched", "nr_migrate", value, sim, remove) @command_get("numa_balancing_scan_delay_ms") def _get_numa_balancing_scan_delay_ms(self, instance): return self._get_sched_knob("sched", "numa_balancing", "scan_delay_ms") @command_set("numa_balancing_scan_delay_ms") def _set_numa_balancing_scan_delay_ms(self, value, instance, sim, remove): return self._set_sched_knob("sched", "numa_balancing", "scan_delay_ms", value, sim, remove) @command_get("numa_balancing_scan_period_min_ms") def _get_numa_balancing_scan_period_min_ms(self, instance): return self._get_sched_knob("sched", "numa_balancing", "scan_period_min_ms") @command_set("numa_balancing_scan_period_min_ms") def _set_numa_balancing_scan_period_min_ms(self, value, instance, sim, remove): return self._set_sched_knob("sched", "numa_balancing", "scan_period_min_ms", value, sim, remove) @command_get("numa_balancing_scan_period_max_ms") def _get_numa_balancing_scan_period_max_ms(self, instance): return self._get_sched_knob("sched", "numa_balancing", "scan_period_max_ms") @command_set("numa_balancing_scan_period_max_ms") def _set_numa_balancing_scan_period_max_ms(self, value, instance, sim, remove): return self._set_sched_knob("sched", "numa_balancing", "scan_period_max_ms", value, sim, remove) @command_get("numa_balancing_scan_size_mb") def _get_numa_balancing_scan_size_mb(self, instance): return self._get_sched_knob("sched", "numa_balancing", "scan_size_mb") @command_set("numa_balancing_scan_size_mb") def _set_numa_balancing_scan_size_mb(self, value, instance, sim, remove): return self._set_sched_knob("sched", "numa_balancing", "scan_size_mb", value, sim, remove)