Sfoglia il codice sorgente

tools: kvm_stat: Introduce pid monitoring

Having stats for a single VM can help to diagnose a problem with that VM
without the need to run other tools like perf.

The tracepoints already allowed pid-level monitoring, but kvm_stat
did not support it until now. Support for the newly introduced
debugfs VM monitoring was also added.

Signed-off-by: Janosch Frank <frankja@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Janosch Frank 9 anni fa
parent
commit
f0cf040f84
2 file modificati con 167 aggiunte e 22 eliminazioni
  1. 163 20
      tools/kvm/kvm_stat/kvm_stat
  2. 4 2
      tools/kvm/kvm_stat/kvm_stat.txt

+ 163 - 20
tools/kvm/kvm_stat/kvm_stat

@@ -367,12 +367,16 @@ class Group(object):
                                       os.read(self.events[0].fd, length))))
                                       os.read(self.events[0].fd, length))))
 
 
 class Event(object):
 class Event(object):
-    def __init__(self, name, group, trace_cpu, trace_point, trace_filter,
-                 trace_set='kvm'):
+    def __init__(self, name, group, trace_cpu, trace_pid, trace_point,
+                 trace_filter, trace_set='kvm'):
         self.name = name
         self.name = name
         self.fd = None
         self.fd = None
-        self.setup_event(group, trace_cpu, trace_point, trace_filter,
-                         trace_set)
+        self.setup_event(group, trace_cpu, trace_pid, trace_point,
+                         trace_filter, trace_set)
+
+    def __del__(self):
+        if self.fd:
+            os.close(self.fd)
 
 
     def setup_event_attribute(self, trace_set, trace_point):
     def setup_event_attribute(self, trace_set, trace_point):
         id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
         id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
@@ -382,16 +386,16 @@ class Event(object):
         event_attr.config = int(open(id_path).read())
         event_attr.config = int(open(id_path).read())
         return event_attr
         return event_attr
 
 
-    def setup_event(self, group, trace_cpu, trace_point, trace_filter,
-                    trace_set):
+    def setup_event(self, group, trace_cpu, trace_pid, trace_point,
+                    trace_filter, trace_set):
         event_attr = self.setup_event_attribute(trace_set, trace_point)
         event_attr = self.setup_event_attribute(trace_set, trace_point)
 
 
         group_leader = -1
         group_leader = -1
         if group.events:
         if group.events:
             group_leader = group.events[0].fd
             group_leader = group.events[0].fd
 
 
-        fd = perf_event_open(event_attr, -1, trace_cpu,
-                             group_leader, 0)
+        fd = perf_event_open(event_attr, trace_pid,
+                             trace_cpu, group_leader, 0)
         if fd == -1:
         if fd == -1:
             err = ctypes.get_errno()
             err = ctypes.get_errno()
             raise OSError(err, os.strerror(err),
             raise OSError(err, os.strerror(err),
@@ -417,8 +421,7 @@ class TracepointProvider(object):
         self.group_leaders = []
         self.group_leaders = []
         self.filters = get_filters()
         self.filters = get_filters()
         self._fields = self.get_available_fields()
         self._fields = self.get_available_fields()
-        self.setup_traces()
-        self.fields = self._fields
+        self._pid = 0
 
 
     def get_available_fields(self):
     def get_available_fields(self):
         path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm')
         path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm')
@@ -433,11 +436,17 @@ class TracepointProvider(object):
         return fields
         return fields
 
 
     def setup_traces(self):
     def setup_traces(self):
-        cpus = get_online_cpus()
+        if self._pid > 0:
+            # Fetch list of all threads of the monitored pid, as qemu
+            # starts a thread for each vcpu.
+            path = os.path.join('/proc', str(self._pid), 'task')
+            groupids = walkdir(path)[1]
+        else:
+            groupids = get_online_cpus()
 
 
         # The constant is needed as a buffer for python libs, std
         # The constant is needed as a buffer for python libs, std
         # streams and other files that the script opens.
         # streams and other files that the script opens.
-        newlim = len(cpus) * len(self._fields) + 50
+        newlim = len(groupids) * len(self._fields) + 50
         try:
         try:
             softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE)
             softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE)
 
 
@@ -451,7 +460,7 @@ class TracepointProvider(object):
         except ValueError:
         except ValueError:
             sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim))
             sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim))
 
 
-        for cpu in cpus:
+        for groupid in groupids:
             group = Group()
             group = Group()
             for name in self._fields:
             for name in self._fields:
                 tracepoint = name
                 tracepoint = name
@@ -463,11 +472,22 @@ class TracepointProvider(object):
                                    (self.filters[tracepoint][0],
                                    (self.filters[tracepoint][0],
                                     self.filters[tracepoint][1][sub]))
                                     self.filters[tracepoint][1][sub]))
 
 
+                # From perf_event_open(2):
+                # pid > 0 and cpu == -1
+                # This measures the specified process/thread on any CPU.
+                #
+                # pid == -1 and cpu >= 0
+                # This measures all processes/threads on the specified CPU.
+                trace_cpu = groupid if self._pid == 0 else -1
+                trace_pid = int(groupid) if self._pid != 0 else -1
+
                 group.add_event(Event(name=name,
                 group.add_event(Event(name=name,
                                       group=group,
                                       group=group,
-                                      trace_cpu=cpu,
+                                      trace_cpu=trace_cpu,
+                                      trace_pid=trace_pid,
                                       trace_point=tracepoint,
                                       trace_point=tracepoint,
                                       trace_filter=tracefilter))
                                       trace_filter=tracefilter))
+
             self.group_leaders.append(group)
             self.group_leaders.append(group)
 
 
     def available_fields(self):
     def available_fields(self):
@@ -491,6 +511,17 @@ class TracepointProvider(object):
                     if index != 0:
                     if index != 0:
                         event.disable()
                         event.disable()
 
 
+    @property
+    def pid(self):
+        return self._pid
+
+    @pid.setter
+    def pid(self, pid):
+        self._pid = pid
+        self.group_leaders = []
+        self.setup_traces()
+        self.fields = self._fields
+
     def read(self):
     def read(self):
         ret = defaultdict(int)
         ret = defaultdict(int)
         for group in self.group_leaders:
         for group in self.group_leaders:
@@ -502,6 +533,8 @@ class TracepointProvider(object):
 class DebugfsProvider(object):
 class DebugfsProvider(object):
     def __init__(self):
     def __init__(self):
         self._fields = self.get_available_fields()
         self._fields = self.get_available_fields()
+        self._pid = 0
+        self.do_read = True
 
 
     def get_available_fields(self):
     def get_available_fields(self):
         return walkdir(PATH_DEBUGFS_KVM)[2]
         return walkdir(PATH_DEBUGFS_KVM)[2]
@@ -514,16 +547,57 @@ class DebugfsProvider(object):
     def fields(self, fields):
     def fields(self, fields):
         self._fields = fields
         self._fields = fields
 
 
+    @property
+    def pid(self):
+        return self._pid
+
+    @pid.setter
+    def pid(self, pid):
+        if pid != 0:
+            self._pid = pid
+
+            vms = walkdir(PATH_DEBUGFS_KVM)[1]
+            if len(vms) == 0:
+                self.do_read = False
+
+            self.paths = filter(lambda x: "{}-".format(pid) in x, vms)
+
+        else:
+            self.paths = ['']
+            self.do_read = True
+
     def read(self):
     def read(self):
-        def val(key):
-            return int(file(PATH_DEBUGFS_KVM + '/' + key).read())
-        return dict([(key, val(key)) for key in self._fields])
+        """Returns a dict with format:'file name / field -> current value'."""
+        results = {}
+
+        # If no debugfs filtering support is available, then don't read.
+        if not self.do_read:
+            return results
+
+        for path in self.paths:
+            for field in self._fields:
+                results[field] = results.get(field, 0) \
+                                 + self.read_field(field, path)
+
+        return results
+
+    def read_field(self, field, path):
+        """Returns the value of a single field from a specific VM."""
+        try:
+            return int(open(os.path.join(PATH_DEBUGFS_KVM,
+                                         path,
+                                         field))
+                       .read())
+        except IOError:
+            return 0
 
 
 class Stats(object):
 class Stats(object):
-    def __init__(self, providers, fields=None):
+    def __init__(self, providers, pid, fields=None):
         self.providers = providers
         self.providers = providers
+        self._pid_filter = pid
         self._fields_filter = fields
         self._fields_filter = fields
         self.values = {}
         self.values = {}
+        self.update_provider_pid()
         self.update_provider_filters()
         self.update_provider_filters()
 
 
     def update_provider_filters(self):
     def update_provider_filters(self):
@@ -540,6 +614,10 @@ class Stats(object):
                                if wanted(key)]
                                if wanted(key)]
             provider.fields = provider_fields
             provider.fields = provider_fields
 
 
+    def update_provider_pid(self):
+        for provider in self.providers:
+            provider.pid = self._pid_filter
+
     @property
     @property
     def fields_filter(self):
     def fields_filter(self):
         return self._fields_filter
         return self._fields_filter
@@ -549,6 +627,16 @@ class Stats(object):
         self._fields_filter = fields_filter
         self._fields_filter = fields_filter
         self.update_provider_filters()
         self.update_provider_filters()
 
 
+    @property
+    def pid_filter(self):
+        return self._pid_filter
+
+    @pid_filter.setter
+    def pid_filter(self, pid):
+        self._pid_filter = pid
+        self.values = {}
+        self.update_provider_pid()
+
     def get(self):
     def get(self):
         for provider in self.providers:
         for provider in self.providers:
             new = provider.read()
             new = provider.read()
@@ -605,9 +693,17 @@ class Tui(object):
         elif self.stats.fields_filter == r'^[^\(]*$':
         elif self.stats.fields_filter == r'^[^\(]*$':
             self.stats.fields_filter = None
             self.stats.fields_filter = None
 
 
+    def update_pid(self, pid):
+        self.stats.pid_filter = pid
+
     def refresh(self, sleeptime):
     def refresh(self, sleeptime):
         self.screen.erase()
         self.screen.erase()
-        self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
+        if self.stats.pid_filter > 0:
+            self.screen.addstr(0, 0, 'kvm statistics - pid {0}'
+                               .format(self.stats.pid_filter),
+                               curses.A_BOLD)
+        else:
+            self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
         self.screen.addstr(2, 1, 'Event')
         self.screen.addstr(2, 1, 'Event')
         self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH -
         self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH -
                            len('Total'), 'Total')
                            len('Total'), 'Total')
@@ -659,6 +755,37 @@ class Tui(object):
             except re.error:
             except re.error:
                 continue
                 continue
 
 
+    def show_vm_selection(self):
+        while True:
+            self.screen.erase()
+            self.screen.addstr(0, 0,
+                               'Show statistics for specific pid.',
+                               curses.A_BOLD)
+            self.screen.addstr(1, 0,
+                               'This might limit the shown data to the trace '
+                               'statistics.')
+
+            curses.echo()
+            self.screen.addstr(3, 0, "Pid [0 or pid]: ")
+            pid = self.screen.getstr()
+            curses.noecho()
+
+            try:
+                pid = int(pid)
+
+                if pid == 0:
+                    self.update_pid(pid)
+                    break
+                else:
+                    if not os.path.isdir(os.path.join('/proc/', str(pid))):
+                        continue
+                    else:
+                        self.update_pid(pid)
+                        break
+
+            except ValueError:
+                continue
+
     def show_stats(self):
     def show_stats(self):
         sleeptime = 0.25
         sleeptime = 0.25
         while True:
         while True:
@@ -674,6 +801,8 @@ class Tui(object):
                     break
                     break
                 if char == 'f':
                 if char == 'f':
                     self.show_filter_selection()
                     self.show_filter_selection()
+                if char == 'p':
+                    self.show_vm_selection()
             except KeyboardInterrupt:
             except KeyboardInterrupt:
                 break
                 break
             except curses.error:
             except curses.error:
@@ -766,6 +895,13 @@ Requirements:
                          dest='fields',
                          dest='fields',
                          help='fields to display (regex)',
                          help='fields to display (regex)',
                          )
                          )
+    optparser.add_option('-p', '--pid',
+                        action='store',
+                        default=0,
+                        type=int,
+                        dest='pid',
+                        help='restrict statistics to pid',
+                        )
     (options, _) = optparser.parse_args(sys.argv)
     (options, _) = optparser.parse_args(sys.argv)
     return options
     return options
 
 
@@ -812,8 +948,15 @@ def check_access(options):
 def main():
 def main():
     options = get_options()
     options = get_options()
     options = check_access(options)
     options = check_access(options)
+
+    if (options.pid > 0 and
+        not os.path.isdir(os.path.join('/proc/',
+                                       str(options.pid)))):
+        sys.stderr.write('Did you use a (unsupported) tid instead of a pid?\n')
+        sys.exit('Specified pid does not exist.')
+
     providers = get_providers(options)
     providers = get_providers(options)
-    stats = Stats(providers, fields=options.fields)
+    stats = Stats(providers, options.pid, fields=options.fields)
 
 
     if options.log:
     if options.log:
         log(stats)
         log(stats)

+ 4 - 2
tools/kvm/kvm_stat/kvm_stat.txt

@@ -23,8 +23,6 @@ The set of KVM kernel module trace events may be specific to the kernel version
 or architecture.  It is best to check the KVM kernel module source code for the
 or architecture.  It is best to check the KVM kernel module source code for the
 meaning of events.
 meaning of events.
 
 
-Note that trace events are counted globally across all running guests.
-
 OPTIONS
 OPTIONS
 -------
 -------
 -1::
 -1::
@@ -44,6 +42,10 @@ OPTIONS
 --debugfs::
 --debugfs::
 	retrieve statistics from debugfs
 	retrieve statistics from debugfs
 
 
+-p<pid>::
+--pid=<pid>::
+	limit statistics to one virtual machine (pid)
+
 -f<fields>::
 -f<fields>::
 --fields=<fields>::
 --fields=<fields>::
 	fields to display (regex)
 	fields to display (regex)