Vega Strike Python Modules documentation 0.5.1
Documentation of the "Modules" folder of Vega Strike
profile.py
1 #! /usr/bin/env python
2 #
3 # Class for profiling python code. rev 1.0 6/2/94
4 #
5 # Based on prior profile module by Sjoerd Mullender...
6 # which was hacked somewhat by: Guido van Rossum
7 #
8 # See profile.doc for more information
9 
10 """Class for profiling Python code."""
11 
12 # Copyright 1994, by InfoSeek Corporation, all rights reserved.
13 # Written by James Roskind
14 #
15 # Permission to use, copy, modify, and distribute this Python software
16 # and its associated documentation for any purpose (subject to the
17 # restriction in the following sentence) without fee is hereby granted,
18 # provided that the above copyright notice appears in all copies, and
19 # that both that copyright notice and this permission notice appear in
20 # supporting documentation, and that the name of InfoSeek not be used in
21 # advertising or publicity pertaining to distribution of the software
22 # without specific, written prior permission. This permission is
23 # explicitly restricted to the copying and modification of the software
24 # to remain in Python, compiled Python, or other languages (such as C)
25 # wherein the modified or derived code is exclusively imported into a
26 # Python module.
27 #
28 # INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
29 # SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
30 # FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY
31 # SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
32 # RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
33 # CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
34 # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
35 
36 
37 
38 import sys
39 import os
40 import time
41 import marshal
42 
43 __all__ = ["run","help","Profile"]
44 
45 # Sample timer (kept commented out) illustrating a timer that returns integers:
46 #i_count = 0
47 #def integer_timer():
48 # global i_count
49 # i_count = i_count + 1
50 # return i_count
51 #itimes = integer_timer # replace with C coded timer returning integers
52 
53 #**************************************************************************
54 # The following are the static member functions for the profiler class
55 # Note that an instance of Profile() is *not* needed to call them.
56 #**************************************************************************
57 
58 def run(statement, filename=None):
59  """Run statement under profiler optionally saving results in filename
60 
61  This function takes a single argument that can be passed to the
62  "exec" statement, and an optional file name. In all cases this
63  routine attempts to "exec" its first argument and gather profiling
64  statistics from the execution. If no file name is present, then this
65  function automatically prints a simple profiling report, sorted by the
66  standard name string (file/line/function-name) that is presented in
67  each line.
68  """
69  prof = Profile()
70  try:
71  prof = prof.run(statement)
72  except SystemExit:
73  pass
74  if filename is not None:
75  prof.dump_stats(filename)
76  else:
77  return prof.print_stats()
78 
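# Illustrative usage of run() (not part of the original module; "my_func" is a
# hypothetical function that would have to be defined in __main__):
#
#     import profile
#     profile.run('my_func()')                  # print a report sorted by file/line/name
#     profile.run('my_func()', 'my_func.prof')  # or save the stats for the pstats module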
79 # print help
80 def help():
81  for dirname in sys.path:
82  fullname = os.path.join(dirname, 'profile.doc')
83  if os.path.exists(fullname):
84  sts = os.system('${PAGER-more} ' + fullname)
85  if sts: print '*** Pager exit status:', sts
86  break
87  else:
88  print 'Sorry, can\'t find the help file "profile.doc"',
89  print 'along the Python search path.'
90 
91 
92 if os.name == "mac":
93  import MacOS
94  def _get_time_mac(timer=MacOS.GetTicks):
95  return timer() / 60.0
96 
97 if hasattr(os, "times"):
98  def _get_time_times(timer=os.times):
99  t = timer()
100  return t[0] + t[1]
101 
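# Illustrative note (not part of the original module): os.times() returns a
# tuple whose first two entries are the user and system CPU time of the
# current process, so _get_time_times() reports combined CPU seconds:
#
#     import os
#     t = os.times()
#     cpu_seconds = t[0] + t[1]    # user + system time, as summed above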
102 
103 class Profile:
104  """Profiler class.
105 
106  self.cur is always a tuple. Each such tuple corresponds to a stack
107  frame that is currently active (self.cur[-2]). The following are the
108  definitions of its members. We use this external "parallel stack" to
109  avoid contaminating the program that we are profiling. (old profiler
110  used to write into the frames local dictionary!!) Derived classes
111  can change the definition of some entries, as long as they leave
112  [-2:] intact (frame and previous tuple). In case an internal error is
113  detected, the -3 element is used as the function name.
114 
115  [ 0] = Time that needs to be charged to the parent frame's function.
116  It is used so that a function call will not have to access the
117  timing data for the parent frame.
118  [ 1] = Total time spent in this frame's function, excluding time in
119  subfunctions (this latter is tallied in cur[2]).
120  [ 2] = Total time spent in subfunctions, excluding time executing the
121  frame's function (this latter is tallied in cur[1]).
122  [-3] = Name of the function that corresponds to this frame.
123  [-2] = Actual frame that we correspond to (used to sync exception handling).
124  [-1] = Our parent 6-tuple (corresponds to frame.f_back).
125 
126  Timing data for each function is stored as a 5-tuple in the dictionary
127  self.timings[]. The index is always the name stored in self.cur[-3].
128  The following are the definitions of the members:
129 
130  [0] = The number of times this function was called, not counting direct
131  or indirect recursion,
132  [1] = Number of times this function appears on the stack, minus one
133  [2] = Total time spent internal to this function
134  [3] = Cumulative time that this function was present on the stack. In
135  non-recursive functions, this is the total execution time from start
136  to finish of each invocation of a function, including time spent in
137  all subfunctions.
138  [4] = A dictionary indicating for each function name, the number of times
139  it was called by us.
140  """
141 
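# Illustrative sketch (not part of the original module): the shape of one
# self.timings entry as described in the docstring above. The file names,
# line numbers, function names and figures are invented purely to show the
# layout:
#
#     fn = ('spam.py', 12, 'egg')    # key: (filename, first line number, function name)
#     cc, ns, tt, ct, callers = 3, 0, 0.25, 0.75, {('spam.py', 3, 'ham'): 3}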
142  bias = 0 # calibration constant
143 
144  def __init__(self, timer=None, bias=None):
145  self.timings = {}
146  self.cur = None
147  self.cmd = ""
148 
149  if bias is None:
150  bias = self.bias
151  self.bias = bias # Materialize in local dict for lookup speed.
152 
153  if not timer:
154  if os.name == 'mac':
155  self.timer = MacOS.GetTicks
156  self.dispatcher = self.trace_dispatch_mac
157  self.get_time = _get_time_mac
158  elif hasattr(time, 'clock'):
159  self.timer = self.get_time = time.clock
160  self.dispatcher = self.trace_dispatch_i
161  elif hasattr(os, 'times'):
162  self.timer = os.times
163  self.dispatcher = self.trace_dispatch
164  self.get_time = _get_time_times
165  else:
166  self.timer = self.get_time = time.time
167  self.dispatcher = self.trace_dispatch_i
168  else:
169  self.timer = timer
170  t = self.timer() # test out timer function
171  try:
172  length = len(t)
173  except TypeError:
174  self.get_time = timer
175  self.dispatcher = self.trace_dispatch_i
176  else:
177  if length == 2:
178  self.dispatcher = self.trace_dispatch
179  else:
180  self.dispatcher = self.trace_dispatch_l
181  # This get_time() implementation needs to be defined
182  # here to capture the passed-in timer in the parameter
183  # list (for performance). Note that we can't assume
184  # the timer() result contains two values in all
185  # cases.
186  import operator
187  def get_time_timer(timer=timer,
188  reduce=reduce, reducer=operator.add):
189  return reduce(reducer, timer(), 0)
190  self.get_time = get_time_timer
191  self.t = self.get_time()
192  self.simulate_call('profiler')
193 
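# Illustrative sketch (not part of the original module) of the constructor
# arguments handled above. A timer returning a single number selects the
# scalar dispatch routine; time.time is only one possible choice, and the
# bias value shown is an arbitrary placeholder:
#
#     import time
#     p = Profile(timer=time.time)   # scalar timer -> trace_dispatch_i
#     p = Profile(bias=1e-6)         # pre-set calibration constant, in seconds per event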
194  # Heavily optimized dispatch routine for os.times() timer
195 
196  def trace_dispatch(self, frame, event, arg):
197  timer = self.timer
198  t = timer()
199  t = t[0] + t[1] - self.t - self.bias
200 
201  if self.dispatch[event](self, frame,t):
202  t = timer()
203  self.t = t[0] + t[1]
204  else:
205  r = timer()
206  self.t = r[0] + r[1] - t # put back unrecorded delta
207 
208  # Dispatch routine for best timer program (return = scalar, fastest if
209  # an integer but float works too -- and time.clock() relies on that).
210 
211  def trace_dispatch_i(self, frame, event, arg):
212  timer = self.timer
213  t = timer() - self.t - self.bias
214  if self.dispatch[event](self, frame,t):
215  self.t = timer()
216  else:
217  self.t = timer() - t # put back unrecorded delta
218 
219  # Dispatch routine for macintosh (timer returns time in ticks of
220  # 1/60th second)
221 
222  def trace_dispatch_mac(self, frame, event, arg):
223  timer = self.timer
224  t = timer()/60.0 - self.t - self.bias
225  if self.dispatch[event](self, frame, t):
226  self.t = timer()/60.0
227  else:
228  self.t = timer()/60.0 - t # put back unrecorded delta
229 
230  # SLOW generic dispatch routine for timer returning lists of numbers
231 
232  def trace_dispatch_l(self, frame, event, arg):
233  get_time = self.get_time
234  t = get_time() - self.t - self.bias
235 
236  if self.dispatch[event](self, frame, t):
237  self.t = get_time()
238  else:
239  self.t = get_time() - t # put back unrecorded delta
240 
241  # In the event handlers, the first 3 elements of self.cur are unpacked
242 # into variables with 3-letter names. The last two characters are meant to be
243  # mnemonic:
244  # _pt self.cur[0] "parent time" time to be charged to parent frame
245  # _it self.cur[1] "internal time" time spent directly in the function
246  # _et self.cur[2] "external time" time spent in subfunctions
247 
248  def trace_dispatch_exception(self, frame, t):
249  rpt, rit, ret, rfn, rframe, rcur = self.cur
250  if (rframe is not frame) and rcur:
251  return self.trace_dispatch_return(rframe, t)
252  self.cur = rpt, rit+t, ret, rfn, rframe, rcur
253  return 1
254 
255 
256  def trace_dispatch_call(self, frame, t):
257  if self.cur and frame.f_back is not self.cur[-2]:
258  rpt, rit, ret, rfn, rframe, rcur = self.cur
259  if not isinstance(rframe, Profile.fake_frame):
260  assert rframe.f_back is frame.f_back, ("Bad call", rfn,
261  rframe, rframe.f_back,
262  frame, frame.f_back)
263  self.trace_dispatch_return(rframe, 0)
264  assert (self.cur is None or \
265  frame.f_back is self.cur[-2]), ("Bad call",
266  self.cur[-3])
267  fcode = frame.f_code
268  fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
269  self.cur = (t, 0, 0, fn, frame, self.cur)
270  timings = self.timings
271  if timings.has_key(fn):
272  cc, ns, tt, ct, callers = timings[fn]
273  timings[fn] = cc, ns + 1, tt, ct, callers
274  else:
275  timings[fn] = 0, 0, 0, 0, {}
276  return 1
277 
278  def trace_dispatch_return(self, frame, t):
279  if frame is not self.cur[-2]:
280  assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3])
281  self.trace_dispatch_return(self.cur[-2], 0)
282 
283  # Prefix "r" means part of the Returning or exiting frame.
284  # Prefix "p" means part of the Previous or Parent or older frame.
285 
286  rpt, rit, ret, rfn, frame, rcur = self.cur
287  rit = rit + t
288  frame_total = rit + ret
289 
290  ppt, pit, pet, pfn, pframe, pcur = rcur
291  self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur
292 
293  timings = self.timings
294  cc, ns, tt, ct, callers = timings[rfn]
295  if not ns:
296  # This is the only occurrence of the function on the stack.
297  # Else this is a (directly or indirectly) recursive call, and
298  # its cumulative time will get updated when the topmost call to
299  # it returns.
300  ct = ct + frame_total
301  cc = cc + 1
302 
303  if callers.has_key(pfn):
304  callers[pfn] = callers[pfn] + 1 # hack: gather more
305  # stats such as the amount of time added to ct courtesy
306  # of this specific call, and the contribution to cc
307  # courtesy of this call.
308  else:
309  callers[pfn] = 1
310 
311  timings[rfn] = cc, ns - 1, tt + rit, ct, callers
312 
313  return 1
314 
315 
316  dispatch = {
317  "call": trace_dispatch_call,
318  "exception": trace_dispatch_exception,
319  "return": trace_dispatch_return,
320  }
321 
322 
323  # The next few functions play with self.cmd. By carefully preloading
324  # our parallel stack, we can force the profiled result to include
325  # an arbitrary string as the name of the calling function.
326  # We use self.cmd as that string, and the resulting stats look
327  # very nice :-).
328 
329  def set_cmd(self, cmd):
330  if self.cur[-1]: return # already set
331  self.cmd = cmd
332  self.simulate_call(cmd)
333 
334  class fake_code:
335  def __init__(self, filename, line, name):
336  self.co_filename = filename
337  self.co_line = line
338  self.co_name = name
339  self.co_firstlineno = 0
340 
341  def __repr__(self):
342  return repr((self.co_filename, self.co_line, self.co_name))
343 
344  class fake_frame:
345  def __init__(self, code, prior):
346  self.f_code = code
347  self.f_back = prior
348 
349  def simulate_call(self, name):
350  code = self.fake_code('profile', 0, name)
351  if self.cur:
352  pframe = self.cur[-2]
353  else:
354  pframe = None
355  frame = self.fake_frame(code, pframe)
356  self.dispatch['call'](self, frame, 0)
357 
358  # collect stats from pending stack, including getting final
359  # timings for self.cmd frame.
360 
361  def simulate_cmd_complete(self):
362  get_time = self.get_time
363  t = get_time() - self.t
364  while self.cur[-1]:
365  # We *can* cause assertion errors here if
366  # dispatch_trace_return checks for a frame match!
367  self.dispatch['return'](self, self.cur[-2], t)
368  t = 0
369  self.t = get_time() - t
370 
371 
372  def print_stats(self):
373  import pstats
374  pstats.Stats(self).strip_dirs().sort_stats(-1). \
375  print_stats()
376 
377  def dump_stats(self, file):
378  f = open(file, 'wb')
379  self.create_stats()
380  marshal.dump(self.stats, f)
381  f.close()
382 
383  def create_stats(self):
384  self.simulate_cmd_complete()
385  self.snapshot_stats()
386 
387  def snapshot_stats(self):
388  self.stats = {}
389  for func in self.timings.keys():
390  cc, ns, tt, ct, callers = self.timings[func]
391  callers = callers.copy()
392  nc = 0
393  for func_caller in callers.keys():
394  nc = nc + callers[func_caller]
395  self.stats[func] = cc, nc, tt, ct, callers
396 
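# Illustrative sketch (not part of the original module): stats written by
# dump_stats() are meant to be read back with the pstats module ("my_func"
# and the file name are hypothetical):
#
#     import pstats
#     run('my_func()', 'my_func.prof')    # module-level run() defined above
#     pstats.Stats('my_func.prof').strip_dirs().sort_stats('cumulative').print_stats(10)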
397 
398  # The following two methods can be called by clients to use
399  # a profiler to profile a statement, given as a string.
400 
401  def run(self, cmd):
402  import __main__
403  dict = __main__.__dict__
404  return self.runctx(cmd, dict, dict)
405 
406  def runctx(self, cmd, globals, locals):
407  self.set_cmd(cmd)
408  sys.setprofile(self.dispatcher)
409  try:
410  exec cmd in globals, locals
411  finally:
412  sys.setprofile(None)
413  return self
414 
415  # This method is more useful to profile a single function call.
416  def runcall(self, func, *args, **kw):
417  self.set_cmd(`func`)
418  sys.setprofile(self.dispatcher)
419  try:
420  return apply(func, args, kw)
421  finally:
422  sys.setprofile(None)
423 
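# Illustrative sketch (not part of the original module): profiling a single
# call with runcall() instead of building a statement string; "my_func" and
# its arguments are hypothetical:
#
#     p = Profile()
#     result = p.runcall(my_func, 1, 2, verbose=1)
#     p.print_stats()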
424 
425  #******************************************************************
426  # The following calculates the overhead for using a profiler. The
427  # problem is that it takes a fair amount of time for the profiler
428  # to stop the stopwatch (from the time it receives an event).
429  # Similarly, there is a delay from the time that the profiler
430  # re-starts the stopwatch before the user's code really gets to
431  # continue. The following code tries to measure the difference on
432  # a per-event basis.
433  #
434  # Note that this difference is only significant if there are a lot of
435  # events, and relatively little user code per event. For example,
436  # code with small functions will typically benefit from having the
437  # profiler calibrated for the current platform. This *could* be
438  # done on the fly during init() time, but it is not worth the
439 # effort. Also note that if too large a value is specified, then
440  # execution time on some functions will actually appear as a
441  # negative number. It is *normal* for some functions (with very
442  # low call counts) to have such negative stats, even if the
443  # calibration figure is "correct."
444  #
445  # One alternative to profile-time calibration adjustments (i.e.,
446  # adding in the magic little delta during each event) is to track
447  # more carefully the number of events (and cumulatively, the number
448  # of events during sub functions) that are seen. If this were
449  # done, then the arithmetic could be done after the fact (i.e., at
450  # display time). Currently, we track only call/return events.
451  # These values can be deduced by examining the callees and callers
452 # vectors for each function. Hence we *can* almost correct the
453  # internal time figure at print time (note that we currently don't
454  # track exception event processing counts). Unfortunately, there
455  # is currently no similar information for cumulative sub-function
456  # time. It would not be hard to "get all this info" at profiler
457  # time. Specifically, we would have to extend the tuples to keep
458  # counts of this in each frame, and then extend the defs of timing
459  # tuples to include the significant two figures. I'm a bit fearful
460  # that this additional feature will slow the heavily optimized
461 # event/time ratio (i.e., the profiler would run slower, for a very
462  # low "value added" feature.)
463  #**************************************************************
464 
465  def calibrate(self, m, verbose=0):
466  if self.__class__ is not Profile:
467  raise TypeError("Subclasses must override .calibrate().")
468 
469  saved_bias = self.bias
470  self.bias = 0
471  try:
472  return self._calibrate_inner(m, verbose)
473  finally:
474  self.bias = saved_bias
475 
476  def _calibrate_inner(self, m, verbose):
477  get_time = self.get_time
478 
479  # Set up a test case to be run with and without profiling. Include
480  # lots of calls, because we're trying to quantify stopwatch overhead.
481  # Do not raise any exceptions, though, because we want to know
482  # exactly how many profile events are generated (one call event, +
483  # one return event, per Python-level call).
484 
485  def f1(n):
486  for i in range(n):
487  x = 1
488 
489  def f(m, f1=f1):
490  for i in range(m):
491  f1(100)
492 
493  f(m) # warm up the cache
494 
495  # elapsed_noprofile <- time f(m) takes without profiling.
496  t0 = get_time()
497  f(m)
498  t1 = get_time()
499  elapsed_noprofile = t1 - t0
500  if verbose:
501  print "elapsed time without profiling =", elapsed_noprofile
502 
503  # elapsed_profile <- time f(m) takes with profiling. The difference
504  # is profiling overhead, only some of which the profiler subtracts
505  # out on its own.
506  p = Profile()
507  t0 = get_time()
508  p.runctx('f(m)', globals(), locals())
509  t1 = get_time()
510  elapsed_profile = t1 - t0
511  if verbose:
512  print "elapsed time with profiling =", elapsed_profile
513 
514  # reported_time <- "CPU seconds" the profiler charged to f and f1.
515  total_calls = 0.0
516  reported_time = 0.0
517  for (filename, line, funcname), (cc, ns, tt, ct, callers) in \
518  p.timings.items():
519  if funcname in ("f", "f1"):
520  total_calls += cc
521  reported_time += tt
522 
523  if verbose:
524  print "'CPU seconds' profiler reported =", reported_time
525  print "total # calls =", total_calls
526  if total_calls != m + 1:
527  raise ValueError("internal error: total calls = %d" % total_calls)
528 
529  # reported_time - elapsed_noprofile = overhead the profiler wasn't
530  # able to measure. Divide by twice the number of calls (since there
531  # are two profiler events per call in this test) to get the hidden
532  # overhead per event.
533  mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls
534  if verbose:
535  print "mean stopwatch overhead per profile event =", mean
536  return mean
537 
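# Illustrative sketch (not part of the original module): measuring the
# per-event overhead on the current machine and feeding it back in as the
# bias; the iteration count (100000) is an arbitrary choice:
#
#     pr = Profile()
#     overhead = pr.calibrate(100000)
#     Profile.bias = overhead    # picked up as the default by later instances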
538 #****************************************************************************
539 def Stats(*args):
540  print 'Report generating functions are in the "pstats" module\a'
541 
542 
543 # When invoked as main program, invoke the profiler on a script
544 if __name__ == '__main__':
545  if not sys.argv[1:]:
546  print "usage: profile.py scriptfile [arg] ..."
547  sys.exit(2)
548 
549  filename = sys.argv[1] # Get script filename
550 
551  del sys.argv[0] # Hide "profile.py" from argument list
552 
553  # Insert script directory in front of module search path
554  sys.path.insert(0, os.path.dirname(filename))
555 
556  run('execfile(' + `filename` + ')')
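# Illustrative shell usage (not part of the original module; the script name
# and arguments are hypothetical):
#
#     python profile.py myscript.py arg1 arg2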