#!/usr/bin/env python
#
# svn_commit_summary - Summarize a repository commit history
#
# Revision History
#   12/21/2006 WEH
#	Initial revision
#

import sets
import datetime
import sys
import commands
import time

debug=False

#
# Returns a datetime object for a given revision
#
def get_date(rev, reposdir):
    """Return the commit date of revision `rev` as a datetime object.

    Runs ``svnlook date -r <rev> <reposdir>`` and parses the first two
    space-separated fields of its output as ``YYYY-MM-DD`` and ``HH:MM:SS``.
    Anything after those two fields is ignored, so the result carries no
    timezone information.

    Raises ValueError (from time.strptime) when the output does not start
    with a timestamp in that form.
    """
    command = " ".join(["svnlook date -r", repr(rev), reposdir])
    fields = commands.getoutput(command).split(" ")
    # Glue date and time-of-day together so one strptime call parses both.
    stamp = "T".join(fields[:2])
    parsed = time.strptime(stamp, "%Y-%m-%dT%H:%M:%S")
    # The first six struct_time entries are year..second.
    return datetime.datetime(*parsed[:6])

#
# Generate a list of time periods
#
def get_periods(resposdir,nperiods,dpp):
    """Split the repository history into consecutive periods of `dpp` days.

    Periods are laid out backwards in time, starting at the moment of the
    call.  The revisions belonging to each period are located by probing
    revision dates with ``svnlook date`` (see get_date): first the range is
    bracketed with doubling steps, then narrowed with a binary search.

    Parameters:
      resposdir  Repository path handed to svnlook.  (The spelling is kept
                 so the signature stays unchanged.)
      nperiods   Number of periods requested.  The returned list can be
                 longer (a run of empty periods is always emitted in full)
                 or shorter (the history ran out first).
      dpp        Days per period.

    Returns a list of tuples
        (period, day, period_time, rev, rev_time, last_rev)
    where
      period       1-based period number
      day          1-based day count, back from now, at which the period starts
      period_time  the call time minus (day-1) days
      rev          highest revision number in the period, None if it is empty
      rev_time     commit date of `rev`, or None
      last_rev     lowest revision number in the period, or None

    An empty list is returned when ``svnlook youngest`` fails.  Trace output
    is printed only when the module-level `debug` flag is true.
    """
    # The parameter name is misspelled in the signature; bind the intended
    # name so the argument, not a module-level global, is what gets used.
    reposdir = resposdir
    period = 1
    day = 1
    periods = []
    start_time = datetime.datetime.now()
    (status, text) = commands.getstatusoutput("svnlook youngest " + reposdir)
    if status != 0:
        return periods
    # The output is parsed with int() instead of eval(): only a bare
    # revision number is meaningful here, and eval() would execute
    # whatever the command happened to print.
    start_rev = int(text)
    curr_time = start_time
    curr_rev = start_rev
    rev_time = get_date(curr_rev, reposdir)
    #
    # Iterate...
    #
    while True:
        #
        # The current revision may not be in the next time period.  Add
        # empty periods until the current revision is in the next period.
        #
        delta = start_time - rev_time
        while (day + dpp - 1) <= delta.days:
            periods.append((period, day, curr_time, None, None, None))
            if debug:
                _debug_line("APPEND 1", period, day, curr_time, None, None)
            period = period + 1
            day = day + dpp
            curr_time = start_time + datetime.timedelta(-day + 1)
        #
        # Terminate the loop if we have enough periods, or if we're at the
        # first revision in the subversion repository.
        #
        # Note: the loop above can generate more than nperiods periods if
        # there is a long stretch of periods with no revisions.  However,
        # this is necessary to get the 'last revision' in each period.
        #
        # NOTE(review): when curr_rev reaches 1 by way of the binary search
        # below, the loop ends here without emitting a period containing
        # revision 1 -- confirm that this is intended.
        #
        if period > nperiods or curr_rev == 1:
            break
        #
        # Bracket the next revision: double the step until a revision at
        # least dpp days older than curr_time is found, clamping the step
        # so that revision 1 is the oldest one ever probed.
        #
        offset = 1
        while True:
            delta = curr_time - get_date(curr_rev - offset, reposdir)
            if delta.days >= dpp:
                break
            offset = offset * 2
            if curr_rev - offset < 1:
                offset = curr_rev - 1
                break
        #
        # Check if we have reached the beginning of the repository...
        #
        if curr_rev - offset == 1:
            delta = curr_time - get_date(curr_rev - offset, reposdir)
            if delta.days < dpp:
                # Revision 1 itself lies inside this period: record the
                # period and stop.  This trace used to be printed
                # unconditionally; it is now guarded like the others.
                if debug:
                    _debug_line("APPEND 3", period, day, curr_time,
                                curr_rev, rev_time)
                periods.append((period, day, curr_time, curr_rev, rev_time, 1))
                break
        #
        # Now use a binary search to find the offset that represents the
        # beginning of the next period
        #
        tmp_rev = curr_rev
        while offset > 1:
            tmp_offset = offset // 2
            delta = curr_time - get_date(tmp_rev - tmp_offset, reposdir)
            if debug:
                _debug_line("HERE",
                            get_date(tmp_rev - offset, reposdir),
                            get_date(tmp_rev - tmp_offset, reposdir),
                            get_date(tmp_rev, reposdir),
                            curr_time, offset, tmp_rev - tmp_offset,
                            tmp_rev, curr_rev, delta.days, dpp)
            if delta.days >= dpp:
                # The midpoint is already outside the period: keep the
                # upper end and halve the step.
                offset = tmp_offset
            else:
                # The midpoint is still inside the period: move the upper
                # end down to it while keeping the same lower end.
                offset_rev = tmp_rev - offset
                tmp_rev = tmp_rev - tmp_offset
                offset = tmp_rev - offset_rev
        #
        # Add the current period
        #
        periods.append((period, day, curr_time, curr_rev, rev_time,
                        tmp_rev - offset + 1))
        if debug:
            _debug_line("APPEND 2", period, day, curr_time, curr_rev, rev_time)
        period = period + 1
        day = day + dpp
        curr_time = start_time + datetime.timedelta(-day + 1)
        #
        # Set the new revision number for the next period
        #
        curr_rev = tmp_rev - offset
        rev_time = get_date(curr_rev, reposdir)
    #
    # Debugging output
    #
    if debug:
        print("")
        print("  SUMMARIZING")
        for (p_num, p_day, p_time, p_rev, p_rev_time, p_last_rev) in periods:
            print("")
            _debug_line("Period      ", p_num)
            _debug_line("Day         ", p_day)
            _debug_line("Period Date ", p_time)
            _debug_line("Rev         ", p_rev)
            _debug_line("Rev Date    ", p_rev_time)
            _debug_line("Last Rev    ", p_last_rev)
            if p_last_rev is not None:
                _debug_line("LastRev Date", get_date(p_last_rev, reposdir))
            if p_rev is not None and p_rev != start_rev:
                _debug_line("PrevRev Date", get_date(p_rev + 1, reposdir))
    #
    # Return the list of periods
    #
    return periods


def _debug_line(*items):
    # Print the items on one line, each converted with str() and separated
    # by a single space.
    print(" ".join([str(item) for item in items]))

#
# Summarize the commit and change history for nperiods that have length dpp
#
def summarize(resposdir,nperiods,dpp,format):
    """Print commit and change statistics for periods of `dpp` days.

    The periods come from get_periods().  For every revision in a period the
    author (``svnlook author``) and the changed paths (``svnlook changed``)
    are queried; commits and changed paths are then totalled per author and
    printed to standard output.

    Parameters:
      resposdir  Repository path handed to svnlook.  (The spelling is kept
                 so the signature stays unchanged.)
      nperiods   Maximum number of periods to report.  Fewer are reported
                 when get_periods() returns fewer.
      dpp        Days per period.
      format     "text" or "xml"; any other value produces no per-period
                 output.

    Returns None.  If get_periods() yields no periods, a single
    "ERROR processing <repository>" line is printed instead of a report.
    """
    # The parameter name is misspelled in the signature; bind the intended
    # name so the argument, not a module-level global, is what gets used.
    reposdir = resposdir
    periods = get_periods(reposdir, nperiods, dpp)
    if len(periods) == 0:
        print("ERROR processing %s" % (reposdir,))
        return
    # get_periods() may return more OR fewer than nperiods entries; report
    # only what exists so the output loop cannot index past the end.
    count = max(0, min(len(periods), nperiods))
    #
    # Collect statistics
    #
    authors = sets.Set()
    stats = []
    for period in periods[:count]:
        stats.append(_tally_period(period, reposdir, authors))
    #
    # Generate output
    #
    author_list = list(authors)
    author_list.sort()
    if format == "text":
        emit = _print_text_period
    elif format == "xml":
        emit = _print_xml_period
    else:
        return
    for idx in range(count):
        emit(periods[idx], stats[idx], author_list, reposdir)


def _tally_period(period, reposdir, authors):
    # Return (commits, changes) dictionaries keyed by author for one period,
    # or (None, None) for an empty period.  Every author seen is also added
    # to the `authors` set.
    newest_rev = period[3]
    oldest_rev = period[5]
    if newest_rev is None:
        return (None, None)
    commits = {}
    changes = {}
    for rev in range(newest_rev, oldest_rev - 1, -1):
        author = commands.getoutput(
            "svnlook author -r " + repr(rev) + " " + reposdir)
        if author == "":
            author = "unknown"
        authors.add(author)
        changed = commands.getoutput(
            "svnlook changed -r " + repr(rev) + " " + reposdir)
        # One output line per changed path.  Blank lines are skipped so that
        # an empty output counts as zero changes ("".split("\n") has one
        # element).
        nchanged = len([line for line in changed.split("\n") if line])
        commits[author] = commits.get(author, 0) + 1
        changes[author] = changes.get(author, 0) + nchanged
    return (commits, changes)


def _period_totals(stat):
    # Return (ncommits, nchanges) summed over all authors; zeros for an
    # empty period.
    (commits, changes) = stat
    if commits is None:
        return (0, 0)
    return (sum(commits.values()), sum(changes.values()))


def _print_labeled(label, value):
    # Print a label and a value separated by a single space.
    print("%s %s" % (label, value))


def _print_text_period(period, stat, author_list, reposdir):
    # Print the plain-text report for one period.
    (commits, changes) = stat
    (ncommits, nchanges) = _period_totals(stat)
    print("..................................")
    _print_labeled("Period      ", period[0])
    _print_labeled("Period Date ", period[2])
    _print_labeled("Rev         ", period[3])
    if period[3] is not None:
        _print_labeled("Rev Date    ", period[4])
    _print_labeled("Last Rev    ", period[5])
    if period[5] is not None:
        _print_labeled("LastRev Date", get_date(period[5], reposdir))
    _print_labeled("Num Commits ", ncommits)
    _print_labeled("Num Changes ", nchanges)
    if period[3] is not None:
        # Every known author is listed, with 0 for those inactive in this
        # period.
        print("Commits by Author")
        for author in author_list:
            print("  %-15s %8d" % (author, commits.get(author, 0)))
        print("Changes by Author")
        for author in author_list:
            print("  %-15s %8d" % (author, changes.get(author, 0)))
    print("")


def _print_xml_period(period, stat, author_list, reposdir):
    # Print the <Period> XML element for one period.
    (commits, changes) = stat
    (ncommits, nchanges) = _period_totals(stat)
    print("    <Period>")
    print("      <PeriodNum>%d</PeriodNum>" % (period[0],))
    print("      <PeriodDate>%s</PeriodDate>" % (period[2],))
    print("      <FirstRevisionNum>%s</FirstRevisionNum>" % (repr(period[3]),))
    if period[3] is not None:
        print("      <FirstRevisionDate>%s</FirstRevisionDate>" % (period[4],))
    print("      <LastRevisionNum>%s</LastRevisionNum>" % (repr(period[5]),))
    if period[5] is not None:
        print("      <LastRevisionDate>%s</LastRevisionDate>"
              % (get_date(period[5], reposdir),))
    print("      <NumCommits>%d</NumCommits>" % (ncommits,))
    print("      <NumChanges>%d</NumChanges>" % (nchanges,))
    if period[3] is not None:
        # Every known author is listed, with 0 for those inactive in this
        # period.
        for author in author_list:
            print("      <Commits author=\"%s\">%d</Commits>"
                  % (author, commits.get(author, 0)))
        for author in author_list:
            print("      <Changes author=\"%s\">%d</Changes>"
                  % (author, changes.get(author, 0)))
    print("    </Period>")

##
## MAIN
##
# Command-line driver: parse the arguments, then print the daily, weekly,
# monthly and quarterly summaries one after the other.

_usage = ("\n"
          "svn_commit_summary [-xml] <repository-dir> <ndays> <nweeks> <nmonths> <nquarters>\n"
          "\n"
          "This script summarizes recent commits and changes in a subversion repository.\n"
          "The default output format is plain text, but an XML format can be\n"
          "specified as well.")

# Detect the optional leading -xml flag first, so the argument-count check
# below can take the shifted positions into account.
i=0
format="text"
if len(sys.argv) > 1 and sys.argv[1] == "-xml":
    format="xml"
    i=1

# Five positional arguments are required after the optional flag.
if len(sys.argv) < 6+i:
    print(_usage)
    sys.exit(1)

reposdir=sys.argv[1+i]
try:
    # The counts are parsed with int() instead of eval(): eval() would
    # execute arbitrary expressions typed on the command line.
    ndays=int(sys.argv[2+i])
    nweeks=int(sys.argv[3+i])
    nmonths=int(sys.argv[4+i])
    nquarters=int(sys.argv[5+i])
except ValueError:
    print(_usage)
    sys.exit(1)

# (section name, number of periods, days per period)
_sections = [
    ("Daily", ndays, 1),
    ("Weekly", nweeks, 7),
    ("Monthly", nmonths, 30),
    ("Quarterly", nquarters, 90),
]

if format=="xml":
    print("<SVNCommitSummary>")
    print("  <Repository>%s</Repository>" % (reposdir,))

for (_name, _nperiods, _dpp) in _sections:
    if format=="xml":
        print("  <%sStatistics>" % (_name,))
    else:
        print("")
        print("---------------------------------")
        print("       %s Statistics" % (_name,))
        print("---------------------------------")
        print("")
    summarize(reposdir,_nperiods,_dpp,format)
    if format=="xml":
        print("  </%sStatistics>" % (_name,))

if format=="xml":
    print("</SVNCommitSummary>")

