sync: Order projects according to last fetch time

Some projects may consistently take longer to fetch than others; for
example, a more active project may have many more Gerrit changes to
transfer than a less active one. Use a simple heuristic based on each
project's last fetch time to fetch the slower projects first, so that
we do not spend the tail end of the sync waiting on a small number of
outliers.
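
A minimal sketch of the intended ordering (illustrative names only, not
the real sync.py API); projects with no recorded time default to a large
value so they are also tried early:

    _ONE_DAY_S = 24 * 60 * 60

    def slowest_first(project_names, fetch_times):
      # fetch_times maps project name -> seconds taken by the last fetch.
      return sorted(project_names,
                    key=lambda name: fetch_times.get(name, _ONE_DAY_S),
                    reverse=True)

    print slowest_first(['small', 'huge', 'new'],
                        {'small': 3.5, 'huge': 420.0})
    # -> ['new', 'huge', 'small']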

This algorithm is probably not optimal, and due to inter-run latency
variance and Python thread scheduling we may not even have a good
estimate of each project's sync time.

Change-Id: I9a463f214b3ed742e4d807c42925b62cb8b1745b
diff --git a/subcmds/sync.py b/subcmds/sync.py
index e68a025..a8022d9 100644
--- a/subcmds/sync.py
+++ b/subcmds/sync.py
@@ -16,6 +16,7 @@
 import netrc
 from optparse import SUPPRESS_HELP
 import os
+import pickle
 import re
 import shutil
 import socket
@@ -47,6 +48,8 @@
 from project import SyncBuffer
 from progress import Progress
 
+_ONE_DAY_S = 24 * 60 * 60
+
 class _FetchError(Exception):
   """Internal error thrown in _FetchHelper() when we don't want stack trace."""
   pass
@@ -212,10 +215,12 @@
       # - We always make sure we unlock the lock if we locked it.
       try:
         try:
+          start = time.time()
           success = project.Sync_NetworkHalf(
             quiet=opt.quiet,
             current_branch_only=opt.current_branch_only,
             clone_bundle=not opt.no_clone_bundle)
+          self._fetch_times.Set(project, time.time() - start)
 
           # Lock around all the rest of the code, since printing, updating a set
           # and Progress.update() are not thread safe.
@@ -293,6 +298,7 @@
         sys.exit(1)
 
     pm.end()
+    self._fetch_times.Save()
     for project in projects:
       project.bare_git.gc('--auto')
     return fetched
@@ -496,12 +502,15 @@
         self.jobs = self.manifest.default.sync_j
     all_projects = self.GetProjects(args, missing_ok=True)
 
+    self._fetch_times = _FetchTimes(self.manifest)
     if not opt.local_only:
       to_fetch = []
       now = time.time()
-      if (24 * 60 * 60) <= (now - rp.LastFetch):
+      if _ONE_DAY_S <= (now - rp.LastFetch):
         to_fetch.append(rp)
       to_fetch.extend(all_projects)
+      to_fetch.sort(key=self._fetch_times.Get, reverse=True)
+      self._fetch_times.Clear()
 
       self._Fetch(to_fetch, opt)
       _PostRepoFetch(rp, opt.no_repo_verify)
@@ -602,3 +611,59 @@
     print >>sys.stderr
     return False
   return True
+
+class _FetchTimes(object):
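+  """Tracks how long each project took to fetch, persisted across syncs."""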
+  def __init__(self, manifest):
+    self._path = os.path.join(manifest.repodir, '.repopickle_fetchtimes')
+    self._times = None
+
+  def Clear(self):
+    self._times = {}
+
+  def Get(self, project):
+    self._Load()
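+    # Unknown projects default to a large time so they are fetched early.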
+    return self._times.get(project.name, _ONE_DAY_S)
+
+  def Set(self, project, t):
+    self._times[project.name] = t
+
+  def _Load(self):
+    if self._times is None:
+      try:
+        f = open(self._path, 'rb')
+      except IOError:
+        self._times = {}
+        return self._times
+      try:
+        try:
+          self._times = pickle.load(f)
+        except Exception:
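+          # The cache is unreadable or corrupt; discard it and start over.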
+          try:
+            os.remove(self._path)
+          except OSError:
+            pass
+          self._times = {}
+      finally:
+        f.close()
+    return self._times
+
+  def Save(self):
+    if self._times is None:
+      return
+    try:
+      f = open(self._path, 'wb')
+    except IOError:
+      return
+    try:
+      try:
+        pickle.dump(self._times, f)
+      except (IOError, OSError, pickle.PickleError):
+        try:
+          os.remove(self._path)
+        except OSError:
+          pass
+    finally:
+      f.close()