sync: Enable use of git clone --reference

Initialize a new repository the way git clone --reference would, and
when possible allow callers to use --reference to reuse an existing
checkout (through objects/info/alternates) as the initial object
storage area for the new checkout.

Change-Id: Ie27f760247f311ce484c6d3e85a90d94da2febfc
Signed-off-by: Shawn O. Pearce <sop@google.com>
diff --git a/project.py b/project.py
index 1b5d9a6..8ffed84 100644
--- a/project.py
+++ b/project.py
@@ -622,13 +622,14 @@
     """Perform only the network IO portion of the sync process.
        Local working directory/branch state is not affected.
     """
-    if not self.Exists:
+    is_new = not self.Exists
+    if is_new:
       print >>sys.stderr
       print >>sys.stderr, 'Initializing project %s ...' % self.name
       self._InitGitDir()
 
     self._InitRemote()
-    if not self._RemoteFetch():
+    if not self._RemoteFetch(initial = is_new):
       return False
 
     #Check that the requested ref was found after fetch
@@ -1024,7 +1025,7 @@
 
 ## Direct Git Commands ##
 
-  def _RemoteFetch(self, name=None, tag=None):
+  def _RemoteFetch(self, name=None, tag=None, initial=False):
     if not name:
       name = self.remote.name
 
@@ -1032,6 +1033,60 @@
     if self.GetRemote(name).PreConnectFetch():
       ssh_proxy = True
 
+    if initial:
+      alt = os.path.join(self.gitdir, 'objects/info/alternates')
+      try:
+        fd = open(alt, 'rb')
+        try:
+          ref_dir = fd.readline()
+          if ref_dir and ref_dir.endswith('\n'):
+            ref_dir = ref_dir[:-1]
+        finally:
+          fd.close()
+      except IOError, e:
+        ref_dir = None
+
+      if ref_dir and 'objects' == os.path.basename(ref_dir):
+        ref_dir = os.path.dirname(ref_dir)
+        packed_refs = os.path.join(self.gitdir, 'packed-refs')
+        remote = self.GetRemote(name)
+
+        all = self.bare_ref.all
+        ids = set(all.values())
+        tmp = set()
+
+        for r, id in GitRefs(ref_dir).all.iteritems():
+          if r not in all:
+            if r.startswith(R_TAGS) or remote.WritesTo(r):
+              all[r] = id
+              ids.add(id)
+              continue
+
+          if id in ids:
+            continue
+
+          r = 'refs/_alt/%s' % id
+          all[r] = id
+          ids.add(id)
+          tmp.add(r)
+
+        ref_names = list(all.keys())
+        ref_names.sort()
+
+        tmp_packed = ''
+        old_packed = ''
+
+        for r in ref_names:
+          line = '%s %s\n' % (all[r], r)
+          tmp_packed += line
+          if r not in tmp:
+            old_packed += line
+
+        _lwrite(packed_refs, tmp_packed)
+
+      else:
+        ref_dir = None
+
     cmd = ['fetch']
     if not self.worktree:
       cmd.append('--update-head-ok')
@@ -1039,10 +1094,21 @@
     if tag is not None:
       cmd.append('tag')
       cmd.append(tag)
-    return GitCommand(self,
-                      cmd,
-                      bare = True,
-                      ssh_proxy = ssh_proxy).Wait() == 0
+
+    ok = GitCommand(self,
+                    cmd,
+                    bare = True,
+                    ssh_proxy = ssh_proxy).Wait() == 0
+
+    if initial:
+      if ref_dir:
+        if old_packed != '':
+          _lwrite(packed_refs, old_packed)
+        else:
+          os.remove(packed_refs)
+      self.bare_git.pack_refs('--all', '--prune')
+
+    return ok
 
   def _Checkout(self, rev, quiet=False):
     cmd = ['checkout']
@@ -1080,6 +1146,27 @@
       os.makedirs(self.gitdir)
       self.bare_git.init()
 
+      mp = self.manifest.manifestProject
+      ref_dir = mp.config.GetString('repo.reference')
+
+      if ref_dir:
+        mirror_git = os.path.join(ref_dir, self.name + '.git')
+        repo_git = os.path.join(ref_dir, '.repo', 'projects',
+                                self.relpath + '.git')
+
+        if os.path.exists(mirror_git):
+          ref_dir = mirror_git
+
+        elif os.path.exists(repo_git):
+          ref_dir = repo_git
+
+        else:
+          ref_dir = None
+
+        if ref_dir:
+          _lwrite(os.path.join(self.gitdir, 'objects/info/alternates'),
+                  os.path.join(ref_dir, 'objects') + '\n')
+
       if self.manifest.IsMirror:
         self.config.SetString('core.bare', 'true')
       else:
diff --git a/repo b/repo
index 1374255..bdc05c3 100755
--- a/repo
+++ b/repo
@@ -28,7 +28,7 @@
 del magic
 
 # increment this whenever we make important changes to this script
-VERSION = (1, 8)
+VERSION = (1, 9)
 
 # increment this if the MAINTAINER_KEYS block is modified
 KEYRING_VERSION = (1,0)
@@ -118,6 +118,9 @@
 group.add_option('--mirror',
                  dest='mirror', action='store_true',
                  help='mirror the forrest')
+group.add_option('--reference',
+                 dest='reference',
+                 help='location of mirror directory', metavar='DIR')
 
 # Tool
 group = init_optparse.add_option_group('repo Version options')
diff --git a/subcmds/init.py b/subcmds/init.py
index 4023ab6..17edfa0 100644
--- a/subcmds/init.py
+++ b/subcmds/init.py
@@ -41,6 +41,13 @@
 to be used. If no manifest is specified, the manifest default.xml
 will be used.
 
+The --reference option can be used to point to a directory that
+has the content of a --mirror sync. This will make the working
+directory use as much data as possible from the local reference
+directory when fetching from the server. This will make the sync
+go a lot faster by reducing data traffic on the network.
+
+
 Switching Manifest Branches
 ---------------------------
 
@@ -71,7 +78,9 @@
     g.add_option('--mirror',
                  dest='mirror', action='store_true',
                  help='mirror the forrest')
-
+    g.add_option('--reference',
+                 dest='reference',
+                 help='location of mirror directory', metavar='DIR')
 
     # Tool
     g = p.add_option_group('repo Version options')
@@ -115,6 +124,9 @@
       r.ResetFetch()
       r.Save()
 
+    if opt.reference:
+      m.config.SetString('repo.reference', opt.reference)
+
     if opt.mirror:
       if is_new:
         m.config.SetString('repo.mirror', 'true')