Add --archive option to init to sync using git archive

This significantly reduces sync time and bandwidth usage, as only
a tar of each project's revision is checked out, but git is no
longer accessible from the projects.

This is relevant when git is not needed in the projects but sync
speed/bandwidth is important, for example on CI servers that
regularly build several versions from scratch.

Archive is not supported over http/https.

Change-Id: I48c3c7de2cd5a1faec33e295fcdafbc7807d0e4d
Signed-off-by: Julien Campergue <julien.campergue@parrot.com>
diff --git a/manifest_xml.py b/manifest_xml.py
index 457d5ab..785976b 100644
--- a/manifest_xml.py
+++ b/manifest_xml.py
@@ -329,6 +329,10 @@
   def IsMirror(self):
     return self.manifestProject.config.GetBoolean('repo.mirror')
 
+  @property
+  def IsArchive(self):
+    return self.manifestProject.config.GetBoolean('repo.archive')
+
   def _Unload(self):
     self._loaded = False
     self._projects = {}
diff --git a/project.py b/project.py
index b01a52b..0d49111 100644
--- a/project.py
+++ b/project.py
@@ -23,6 +23,7 @@
 import stat
 import subprocess
 import sys
+import tarfile
 import tempfile
 import time
 
@@ -982,15 +983,62 @@
 
 ## Sync ##
 
+  def _ExtractArchive(self, tarpath, path=None):
+    """Extract the given tar on its current location
+
+    Args:
+        - tarpath: The path to the actual tar file
+
+    """
+    try:
+      with tarfile.open(tarpath, 'r') as tar:
+        tar.extractall(path=path)
+        return True
+    except (IOError, tarfile.TarError) as e:
+      print("error: Cannot extract archive %s: "
+            "%s" % (tarpath, str(e)), file=sys.stderr)
+    return False
+
   def Sync_NetworkHalf(self,
       quiet=False,
       is_new=None,
       current_branch_only=False,
       clone_bundle=True,
-      no_tags=False):
+      no_tags=False,
+      archive=False):
     """Perform only the network IO portion of the sync process.
        Local working directory/branch state is not affected.
     """
+    if archive and not isinstance(self, MetaProject):
+      if self.remote.url.startswith(('http://', 'https://')):
+        print("error: %s: Cannot fetch archives from http/https "
+              "remotes." % self.name, file=sys.stderr)
+        return False
+
+      name = self.relpath.replace('\\', '/')
+      name = name.replace('/', '_')
+      tarpath = '%s.tar' % name
+      topdir = self.manifest.topdir
+
+      try:
+        self._FetchArchive(tarpath, cwd=topdir)
+      except GitError as e:
+        print('error: %s' % str(e), file=sys.stderr)
+        return False
+
+      # From now on, we only need absolute tarpath
+      tarpath = os.path.join(topdir, tarpath)
+
+      if not self._ExtractArchive(tarpath, path=topdir):
+        return False
+      try:
+        os.remove(tarpath)
+      except OSError as e:
+        print("warn: Cannot remove archive %s: "
+              "%s" % (tarpath, str(e)), file=sys.stderr)
+      self._CopyFiles()
+      return True
+
     if is_new is None:
       is_new = not self.Exists
     if is_new:
@@ -1573,6 +1621,19 @@
 
 ## Direct Git Commands ##
 
+  def _FetchArchive(self, tarpath, cwd=None):
+    cmd = ['archive', '-v', '-o', tarpath]
+    cmd.append('--remote=%s' % self.remote.url)
+    cmd.append('--prefix=%s/' % self.relpath)
+    cmd.append(self.revisionExpr)
+
+    command = GitCommand(self, cmd, cwd=cwd,
+                         capture_stdout=True,
+                         capture_stderr=True)
+
+    if command.Wait() != 0:
+      raise GitError('git archive %s: %s' % (self.name, command.stderr))
+
   def _RemoteFetch(self, name=None,
                    current_branch_only=False,
                    initial=False,
diff --git a/repo b/repo
index 277bbc6..d81cdd6 100755
--- a/repo
+++ b/repo
@@ -180,6 +180,10 @@
 group.add_option('--depth', type='int', default=None,
                  dest='depth',
                  help='create a shallow clone with given depth; see git clone')
+group.add_option('--archive',
+                 dest='archive', action='store_true',
+                 help='checkout an archive instead of a git repository for '
+                      'each project. See git archive.')
 group.add_option('-g', '--groups',
                  dest='groups', default='default',
                  help='restrict manifest projects to ones with specified '
diff --git a/subcmds/init.py b/subcmds/init.py
index a44fb7a..b1fcb69 100644
--- a/subcmds/init.py
+++ b/subcmds/init.py
@@ -99,6 +99,10 @@
     g.add_option('--depth', type='int', default=None,
                  dest='depth',
                  help='create a shallow clone with given depth; see git clone')
+    g.add_option('--archive',
+                 dest='archive', action='store_true',
+                 help='checkout an archive instead of a git repository for '
+                      'each project. See git archive.')
     g.add_option('-g', '--groups',
                  dest='groups', default='default',
                  help='restrict manifest projects to ones with specified '
@@ -198,6 +202,16 @@
     if opt.reference:
       m.config.SetString('repo.reference', opt.reference)
 
+    if opt.archive:
+      if is_new:
+        m.config.SetString('repo.archive', 'true')
+      else:
+        print('fatal: --archive is only supported when initializing a new '
+              'workspace.', file=sys.stderr)
+        print('Either delete the .repo folder in this workspace, or initialize '
+              'in another location.', file=sys.stderr)
+        sys.exit(1)
+
     if opt.mirror:
       if is_new:
         m.config.SetString('repo.mirror', 'true')
@@ -366,6 +380,13 @@
     if opt.reference:
       opt.reference = os.path.expanduser(opt.reference)
 
+    # Check this here, else manifest will be tagged "not new" and init won't be
+    # possible anymore without removing the .repo/manifests directory.
+    if opt.archive and opt.mirror:
+      print('fatal: --mirror and --archive cannot be used together.',
+            file=sys.stderr)
+      sys.exit(1)
+
     self._SyncManifest(opt)
     self._LinkManifest(opt.manifest_name)
 
diff --git a/subcmds/sync.py b/subcmds/sync.py
index d8aec59..0279ff6 100644
--- a/subcmds/sync.py
+++ b/subcmds/sync.py
@@ -253,7 +253,7 @@
           quiet=opt.quiet,
           current_branch_only=opt.current_branch_only,
           clone_bundle=not opt.no_clone_bundle,
-          no_tags=opt.no_tags)
+          no_tags=opt.no_tags, archive=self.manifest.IsArchive)
         self._fetch_times.Set(project, time.time() - start)
 
         # Lock around all the rest of the code, since printing, updating a set
@@ -294,7 +294,8 @@
             quiet=opt.quiet,
             current_branch_only=opt.current_branch_only,
             clone_bundle=not opt.no_clone_bundle,
-            no_tags=opt.no_tags):
+            no_tags=opt.no_tags,
+            archive=self.manifest.IsArchive):
           fetched.add(project.gitdir)
         else:
           print('error: Cannot fetch %s' % project.name, file=sys.stderr)
@@ -338,7 +339,9 @@
     pm.end()
     self._fetch_times.Save()
 
-    self._GCProjects(projects)
+    if not self.manifest.IsArchive:
+      self._GCProjects(projects)
+
     return fetched
 
   def _GCProjects(self, projects):
@@ -641,7 +644,7 @@
         previously_missing_set = missing_set
         fetched.update(self._Fetch(missing, opt))
 
-    if self.manifest.IsMirror:
+    if self.manifest.IsMirror or self.manifest.IsArchive:
       # bail out now, we have no working tree
       return