Add --archive option to init to sync using git archive
This significantly reduces sync time and bandwidth usage, as only
a tar of each project's revision is checked out, but git is not
accessible from projects anymore.
This is relevant when git is not needed in projects but sync
speed/bandwidth is important, for example on CI servers that
regularly build several versions from scratch.
Archive is not supported over http/https.
Change-Id: I48c3c7de2cd5a1faec33e295fcdafbc7807d0e4d
Signed-off-by: Julien Campergue <julien.campergue@parrot.com>
diff --git a/manifest_xml.py b/manifest_xml.py
index 457d5ab..785976b 100644
--- a/manifest_xml.py
+++ b/manifest_xml.py
@@ -329,6 +329,10 @@
def IsMirror(self):
return self.manifestProject.config.GetBoolean('repo.mirror')
+ @property
+ def IsArchive(self):
+ return self.manifestProject.config.GetBoolean('repo.archive')
+
def _Unload(self):
self._loaded = False
self._projects = {}
diff --git a/project.py b/project.py
index b01a52b..0d49111 100644
--- a/project.py
+++ b/project.py
@@ -23,6 +23,7 @@
import stat
import subprocess
import sys
+import tarfile
import tempfile
import time
@@ -982,15 +983,62 @@
## Sync ##
+ def _ExtractArchive(self, tarpath, path=None):
+ """Extract the given tar on its current location
+
+ Args:
+ - tarpath: The path to the actual tar file
+
+ """
+ try:
+ with tarfile.open(tarpath, 'r') as tar:
+ tar.extractall(path=path)
+ return True
+ except (IOError, tarfile.TarError) as e:
+ print("error: Cannot extract archive %s: "
+ "%s" % (tarpath, str(e)), file=sys.stderr)
+ return False
+
def Sync_NetworkHalf(self,
quiet=False,
is_new=None,
current_branch_only=False,
clone_bundle=True,
- no_tags=False):
+ no_tags=False,
+ archive=False):
"""Perform only the network IO portion of the sync process.
Local working directory/branch state is not affected.
"""
+ if archive and not isinstance(self, MetaProject):
+ if self.remote.url.startswith(('http://', 'https://')):
+ print("error: %s: Cannot fetch archives from http/https "
+ "remotes." % self.name, file=sys.stderr)
+ return False
+
+ name = self.relpath.replace('\\', '/')
+ name = name.replace('/', '_')
+ tarpath = '%s.tar' % name
+ topdir = self.manifest.topdir
+
+ try:
+ self._FetchArchive(tarpath, cwd=topdir)
+ except GitError as e:
+ print('error: %s' % str(e), file=sys.stderr)
+ return False
+
+ # From now on, we only need absolute tarpath
+ tarpath = os.path.join(topdir, tarpath)
+
+ if not self._ExtractArchive(tarpath, path=topdir):
+ return False
+ try:
+ os.remove(tarpath)
+ except OSError as e:
+ print("warn: Cannot remove archive %s: "
+ "%s" % (tarpath, str(e)), file=sys.stderr)
+ self._CopyFiles()
+ return True
+
if is_new is None:
is_new = not self.Exists
if is_new:
@@ -1573,6 +1621,19 @@
## Direct Git Commands ##
+ def _FetchArchive(self, tarpath, cwd=None):
+ cmd = ['archive', '-v', '-o', tarpath]
+ cmd.append('--remote=%s' % self.remote.url)
+ cmd.append('--prefix=%s/' % self.relpath)
+ cmd.append(self.revisionExpr)
+
+ command = GitCommand(self, cmd, cwd=cwd,
+ capture_stdout=True,
+ capture_stderr=True)
+
+ if command.Wait() != 0:
+ raise GitError('git archive %s: %s' % (self.name, command.stderr))
+
def _RemoteFetch(self, name=None,
current_branch_only=False,
initial=False,
diff --git a/repo b/repo
index 277bbc6..d81cdd6 100755
--- a/repo
+++ b/repo
@@ -180,6 +180,10 @@
group.add_option('--depth', type='int', default=None,
dest='depth',
help='create a shallow clone with given depth; see git clone')
+group.add_option('--archive',
+ dest='archive', action='store_true',
+ help='checkout an archive instead of a git repository for '
+ 'each project. See git archive.')
group.add_option('-g', '--groups',
dest='groups', default='default',
help='restrict manifest projects to ones with specified '
diff --git a/subcmds/init.py b/subcmds/init.py
index a44fb7a..b1fcb69 100644
--- a/subcmds/init.py
+++ b/subcmds/init.py
@@ -99,6 +99,10 @@
g.add_option('--depth', type='int', default=None,
dest='depth',
help='create a shallow clone with given depth; see git clone')
+ g.add_option('--archive',
+ dest='archive', action='store_true',
+ help='checkout an archive instead of a git repository for '
+ 'each project. See git archive.')
g.add_option('-g', '--groups',
dest='groups', default='default',
help='restrict manifest projects to ones with specified '
@@ -198,6 +202,16 @@
if opt.reference:
m.config.SetString('repo.reference', opt.reference)
+ if opt.archive:
+ if is_new:
+ m.config.SetString('repo.archive', 'true')
+ else:
+ print('fatal: --archive is only supported when initializing a new '
+ 'workspace.', file=sys.stderr)
+ print('Either delete the .repo folder in this workspace, or initialize '
+ 'in another location.', file=sys.stderr)
+ sys.exit(1)
+
if opt.mirror:
if is_new:
m.config.SetString('repo.mirror', 'true')
@@ -366,6 +380,13 @@
if opt.reference:
opt.reference = os.path.expanduser(opt.reference)
+ # Check this here, else manifest will be tagged "not new" and init won't be
+ # possible anymore without removing the .repo/manifests directory.
+ if opt.archive and opt.mirror:
+ print('fatal: --mirror and --archive cannot be used together.',
+ file=sys.stderr)
+ sys.exit(1)
+
self._SyncManifest(opt)
self._LinkManifest(opt.manifest_name)
diff --git a/subcmds/sync.py b/subcmds/sync.py
index d8aec59..0279ff6 100644
--- a/subcmds/sync.py
+++ b/subcmds/sync.py
@@ -253,7 +253,7 @@
quiet=opt.quiet,
current_branch_only=opt.current_branch_only,
clone_bundle=not opt.no_clone_bundle,
- no_tags=opt.no_tags)
+ no_tags=opt.no_tags, archive=self.manifest.IsArchive)
self._fetch_times.Set(project, time.time() - start)
# Lock around all the rest of the code, since printing, updating a set
@@ -294,7 +294,8 @@
quiet=opt.quiet,
current_branch_only=opt.current_branch_only,
clone_bundle=not opt.no_clone_bundle,
- no_tags=opt.no_tags):
+ no_tags=opt.no_tags,
+ archive=self.manifest.IsArchive):
fetched.add(project.gitdir)
else:
print('error: Cannot fetch %s' % project.name, file=sys.stderr)
@@ -338,7 +339,9 @@
pm.end()
self._fetch_times.Save()
- self._GCProjects(projects)
+ if not self.manifest.IsArchive:
+ self._GCProjects(projects)
+
return fetched
def _GCProjects(self, projects):
@@ -641,7 +644,7 @@
previously_missing_set = missing_set
fetched.update(self._Fetch(missing, opt))
- if self.manifest.IsMirror:
+ if self.manifest.IsMirror or self.manifest.IsArchive:
# bail out now, we have no working tree
return