Introduce manifest format using git submodules

If a manifest top level directory contains '.gitmodules' we now
assume this is a git module format manifest and switch to using
that code, rather than the legacy XML based manifest.

At the same time, we move the bare repository for a project from
$TOP/.repo/projects/$REPO_PATH.git to be $REPO_NAME.git instead.
This makes it easier for us to later support a repo init from an
existing work tree, as we can more accurately predict the path of
the project's repository in the workspace.  It also means that the
$TOP/.repo/projects/ directory is layed out like a mirror would be.

Signed-off-by: Shawn O. Pearce <sop@google.com>
diff --git a/docs/manifest_submodule.txt b/docs/manifest_submodule.txt
new file mode 100644
index 0000000..e7d1f64
--- /dev/null
+++ b/docs/manifest_submodule.txt
@@ -0,0 +1,130 @@
+repo Manifest Format (submodule)
+================================
+
+A repo manifest describes the structure of a repo client; that is
+the directories that are visible and where they should be obtained
+from with git.
+
+The basic structure of a manifest is a bare Git repository holding
+a 'gitmodules' file in the top level directory, and one or more
+gitlink references pointing at commits from the referenced projects.
+This is the same structure as used by 'git submodule'.
+
+Manifests are inherently version controlled, since they are kept
+within a Git repository.  Updates to manifests are automatically
+obtained by clients during `repo sync`.
+
+.gitmodules
+===========
+
+The '.gitmodules' file, located in the top-level directory of the
+client's working tree (or manifest repository), is a text file with
+a syntax matching the requirements of 'git config'.
+
+This file contains one subsection per project (also called a
+submodule by git), and the subsection value is a unique name to
+describe the project. Each submodule section must contain the
+following required keys:
+
+  * path
+  * url
+
+submodule.<name>.path
+---------------------
+
+Defines the path, relative to the top-level directory of the client's
+working tree, where the project is expected to be checked out. The
+path name must not end with a '/'. All paths must be unique within
+the .gitmodules file.
+
+At the specified path within the manifest repository a gitlink
+tree entry (an entry with file mode 160000) must exist referencing
+a commit SHA-1 from the project.  This tree entry specifies the
+exact version of the project that `repo sync` will synchronize the
+client's working tree to.
+
+submodule.<name>.url
+--------------------
+
+Defines a URL from where the project repository can be cloned.
+By default `repo sync` will clone from this URL whenever a user
+needs to access this project.
+
+submodule.<name>.revision
+-------------------------
+
+Name of the branch in the project repository that Gerrit Code Review
+should automatically refresh the project's gitlink entry from.
+
+If set, during submit of a change within the referenced project,
+Gerrit Code Review will automatically update the manifest
+repository's corresponding gitlink to the new commit SHA-1 of
+this branch.
+
+Valid values are a short branch name (e.g. 'master'), a full ref
+name (e.g. 'refs/heads/master'), or '.' to request using the same
+branch name as the manifest branch itself.  Since '.' automatically
+uses the manifest branch, '.' is the recommended value.
+
+If this key is not set, Gerrit Code Review will NOT automatically
+update the gitlink. An unset key requires the manifest maintainer
+to manually update the gitlink when it is necessary to reference
+a different revision of the project.
+
+submodule.<name>.update
+-----------------------
+
+This key is not supported by repo.  If set, it will be ignored.
+
+.review
+=======
+
+The optional '.review' file, located in the top-level directory of
+the client's working tree (or manifest repository), is a text file
+with a syntax matching the requirements of 'git config'.
+
+This file describes how `repo upload` should interact with the
+project's preferred code review system.
+
+review.url
+----------
+
+URL of the default Gerrit Code Review server.  If a project does
+not have a specific URL in the '.review' file, this default URL
+will be used instead.
+
+review.<name>.url
+-----------------
+
+Project specific URL of the Gerrit Code Review server, for the
+submodule whose project name is <name>.
+
+Example
+=======
+
+  $ cat .gitmodules
+  [submodule "app/Clock"]
+    path = clock
+    url = git://vcs.example.com/ClockWidget.git
+    revision = .
+  [submodule "app/Browser"]
+    path = net/browser
+    url = git://netgroup.example.com/network/web/Browser.git
+    revision = .
+
+  $ cat .review
+  [review]
+    url = vcs-gerrit.example.com
+  [review "app/Browser"]
+    url = netgroup.example.com
+
+In the above example, the app/Clock project will send its code
+reviews to the default server, vcs-gerrit.example.com, while
+app/Browser will send its code reviews to netgroup.example.com.
+
+See Also
+========
+
+ * http://www.kernel.org/pub/software/scm/git/docs/gitmodules.html
+ * http://www.kernel.org/pub/software/scm/git/docs/git-config.html
+ * http://code.google.com/p/gerrit/
diff --git a/manifest_loader.py b/manifest_loader.py
index 1ce1c1f..467cb42 100644
--- a/manifest_loader.py
+++ b/manifest_loader.py
@@ -13,11 +13,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from manifest_submodule import SubmoduleManifest
 from manifest_xml import XmlManifest
 
 def ParseManifest(repodir, type=None):
   if type:
     return type(repodir)
+  if SubmoduleManifest.Is(repodir):
+    return SubmoduleManifest(repodir)
   return XmlManifest(repodir)
 
 _manifest = None
diff --git a/manifest_submodule.py b/manifest_submodule.py
new file mode 100644
index 0000000..92f187a
--- /dev/null
+++ b/manifest_submodule.py
@@ -0,0 +1,474 @@
+#
+# Copyright (C) 2009 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import os
+import shutil
+
+from error import GitError
+from error import ManifestParseError
+from git_command import GitCommand
+from git_config import GitConfig
+from git_config import IsId
+from manifest import Manifest
+from progress import Progress
+from project import RemoteSpec
+from project import Project
+from project import MetaProject
+from project import R_HEADS
+from project import HEAD
+from project import _lwrite
+
+import manifest_xml
+
+GITLINK = '160000'
+
+def _rmdir(dir, top):
+  while dir != top:
+    try:
+      os.rmdir(dir)
+    except OSError:
+      break
+    dir = os.path.dirname(dir)
+
+def _rmref(gitdir, ref):
+  os.remove(os.path.join(gitdir, ref))
+  log = os.path.join(gitdir, 'logs', ref)
+  if os.path.exists(log):
+    os.remove(log)
+    _rmdir(os.path.dirname(log), gitdir)
+
+def _has_gitmodules(d):
+  return os.path.exists(os.path.join(d, '.gitmodules'))
+
+class SubmoduleManifest(Manifest):
+  """manifest from .gitmodules file"""
+
+  @classmethod
+  def Is(cls, repodir):
+    return _has_gitmodules(os.path.dirname(repodir)) \
+        or _has_gitmodules(os.path.join(repodir, 'manifest')) \
+        or _has_gitmodules(os.path.join(repodir, 'manifests'))
+
+  @classmethod
+  def IsBare(cls, p):
+    try:
+      p.bare_git.cat_file('-e', '%s:.gitmodules' % p.GetRevisionId())
+    except GitError:
+      return False
+    return True
+
+  def __init__(self, repodir):
+    Manifest.__init__(self, repodir)
+
+    gitdir = os.path.join(repodir, 'manifest.git')
+    config = GitConfig.ForRepository(gitdir = gitdir)
+
+    if config.GetBoolean('repo.mirror'):
+      worktree = os.path.join(repodir, 'manifest')
+      relpath = None
+    else:
+      worktree = self.topdir
+      relpath  = '.'
+
+    self.manifestProject = MetaProject(self, '__manifest__',
+      gitdir   = gitdir,
+      worktree = worktree,
+      relpath  = relpath)
+    self._modules = GitConfig(os.path.join(worktree, '.gitmodules'),
+                              pickleFile = os.path.join(
+                                repodir, '.repopickle_gitmodules'
+                              ))
+    self._review = GitConfig(os.path.join(worktree, '.review'),
+                             pickleFile = os.path.join(
+                               repodir, '.repopickle_review'
+                             ))
+    self._Unload()
+
+  @property
+  def projects(self):
+    self._Load()
+    return self._projects
+
+  def InitBranch(self):
+    m = self.manifestProject
+    if m.CurrentBranch is None:
+      b = m.revisionExpr
+      if b.startswith(R_HEADS):
+        b = b[len(R_HEADS):]
+      return m.StartBranch(b)
+    return True
+
+  def SetMRefs(self, project):
+    if project.revisionId is None:
+      # Special project, e.g. the manifest or repo executable.
+      #
+      return
+
+    ref = 'refs/remotes/m'
+    cur = project.bare_ref.get(ref)
+    exp = project.revisionId
+    if cur != exp:
+      msg = 'manifest set to %s' % exp
+      project.bare_git.UpdateRef(ref, exp, message = msg, detach = True)
+
+    ref = 'refs/remotes/m-revision'
+    cur = project.bare_ref.symref(ref)
+    exp = project.revisionExpr
+    if exp is None:
+      if cur:
+        _rmref(project.gitdir, ref)
+    elif cur != exp:
+      remote = project.GetRemote(project.remote.name)
+      dst = remote.ToLocal(exp)
+      msg = 'manifest set to %s (%s)' % (exp, dst)
+      project.bare_git.symbolic_ref('-m', msg, ref, dst)
+
+  def Upgrade_Local(self, old):
+    if isinstance(old, manifest_xml.XmlManifest):
+      self.FromXml_Local_1(old, checkout=True)
+      self.FromXml_Local_2(old)
+    else:
+      raise ManifestParseError, 'cannot upgrade manifest'
+
+  def FromXml_Local_1(self, old, checkout):
+    os.rename(old.manifestProject.gitdir,
+              os.path.join(old.repodir, 'manifest.git'))
+
+    oldmp = old.manifestProject
+    oldBranch = oldmp.CurrentBranch
+    b = oldmp.GetBranch(oldBranch).merge
+    if not b:
+      raise ManifestParseError, 'cannot upgrade manifest'
+    if b.startswith(R_HEADS):
+      b = b[len(R_HEADS):]
+
+    newmp = self.manifestProject
+    self._CleanOldMRefs(newmp)
+    if oldBranch != b:
+      newmp.bare_git.branch('-m', oldBranch, b)
+      newmp.config.ClearCache()
+
+    old_remote = newmp.GetBranch(b).remote.name
+    act_remote = self._GuessRemoteName(old)
+    if old_remote != act_remote:
+      newmp.bare_git.remote('rename', old_remote, act_remote)
+      newmp.config.ClearCache()
+    newmp.remote.name = act_remote
+    print >>sys.stderr, "Assuming remote named '%s'" % act_remote
+
+    if checkout:
+      for p in old.projects.values():
+        for c in p.copyfiles:
+          if os.path.exists(c.abs_dest):
+            os.remove(c.abs_dest)
+      newmp._InitWorkTree()
+    else:
+      newmp._LinkWorkTree()
+
+    _lwrite(os.path.join(newmp.worktree,'.git',HEAD),
+            'ref: refs/heads/%s\n' % b)
+
+  def _GuessRemoteName(self, old):
+    used = {}
+    for p in old.projects.values():
+      n = p.remote.name
+      used[n] = used.get(n, 0) + 1
+
+    remote_name = 'origin'
+    remote_used = 0
+    for n in used.keys():
+      if remote_used < used[n]:
+        remote_used = used[n]
+        remote_name = n
+    return remote_name
+
+  def FromXml_Local_2(self, old):
+    shutil.rmtree(old.manifestProject.worktree)
+    os.remove(old._manifestFile)
+
+    my_remote = self._Remote().name
+    new_base = os.path.join(self.repodir, 'projects')
+    old_base = os.path.join(self.repodir, 'projects.old')
+    os.rename(new_base, old_base)
+    os.makedirs(new_base)
+
+    info = []
+    pm = Progress('Converting projects', len(self.projects))
+    for p in self.projects.values():
+      pm.update()
+
+      old_p = old.projects.get(p.name)
+      old_gitdir = os.path.join(old_base, '%s.git' % p.relpath)
+      if not os.path.isdir(old_gitdir):
+        continue
+
+      parent = os.path.dirname(p.gitdir)
+      if not os.path.isdir(parent):
+        os.makedirs(parent)
+      os.rename(old_gitdir, p.gitdir)
+      _rmdir(os.path.dirname(old_gitdir), self.repodir)
+
+      if not os.path.isdir(p.worktree):
+        os.makedirs(p.worktree)
+
+      if os.path.isdir(os.path.join(p.worktree, '.git')):
+        p._LinkWorkTree(relink=True)
+
+      self._CleanOldMRefs(p)
+      if old_p and old_p.remote.name != my_remote:
+        info.append("%s/: renamed remote '%s' to '%s'" \
+                    % (p.relpath, old_p.remote.name, my_remote))
+        p.bare_git.remote('rename', old_p.remote.name, my_remote)
+        p.config.ClearCache()
+
+      self.SetMRefs(p)
+    pm.end()
+    for i in info:
+      print >>sys.stderr, i
+
+  def _CleanOldMRefs(self, p):
+    all_refs = p._allrefs
+    for ref in all_refs.keys():
+      if ref.startswith(manifest_xml.R_M):
+        if p.bare_ref.symref(ref) != '':
+          _rmref(p.gitdir, ref)
+        else:
+          p.bare_git.DeleteRef(ref, all_refs[ref])
+
+  def FromXml_Definition(self, old):
+    """Convert another manifest representation to this one.
+    """
+    mp = self.manifestProject
+    gm = self._modules
+    gr = self._review
+
+    fd = open(os.path.join(mp.worktree, '.gitignore'), 'ab')
+    fd.write('/.repo\n')
+    fd.close()
+
+    sort_projects = list(old.projects.keys())
+    sort_projects.sort()
+
+    b = mp.GetBranch(mp.CurrentBranch).merge
+    if b.startswith(R_HEADS):
+      b = b[len(R_HEADS):]
+
+    info = []
+    pm = Progress('Converting manifest', len(sort_projects))
+    for p in sort_projects:
+      pm.update()
+      p = old.projects[p]
+
+      gm.SetString('submodule.%s.path' % p.name, p.relpath)
+      gm.SetString('submodule.%s.url' % p.name, p.remote.url)
+
+      if gr.GetString('review.url') is None:
+        gr.SetString('review.url', p.remote.review)
+      elif gr.GetString('review.url') != p.remote.review:
+        gr.SetString('review.%s.url' % p.name, p.remote.review)
+
+      r = p.revisionExpr
+      if r and not IsId(r):
+        if r.startswith(R_HEADS):
+          r = r[len(R_HEADS):]
+        if r == b:
+          r = '.'
+        gm.SetString('submodule.%s.revision' % p.name, r)
+
+      for c in p.copyfiles:
+        info.append('Moved %s out of %s' % (c.src, p.relpath))
+        c._Copy()
+        p.work_git.rm(c.src)
+        mp.work_git.add(c.dest)
+
+      self.SetRevisionId(p.relpath, p.GetRevisionId())
+    mp.work_git.add('.gitignore', '.gitmodules', '.review')
+    pm.end()
+    for i in info:
+      print >>sys.stderr, i
+
+  def _Unload(self):
+    self._loaded = False
+    self._projects = {}
+    self._revisionIds = None
+    self.branch = None
+
+  def _Load(self):
+    if not self._loaded:
+      f = os.path.join(self.repodir, manifest_xml.LOCAL_MANIFEST_NAME)
+      if os.path.exists(f):
+        print >>sys.stderr, 'warning: ignoring %s' % f
+
+      m = self.manifestProject
+      b = m.CurrentBranch
+      if not b:
+        raise ManifestParseError, 'manifest cannot be on detached HEAD'
+      b = m.GetBranch(b).merge
+      if b.startswith(R_HEADS):
+        b = b[len(R_HEADS):]
+      self.branch = b
+      m.remote.name = self._Remote().name
+
+      self._ParseModules()
+
+      if self.IsMirror:
+        self._AddMetaProjectMirror(self.repoProject)
+        self._AddMetaProjectMirror(self.manifestProject)
+
+      self._loaded = True
+
+  def _ParseModules(self):
+    byPath = dict()
+    for name in self._modules.GetSubSections('submodule'):
+      p = self._ParseProject(name)
+      if self._projects.get(p.name):
+        raise ManifestParseError, 'duplicate project "%s"' % p.name
+      if byPath.get(p.relpath):
+        raise ManifestParseError, 'duplicate path "%s"' % p.relpath
+      self._projects[p.name] = p
+      byPath[p.relpath] = p
+
+    for relpath in self._allRevisionIds.keys():
+      if relpath not in byPath:
+        raise ManifestParseError, \
+          'project "%s" not in .gitmodules' \
+          % relpath
+
+  def _Remote(self):
+    m = self.manifestProject
+    b = m.GetBranch(m.CurrentBranch)
+    return b.remote
+
+  def _ResolveUrl(self, url):
+    if url.startswith('./') or url.startswith('../'):
+      base = self._Remote().url
+      try:
+        base = base[:base.rindex('/')+1]
+      except ValueError:
+        base = base[:base.rindex(':')+1]
+      if url.startswith('./'):
+        url = url[2:]
+      while '/' in base and url.startswith('../'):
+        base = base[:base.rindex('/')+1]
+        url = url[3:]
+      return base + url
+    return url
+
+  def _GetRevisionId(self, path):
+    return self._allRevisionIds.get(path)
+
+  @property
+  def _allRevisionIds(self):
+    if self._revisionIds is None:
+      a = dict()
+      p = GitCommand(self.manifestProject,
+                     ['ls-files','-z','--stage'],
+                     capture_stdout = True)
+      for line in p.process.stdout.read().split('\0')[:-1]:
+        l_info, l_path = line.split('\t', 2)
+        l_mode, l_id, l_stage = l_info.split(' ', 2)
+        if l_mode == GITLINK and l_stage == '0':
+          a[l_path] = l_id
+      p.Wait()
+      self._revisionIds = a
+    return self._revisionIds
+
+  def SetRevisionId(self, path, id):
+    self.manifestProject.work_git.update_index(
+      '--add','--cacheinfo', GITLINK, id, path)
+
+  def _ParseProject(self, name):
+    gm = self._modules
+    gr = self._review
+
+    path = gm.GetString('submodule.%s.path' % name)
+    if not path:
+      path = name
+
+    revId = self._GetRevisionId(path)
+    if not revId:
+      raise ManifestParseError(
+        'submodule "%s" has no revision at "%s"' \
+        % (name, path))
+
+    url = gm.GetString('submodule.%s.url' % name)
+    if not url:
+      url = name
+    url = self._ResolveUrl(url)
+
+    review = gr.GetString('review.%s.url' % name)
+    if not review:
+      review = gr.GetString('review.url')
+    if not review:
+      review = self._Remote().review
+
+    remote = RemoteSpec(self._Remote().name, url, review)
+    revExpr = gm.GetString('submodule.%s.revision' % name)
+    if revExpr == '.':
+      revExpr = self.branch
+
+    if self.IsMirror:
+      relpath = None
+      worktree = None
+      gitdir = os.path.join(self.topdir, '%s.git' % name)
+    else:
+      worktree = os.path.join(self.topdir, path)
+      gitdir = os.path.join(self.repodir, 'projects/%s.git' % name)
+
+    return Project(manifest = self,
+                   name = name,
+                   remote = remote,
+                   gitdir = gitdir,
+                   worktree = worktree,
+                   relpath = path,
+                   revisionExpr = revExpr,
+                   revisionId = revId)
+
+  def _AddMetaProjectMirror(self, m):
+    m_url = m.GetRemote(m.remote.name).url
+    if m_url.endswith('/.git'):
+      raise ManifestParseError, 'refusing to mirror %s' % m_url
+
+    name = self._GuessMetaName(m_url)
+    if name.endswith('.git'):
+      name = name[:-4]
+
+    if name not in self._projects:
+      m.PreSync()
+      gitdir = os.path.join(self.topdir, '%s.git' % name)
+      project = Project(manifest = self,
+                        name = name,
+                        remote = RemoteSpec(self._Remote().name, m_url),
+                        gitdir = gitdir,
+                        worktree = None,
+                        relpath = None,
+                        revisionExpr = m.revisionExpr,
+                        revisionId = None)
+      self._projects[project.name] = project
+
+  def _GuessMetaName(self, m_url):
+    parts = m_url.split('/')
+    name = parts[-1]
+    parts = parts[0:-1]
+    s = len(parts) - 1
+    while s > 0:
+      l = '/'.join(parts[0:s]) + '/'
+      r = '/'.join(parts[s:]) + '/'
+      for p in self._projects.values():
+        if p.name.startswith(r) and p.remote.url.startswith(l):
+          return r + name
+      s -= 1
+    return m_url[m_url.rindex('/') + 1:]
diff --git a/subcmds/init.py b/subcmds/init.py
index b5207fb..cdbbfdf 100644
--- a/subcmds/init.py
+++ b/subcmds/init.py
@@ -21,6 +21,7 @@
 from error import ManifestParseError
 from project import SyncBuffer
 from git_command import git_require, MIN_GIT_VERSION
+from manifest_submodule import SubmoduleManifest
 from manifest_xml import XmlManifest
 from subcmds.sync import _ReloadManifest
 
@@ -144,6 +145,14 @@
       print >>sys.stderr, 'fatal: cannot obtain manifest %s' % r.url
       sys.exit(1)
 
+    if is_new and SubmoduleManifest.IsBare(m):
+      new = self.GetManifest(reparse=True, type=SubmoduleManifest)
+      if m.gitdir != new.manifestProject.gitdir:
+        os.rename(m.gitdir, new.manifestProject.gitdir)
+        new = self.GetManifest(reparse=True, type=SubmoduleManifest)
+      m = new.manifestProject
+      self._ApplyOptions(opt, is_new)
+
     if not is_new:
       # Force the manifest to load if it exists, the old graph
       # may be needed inside of _ReloadManifest().
diff --git a/subcmds/manifest.py b/subcmds/manifest.py
index 551b13b..7a8b2ee 100644
--- a/subcmds/manifest.py
+++ b/subcmds/manifest.py
@@ -22,7 +22,7 @@
 def _doc(name):
   r = os.path.dirname(__file__)
   r = os.path.dirname(r)
-  fd = open(os.path.join(r, 'docs', 'manifest_xml.txt'))
+  fd = open(os.path.join(r, 'docs', name))
   try:
     return fd.read()
   finally:
@@ -48,6 +48,8 @@
     help = ''
     if isinstance(self.manifest, XmlManifest):
       help += self._xmlHelp + '\n' + _doc('manifest_xml.txt')
+    if isinstance(self.manifest, SubmoduleManifest):
+      help += _doc('manifest_submodule.txt')
     return help
 
   def _Options(self, p):