Merge "Change PyDev project version to "python 2.6""
diff --git a/command.py b/command.py
index 5a5f468..51c0cb4 100644
--- a/command.py
+++ b/command.py
@@ -60,6 +60,32 @@
     """
     raise NotImplementedError
 
+  def _ResetPathToProjectMap(self, projects):
+    self._by_path = dict((p.worktree, p) for p in projects)
+
+  def _UpdatePathToProjectMap(self, project):
+    self._by_path[project.worktree] = project
+
+  def _GetProjectByPath(self, path):
+    project = None
+    if os.path.exists(path):
+      oldpath = None
+      while path \
+        and path != oldpath \
+        and path != self.manifest.topdir:
+        try:
+          project = self._by_path[path]
+          break
+        except KeyError:
+          oldpath = path
+          path = os.path.dirname(path)
+    else:
+      try:
+        project = self._by_path[path]
+      except KeyError:
+        pass
+    return project
+
   def GetProjects(self, args, missing_ok=False):
     """A list of projects that match the arguments.
     """
@@ -74,40 +100,38 @@
     groups = [x for x in re.split('[,\s]+', groups) if x]
 
     if not args:
-      for project in all_projects.values():
+      all_projects_list = all_projects.values()
+      derived_projects = []
+      for project in all_projects_list:
+        if project.Registered:
+          # Do not search registered subproject for derived projects
+          # since its parent has been searched already
+          continue
+        derived_projects.extend(project.GetDerivedSubprojects())
+      all_projects_list.extend(derived_projects)
+      for project in all_projects_list:
         if ((missing_ok or project.Exists) and
             project.MatchesGroups(groups)):
           result.append(project)
     else:
-      by_path = None
+      self._ResetPathToProjectMap(all_projects.values())
 
       for arg in args:
         project = all_projects.get(arg)
 
         if not project:
           path = os.path.abspath(arg).replace('\\', '/')
+          project = self._GetProjectByPath(path)
 
-          if not by_path:
-            by_path = dict()
-            for p in all_projects.values():
-              by_path[p.worktree] = p
-
-          if os.path.exists(path):
-            oldpath = None
-            while path \
-              and path != oldpath \
-              and path != self.manifest.topdir:
-              try:
-                project = by_path[path]
-                break
-              except KeyError:
-                oldpath = path
-                path = os.path.dirname(path)
-          else:
-            try:
-              project = by_path[path]
-            except KeyError:
-              pass
+          # If it's not a derived project, update path->project mapping and
+          # search again, as arg might actually point to a derived subproject.
+          if project and not project.Derived:
+            search_again = False
+            for subproject in project.GetDerivedSubprojects():
+              self._UpdatePathToProjectMap(subproject)
+              search_again = True
+            if search_again:
+              project = self._GetProjectByPath(path) or project
 
         if not project:
           raise NoSuchProjectError(arg)
diff --git a/docs/manifest-format.txt b/docs/manifest-format.txt
index f499868..a36af67 100644
--- a/docs/manifest-format.txt
+++ b/docs/manifest-format.txt
@@ -45,7 +45,8 @@
     <!ELEMENT manifest-server (EMPTY)>
     <!ATTLIST url              CDATA #REQUIRED>
   
-    <!ELEMENT project (annotation?)>
+    <!ELEMENT project (annotation?,
+                       project*)>
     <!ATTLIST project name     CDATA #REQUIRED>
     <!ATTLIST project path     CDATA #IMPLIED>
     <!ATTLIST project remote   IDREF #IMPLIED>
@@ -152,7 +153,10 @@
 
 One or more project elements may be specified.  Each element
 describes a single Git repository to be cloned into the repo
-client workspace.
+client workspace.  You may specify Git-submodules by creating a
+nested project.  Git-submodules will be automatically
+recognized and inherit their parent's attributes, but those
+may be overridden by an explicitly specified project element.
 
 Attribute `name`: A unique name for this project.  The project's
 name is appended onto its remote's fetch URL to generate the actual
@@ -163,7 +167,8 @@
 where ${remote_fetch} is the remote's fetch attribute and
 ${project_name} is the project's name attribute.  The suffix ".git"
 is always appended as repo assumes the upstream is a forest of
-bare Git repositories.
+bare Git repositories.  If the project has a parent element, its
+name will be prefixed by the parent's.
 
 The project name must match the name Gerrit knows, if Gerrit is
 being used for code reviews.
@@ -171,6 +176,8 @@
 Attribute `path`: An optional path relative to the top directory
 of the repo client where the Git working directory for this project
 should be placed.  If not supplied the project name is used.
+If the project has a parent element, its path will be prefixed
+by the parent's.
 
 Attribute `remote`: Name of a previously defined remote element.
 If not supplied the remote given by the default element is used.
@@ -190,6 +197,8 @@
 definition is implicitly in the following manifest groups:
 default, name:monkeys, and path:barrel-of.  If you place a project in the
 group "notdefault", it will not be automatically downloaded by repo.
+If the project has a parent element, the `name` and `path` here
+are the prefixed ones.
 
 Element annotation
 ------------------
diff --git a/manifest_xml.py b/manifest_xml.py
index 04cabaa..a2a56e9 100644
--- a/manifest_xml.py
+++ b/manifest_xml.py
@@ -180,20 +180,25 @@
       root.appendChild(e)
       root.appendChild(doc.createTextNode(''))
 
-    sort_projects = list(self.projects.keys())
-    sort_projects.sort()
+    def output_projects(parent, parent_node, projects):
+      for p in projects:
+        output_project(parent, parent_node, self.projects[p])
 
-    for p in sort_projects:
-      p = self.projects[p]
-
+    def output_project(parent, parent_node, p):
       if not p.MatchesGroups(groups):
-        continue
+        return
+
+      name = p.name
+      relpath = p.relpath
+      if parent:
+        name = self._UnjoinName(parent.name, name)
+        relpath = self._UnjoinRelpath(parent.relpath, relpath)
 
       e = doc.createElement('project')
-      root.appendChild(e)
-      e.setAttribute('name', p.name)
-      if p.relpath != p.name:
-        e.setAttribute('path', p.relpath)
+      parent_node.appendChild(e)
+      e.setAttribute('name', name)
+      if relpath != name:
+        e.setAttribute('path', relpath)
       if not d.remote or p.remote.name != d.remote.name:
         e.setAttribute('remote', p.remote.name)
       if peg_rev:
@@ -231,6 +236,16 @@
       if p.sync_c:
         e.setAttribute('sync-c', 'true')
 
+      if p.subprojects:
+        sort_projects = [subp.name for subp in p.subprojects]
+        sort_projects.sort()
+        output_projects(p, e, sort_projects)
+
+    sort_projects = [key for key in self.projects.keys()
+                     if not self.projects[key].parent]
+    sort_projects.sort()
+    output_projects(None, root, sort_projects)
+
     if self._repo_hooks_project:
       root.appendChild(doc.createTextNode(''))
       e = doc.createElement('repo-hooks')
@@ -383,11 +398,15 @@
     for node in itertools.chain(*node_list):
       if node.nodeName == 'project':
         project = self._ParseProject(node)
-        if self._projects.get(project.name):
-          raise ManifestParseError(
-              'duplicate project %s in %s' %
-              (project.name, self.manifestFile))
-        self._projects[project.name] = project
+        def recursively_add_projects(project):
+          if self._projects.get(project.name):
+            raise ManifestParseError(
+                'duplicate project %s in %s' %
+                (project.name, self.manifestFile))
+          self._projects[project.name] = project
+          for subproject in project.subprojects:
+            recursively_add_projects(subproject)
+        recursively_add_projects(project)
       if node.nodeName == 'repo-hooks':
         # Get the name of the project and the (space-separated) list of enabled.
         repo_hooks_project = self._reqatt(node, 'in-project')
@@ -537,11 +556,19 @@
 
     return '\n'.join(cleanLines)
 
-  def _ParseProject(self, node):
+  def _JoinName(self, parent_name, name):
+    return os.path.join(parent_name, name)
+
+  def _UnjoinName(self, parent_name, name):
+    return os.path.relpath(name, parent_name)
+
+  def _ParseProject(self, node, parent = None):
     """
     reads a <project> element from the manifest file
     """
     name = self._reqatt(node, 'name')
+    if parent:
+      name = self._JoinName(parent.name, name)
 
     remote = self._get_remote(node)
     if remote is None:
@@ -586,37 +613,66 @@
       groups = node.getAttribute('groups')
     groups = [x for x in re.split('[,\s]+', groups) if x]
 
-    default_groups = ['all', 'name:%s' % name, 'path:%s' % path]
-    groups.extend(set(default_groups).difference(groups))
-
-    if self.IsMirror:
-      worktree = None
-      gitdir = os.path.join(self.topdir, '%s.git' % name)
+    if parent is None:
+      relpath, worktree, gitdir = self.GetProjectPaths(name, path)
     else:
-      worktree = os.path.join(self.topdir, path).replace('\\', '/')
-      gitdir = os.path.join(self.repodir, 'projects/%s.git' % path)
+      relpath, worktree, gitdir = self.GetSubprojectPaths(parent, path)
+
+    default_groups = ['all', 'name:%s' % name, 'path:%s' % relpath]
+    groups.extend(set(default_groups).difference(groups))
 
     project = Project(manifest = self,
                       name = name,
                       remote = remote.ToRemoteSpec(name),
                       gitdir = gitdir,
                       worktree = worktree,
-                      relpath = path,
+                      relpath = relpath,
                       revisionExpr = revisionExpr,
                       revisionId = None,
                       rebase = rebase,
                       groups = groups,
                       sync_c = sync_c,
-                      upstream = upstream)
+                      upstream = upstream,
+                      parent = parent)
 
     for n in node.childNodes:
       if n.nodeName == 'copyfile':
         self._ParseCopyFile(project, n)
       if n.nodeName == 'annotation':
         self._ParseAnnotation(project, n)
+      if n.nodeName == 'project':
+        project.subprojects.append(self._ParseProject(n, parent = project))
 
     return project
 
+  def GetProjectPaths(self, name, path):
+    relpath = path
+    if self.IsMirror:
+      worktree = None
+      gitdir = os.path.join(self.topdir, '%s.git' % name)
+    else:
+      worktree = os.path.join(self.topdir, path).replace('\\', '/')
+      gitdir = os.path.join(self.repodir, 'projects', '%s.git' % path)
+    return relpath, worktree, gitdir
+
+  def GetSubprojectName(self, parent, submodule_path):
+    return os.path.join(parent.name, submodule_path)
+
+  def _JoinRelpath(self, parent_relpath, relpath):
+    return os.path.join(parent_relpath, relpath)
+
+  def _UnjoinRelpath(self, parent_relpath, relpath):
+    return os.path.relpath(relpath, parent_relpath)
+
+  def GetSubprojectPaths(self, parent, path):
+    relpath = self._JoinRelpath(parent.relpath, path)
+    gitdir = os.path.join(parent.gitdir, 'subprojects', '%s.git' % path)
+    if self.IsMirror:
+      worktree = None
+    else:
+      worktree = os.path.join(parent.worktree, path).replace('\\', '/')
+    return relpath, worktree, gitdir
+
   def _ParseCopyFile(self, project, node):
     src = self._reqatt(node, 'src')
     dest = self._reqatt(node, 'dest')
diff --git a/project.py b/project.py
index 472b1d3..2989d38 100644
--- a/project.py
+++ b/project.py
@@ -22,6 +22,7 @@
 import stat
 import subprocess
 import sys
+import tempfile
 import time
 
 from color import Coloring
@@ -484,7 +485,28 @@
                rebase = True,
                groups = None,
                sync_c = False,
-               upstream = None):
+               upstream = None,
+               parent = None,
+               is_derived = False):
+    """Init a Project object.
+
+    Args:
+      manifest: The XmlManifest object.
+      name: The `name` attribute of manifest.xml's project element.
+      remote: RemoteSpec object specifying its remote's properties.
+      gitdir: Absolute path of git directory.
+      worktree: Absolute path of git working tree.
+      relpath: Relative path of git working tree to repo's top directory.
+      revisionExpr: The `revision` attribute of manifest.xml's project element.
+      revisionId: git commit id for checking out.
+      rebase: The `rebase` attribute of manifest.xml's project element.
+      groups: The `groups` attribute of manifest.xml's project element.
+      sync_c: The `sync-c` attribute of manifest.xml's project element.
+      upstream: The `upstream` attribute of manifest.xml's project element.
+      parent: The parent Project object.
+      is_derived: False if the project was explicitly defined in the manifest;
+                  True if the project is a discovered submodule.
+    """
     self.manifest = manifest
     self.name = name
     self.remote = remote
@@ -507,6 +529,9 @@
     self.groups = groups
     self.sync_c = sync_c
     self.upstream = upstream
+    self.parent = parent
+    self.is_derived = is_derived
+    self.subprojects = []
 
     self.snapshots = {}
     self.copyfiles = []
@@ -527,6 +552,14 @@
     self.enabled_repo_hooks = []
 
   @property
+  def Registered(self):
+    return self.parent and not self.is_derived
+
+  @property
+  def Derived(self):
+    return self.is_derived
+
+  @property
   def Exists(self):
     return os.path.isdir(self.gitdir)
 
@@ -1370,6 +1403,151 @@
     return kept
 
 
+## Submodule Management ##
+
+  def GetRegisteredSubprojects(self):
+    result = []
+    def rec(subprojects):
+      if not subprojects:
+        return
+      result.extend(subprojects)
+      for p in subprojects:
+        rec(p.subprojects)
+    rec(self.subprojects)
+    return result
+
+  def _GetSubmodules(self):
+    # Unfortunately we cannot call `git submodule status --recursive` here
+    # because the working tree might not exist yet, and it cannot be used
+    # without a working tree in its current implementation.
+
+    def get_submodules(gitdir, rev, path):
+      # Parse .gitmodules for submodule sub_paths and sub_urls
+      sub_paths, sub_urls = parse_gitmodules(gitdir, rev)
+      if not sub_paths:
+        return []
+      # Run `git ls-tree` to read SHAs of submodule object, which happen to be
+      # revision of submodule repository
+      sub_revs = git_ls_tree(gitdir, rev, sub_paths)
+      submodules = []
+      for sub_path, sub_url in zip(sub_paths, sub_urls):
+        try:
+          sub_rev = sub_revs[sub_path]
+        except KeyError:
+          # Ignore non-exist submodules
+          continue
+        sub_gitdir = self.manifest.GetSubprojectPaths(self, sub_path)[2]
+        submodules.append((sub_rev, sub_path, sub_url))
+      return submodules
+
+    re_path = re.compile(r'submodule.(\w+).path')
+    re_url = re.compile(r'submodule.(\w+).url')
+    def parse_gitmodules(gitdir, rev):
+      cmd = ['cat-file', 'blob', '%s:.gitmodules' % rev]
+      try:
+        p = GitCommand(None, cmd, capture_stdout = True, capture_stderr = True,
+                       bare = True, gitdir = gitdir)
+      except GitError as e:
+        return [], []
+      if p.Wait() != 0:
+        return [], []
+
+      gitmodules_lines = []
+      fd, temp_gitmodules_path = tempfile.mkstemp()
+      try:
+        os.write(fd, p.stdout)
+        os.close(fd)
+        cmd = ['config', '--file', temp_gitmodules_path, '--list']
+        p = GitCommand(None, cmd, capture_stdout = True, capture_stderr = True,
+                       bare = True, gitdir = gitdir)
+        if p.Wait() != 0:
+          return [], []
+        gitmodules_lines = p.stdout.split('\n')
+      except GitError as e:
+        return [], []
+      finally:
+        os.remove(temp_gitmodules_path)
+
+      names = set()
+      paths = {}
+      urls = {}
+      for line in gitmodules_lines:
+        if not line:
+          continue
+        key, value = line.split('=')
+        m = re_path.match(key)
+        if m:
+          names.add(m.group(1))
+          paths[m.group(1)] = value
+          continue
+        m = re_url.match(key)
+        if m:
+          names.add(m.group(1))
+          urls[m.group(1)] = value
+          continue
+      names = sorted(names)
+      return [paths[name] for name in names], [urls[name] for name in names]
+
+    def git_ls_tree(gitdir, rev, paths):
+      cmd = ['ls-tree', rev, '--']
+      cmd.extend(paths)
+      try:
+        p = GitCommand(None, cmd, capture_stdout = True, capture_stderr = True,
+                       bare = True, gitdir = gitdir)
+      except GitError:
+        return []
+      if p.Wait() != 0:
+        return []
+      objects = {}
+      for line in p.stdout.split('\n'):
+        if not line.strip():
+          continue
+        object_rev, object_path = line.split()[2:4]
+        objects[object_path] = object_rev
+      return objects
+
+    try:
+      rev = self.GetRevisionId()
+    except GitError:
+      return []
+    return get_submodules(self.gitdir, rev, '')
+
+  def GetDerivedSubprojects(self):
+    result = []
+    if not self.Exists:
+      # If git repo does not exist yet, querying its submodules will
+      # mess up its states; so return here.
+      return result
+    for rev, path, url in self._GetSubmodules():
+      name = self.manifest.GetSubprojectName(self, path)
+      project = self.manifest.projects.get(name)
+      if project and project.Registered:
+        # If it has been registered, skip it because we are searching
+        # derived subprojects, but search for its derived subprojects.
+        result.extend(project.GetDerivedSubprojects())
+        continue
+      relpath, worktree, gitdir = self.manifest.GetSubprojectPaths(self, path)
+      remote = RemoteSpec(self.remote.name,
+                          url = url,
+                          review = self.remote.review)
+      subproject = Project(manifest = self.manifest,
+                           name = name,
+                           remote = remote,
+                           gitdir = gitdir,
+                           worktree = worktree,
+                           relpath = relpath,
+                           revisionExpr = self.revisionExpr,
+                           revisionId = rev,
+                           rebase = self.rebase,
+                           groups = self.groups,
+                           sync_c = self.sync_c,
+                           parent = self,
+                           is_derived = True)
+      result.append(subproject)
+      result.extend(subproject.GetDerivedSubprojects())
+    return result
+
+
 ## Direct Git Commands ##
 
   def _RemoteFetch(self, name=None,
diff --git a/subcmds/sync.py b/subcmds/sync.py
index e68a025..90e2908 100644
--- a/subcmds/sync.py
+++ b/subcmds/sync.py
@@ -503,12 +503,31 @@
         to_fetch.append(rp)
       to_fetch.extend(all_projects)
 
-      self._Fetch(to_fetch, opt)
+      fetched = self._Fetch(to_fetch, opt)
       _PostRepoFetch(rp, opt.no_repo_verify)
       if opt.network_only:
         # bail out now; the rest touches the working tree
         return
 
+      # Iteratively fetch missing and/or nested unregistered submodules
+      previously_missing_set = set()
+      while True:
+        self.manifest._Unload()
+        all = self.GetProjects(args, missing_ok=True)
+        missing = []
+        for project in all:
+          if project.gitdir not in fetched:
+            missing.append(project)
+        if not missing:
+          break
+        # Stop us from non-stopped fetching actually-missing repos: If set of
+        # missing repos has not been changed from last fetch, we break.
+        missing_set = set(p.name for p in missing)
+        if previously_missing_set == missing_set:
+          break
+        previously_missing_set = missing_set
+        fetched.update(self._Fetch(missing, opt))
+
     if self.manifest.IsMirror:
       # bail out now, we have no working tree
       return