Message ID | 20220826131047.51373-1-liezhi.yang@windriver.com |
---|---|
State | New |
Headers | show |
Series | [RFC,V2] bitbake: fetch2/git: Use git fetch to shallow clone revisions | expand |
> -----Original Message----- > From: bitbake-devel@lists.openembedded.org <bitbake- > devel@lists.openembedded.org> On Behalf Of Robert Yang > Sent: den 26 augusti 2022 15:11 > To: bitbake-devel@lists.openembedded.org > Subject: [bitbake-devel] [RFC][PATCH V2] bitbake: fetch2/git: Use git > fetch to shallow clone revisions > > * V2 > Fixed typos in commit message Patch history should go after the --- below. > The "git clone --depth" only works for refs, doesn't support revisions, but > "git fetch --depth" supports revisions, so use it to do the shallow clone, the > idea is from "git clone --recurse-submodules --shallow-submodules". > > The workflow is (Only enabled when BB_GIT_SHALLOW = "1"): > $ git init --bare <clonedir> > $ git remote add origin <url> > $ git fetch origin --depth <depth> revision > $ git branch <branchname> FETCH_HEAD > $ git tag v<branchname> FETCH_HEAD > > Here is the testing data based on poky, the testing server has a very good > network bandwidth: > > Add 'BB_GIT_SHALLOW = "1"' conf/local.conf > $ rm -fr tmp downloads # Fresh download for each build > $ time bitbake world --runall=fetch > $ du -sh downloads/git2/ > > Full Shallow Saved > -------------------------------------- > Time: 15m59s 2m31s 84% (13m28s) > Size: 12G 1.2G 90% (10.8G) > > * The Size is for downloads/git2/, the tarballs are not counted. > > We can see that it saves a lot of download time and disk space, for > example: > > linux-yocto: 2.8G -> 228M > llvm: 2.5G -> 171M > cryptography: 1.5G -> 35M > > And "$ bitbake world" works well. > > This a RFC patch, please feel free to give you comments. 
> > Signed-off-by: Robert Yang <liezhi.yang@windriver.com> > --- > bitbake/lib/bb/fetch2/git.py | 83 ++++++++++++++++++++++++++++-------- > 1 file changed, 66 insertions(+), 17 deletions(-) > > diff --git a/bitbake/lib/bb/fetch2/git.py b/bitbake/lib/bb/fetch2/git.py > index 4534bd75800..57bb61d5ee1 100644 > --- a/bitbake/lib/bb/fetch2/git.py > +++ b/bitbake/lib/bb/fetch2/git.py > @@ -244,6 +244,7 @@ class Git(FetchMethod): > ud.unresolvedrev[name] = 'HEAD' > > ud.basecmd = d.getVar("FETCHCMD_git") or "git -c core.fsyncobjectfiles=0 -c gc.autoDetach=false -c core.pager=cat" > + ud.basecmd = "LANG=C %s" % ud.basecmd > > write_tarballs = d.getVar("BB_GENERATE_MIRROR_TARBALLS") or "0" > ud.write_tarballs = write_tarballs != "0" or ud.rebaseable > @@ -344,6 +345,49 @@ class Git(FetchMethod): > return False > return True > > + def shallow_clone_by_fetch(self, ud, repourl, d): > + """ > + Use "git fetch --depth <depth> revision" to implement shallow clone > + since git can't clone a revision, a better solution should be: > + "git fetch --depth <depth> revision:<branchname>" but it doesn't work > + when revision is a tag, e.g.: > + error: cannot update ref 'refs/heads/master': trying to write > + non-commit object <revision> to branch 'refs/heads/master' > + """ > + > + import datetime > + > + depth = ud.shallow_depths[ud.names[0]] > + revision = ud.revisions[ud.names[0]] > + branchname = ud.branches[ud.names[0]] > + if not branchname: > + branchname = "master" > + > + # Rename branchname if it exists which can: > + # - Avoid conflicts during update > + # - Keep the revision on a branch so that "git submodule update --recursive" > + # can work since it requires the revision on a branch. > + branch_path = os.path.join(ud.clonedir, 'refs/heads/%s' % branchname) > + if os.path.exists(branch_path): > + os.rename(branch_path, '%s.%s' % (branch_path, datetime.datetime.now().strftime("%Y%m%d%H%M%S"))) Any reason this is done using os.rename() rather than `git branch -m`? 
> + > + init_cmd = "%s init --bare -q" % ud.basecmd > + add_remote_cmd = "%s remote add origin %s" % (ud.basecmd, shlex.quote(repourl)) > + fetch_cmd = "%s fetch --progress origin --depth %s %s" % (ud.basecmd, depth, revision) > + # Create both branch and tag for the revision > + branch_cmd = "%s branch -f %s FETCH_HEAD" % (ud.basecmd, branchname) > + tag_cmd = "%s tag -f v%s FETCH_HEAD" % (ud.basecmd, branchname) Why not define these as a list instead: cmds = [ "%s init --bare -q" % ud.basecmd, "%s remote add origin %s" % (ud.basecmd, shlex.quote(repourl)), "%s fetch --progress origin --depth %s %s" % (ud.basecmd, depth, revision), # Create both branch and tag for the revision "%s branch -f %s FETCH_HEAD" % (ud.basecmd, branchname), "%s tag -f v%s FETCH_HEAD" % (ud.basecmd, branchname), ] > + > + if ud.proto.lower() != 'file': > + bb.fetch2.check_network_access(d, fetch_cmd, ud.url) > + > + if not os.path.exists(ud.clonedir): > + bb.utils.mkdirhier(ud.clonedir) > + > + progresshandler = GitProgressHandler(d) > + for cmd in (init_cmd, add_remote_cmd, fetch_cmd, branch_cmd, tag_cmd): > + runfetchcmd(cmd, d, log=progresshandler, workdir=ud.clonedir) > + > def download(self, ud, d): > """Fetch url""" > > @@ -360,7 +404,7 @@ class Git(FetchMethod): > else: > tmpdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR')) > runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=tmpdir) > - fetch_cmd = "LANG=C %s fetch -f --progress %s " % (ud.basecmd, shlex.quote(tmpdir)) > + fetch_cmd = "%s fetch -f --progress %s " % (ud.basecmd, shlex.quote(tmpdir)) > runfetchcmd(fetch_cmd, d, workdir=ud.clonedir) > repourl = self._get_repo_url(ud) > > @@ -369,27 +413,32 @@ class Git(FetchMethod): > # We do this since git will use a "-l" option automatically for local urls where possible > if repourl.startswith("file://"): > repourl = repourl[7:] > - clone_cmd = "LANG=C %s clone --bare --mirror %s %s --progress" % (ud.basecmd, shlex.quote(repourl), ud.clonedir) > - if ud.proto.lower() != 'file': > - 
bb.fetch2.check_network_access(d, clone_cmd, ud.url) > - progresshandler = GitProgressHandler(d) > - runfetchcmd(clone_cmd, d, log=progresshandler) > + if ud.shallow: > + self.shallow_clone_by_fetch(ud, repourl, d) > + else: > + clone_cmd = "%s clone --bare --mirror %s %s --progress" % (ud.basecmd, shlex.quote(repourl), ud.clonedir) > + progresshandler = GitProgressHandler(d) > + if ud.proto.lower() != 'file': > + bb.fetch2.check_network_access(d, clone_cmd, ud.url) > + runfetchcmd(clone_cmd, d, log=progresshandler) > > # Update the checkout if needed > if self.clonedir_need_update(ud, d): > output = runfetchcmd("%s remote" % ud.basecmd, d, quiet=True, workdir=ud.clonedir) > if "origin" in output: > - runfetchcmd("%s remote rm origin" % ud.basecmd, d, workdir=ud.clonedir) > - > - runfetchcmd("%s remote add --mirror=fetch origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=ud.clonedir) > - fetch_cmd = "LANG=C %s fetch -f --progress %s refs/*:refs/*" % (ud.basecmd, shlex.quote(repourl)) > - if ud.proto.lower() != 'file': > - bb.fetch2.check_network_access(d, fetch_cmd, ud.url) > - progresshandler = GitProgressHandler(d) > - runfetchcmd(fetch_cmd, d, log=progresshandler, workdir=ud.clonedir) > - runfetchcmd("%s prune-packed" % ud.basecmd, d, workdir=ud.clonedir) > - runfetchcmd("%s pack-refs --all" % ud.basecmd, d, workdir=ud.clonedir) > - runfetchcmd("%s pack-redundant --all | xargs -r rm" % ud.basecmd, d, workdir=ud.clonedir) > + runfetchcmd("%s remote rm origin" % ud.basecmd, d, workdir=ud.clonedir) > + if ud.shallow: > + self.shallow_clone_by_fetch(ud, repourl, d) > + else: > + runfetchcmd("%s remote add --mirror=fetch origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=ud.clonedir) > + fetch_cmd = "%s fetch -f --progress %s refs/*:refs/*" % (ud.basecmd, shlex.quote(repourl)) > + if ud.proto.lower() != 'file': > + bb.fetch2.check_network_access(d, fetch_cmd, ud.url) > + progresshandler = GitProgressHandler(d) > + runfetchcmd(fetch_cmd, d, 
log=progresshandler, workdir=ud.clonedir) > + runfetchcmd("%s prune-packed" % ud.basecmd, d, workdir=ud.clonedir) > + runfetchcmd("%s pack-refs --all" % ud.basecmd, d, workdir=ud.clonedir) > + runfetchcmd("%s pack-redundant --all | xargs -r rm" % ud.basecmd, d, workdir=ud.clonedir) > try: > os.unlink(ud.fullmirror) > except OSError as exc: > -- > 2.35.1 //Peter
Hi Peter, On 8/26/22 22:21, Peter Kjellerstedt wrote: >> -----Original Message----- >> From: bitbake-devel@lists.openembedded.org <bitbake- >> devel@lists.openembedded.org> On Behalf Of Robert Yang >> Sent: den 26 augusti 2022 15:11 >> To: bitbake-devel@lists.openembedded.org >> Subject: [bitbake-devel] [RFC][PATCH V2] bitbake: fetch2/git: Use git >> fetch to shallow clone revisions >> >> * V2 >> Fixed typos in commit message > > Patch history should go after the --- below. > >> The "git clone --depth" only works for refs, doesn't support revisions, but >> "git fetch --depth" supports revisions, so use it to do the shallow clone, the >> idea is from "git clone --recurse-submodules --shallow-submodules". >> >> The workflow is (Only enabled when BB_GIT_SHALLOW = "1"): >> $ git init --bare <clonedir> >> $ git remote add origin <url> >> $ git fetch origin --depth <depth> revision >> $ git branch <branchname> FETCH_HEAD >> $ git tag v<branchname> FETCH_HEAD >> >> Here is the testing data based on poky, the testing server has a very good >> network bandwidth: >> >> Add 'BB_GIT_SHALLOW = "1"' conf/local.conf >> $ rm -fr tmp downloads # Fresh download for each build >> $ time bitbake world --runall=fetch >> $ du -sh downloads/git2/ >> >> Full Shallow Saved >> -------------------------------------- >> Time: 15m59s 2m31s 84% (13m28s) >> Size: 12G 1.2G 90% (10.8G) >> >> * The Size is for downloads/git2/, the tarballs are not counted. >> >> We can see that it saves a lot of download time and disk space, for >> example: >> >> linux-yocto: 2.8G -> 228M >> llvm: 2.5G -> 171M >> cryptography: 1.5G -> 35M >> >> And "$ bitbake world" works well. >> >> This a RFC patch, please feel free to give you comments. 
>> >> Signed-off-by: Robert Yang <liezhi.yang@windriver.com> >> --- >> bitbake/lib/bb/fetch2/git.py | 83 ++++++++++++++++++++++++++++-------- >> 1 file changed, 66 insertions(+), 17 deletions(-) >> >> diff --git a/bitbake/lib/bb/fetch2/git.py b/bitbake/lib/bb/fetch2/git.py >> index 4534bd75800..57bb61d5ee1 100644 >> --- a/bitbake/lib/bb/fetch2/git.py >> +++ b/bitbake/lib/bb/fetch2/git.py >> @@ -244,6 +244,7 @@ class Git(FetchMethod): >> ud.unresolvedrev[name] = 'HEAD' >> >> ud.basecmd = d.getVar("FETCHCMD_git") or "git -c core.fsyncobjectfiles=0 -c gc.autoDetach=false -c core.pager=cat" >> + ud.basecmd = "LANG=C %s" % ud.basecmd >> >> write_tarballs = d.getVar("BB_GENERATE_MIRROR_TARBALLS") or "0" >> ud.write_tarballs = write_tarballs != "0" or ud.rebaseable >> @@ -344,6 +345,49 @@ class Git(FetchMethod): >> return False >> return True >> >> + def shallow_clone_by_fetch(self, ud, repourl, d): >> + """ >> + Use "git fetch --depth <depth> revision" to implement shallow clone >> + since git can't clone a revision, a better solution should be: >> + "git fetch --depth <depth> revision:<branchname>" but it doesn't work >> + when revision is a tag, e.g.: >> + error: cannot update ref 'refs/heads/master': trying to write >> + non-commit object <revision> to branch 'refs/heads/master' >> + """ >> + >> + import datetime >> + >> + depth = ud.shallow_depths[ud.names[0]] >> + revision = ud.revisions[ud.names[0]] >> + branchname = ud.branches[ud.names[0]] >> + if not branchname: >> + branchname = "master" >> + >> + # Rename branchname if it exists which can: >> + # - Avoid conflicts during update >> + # - Keep the revision on a branch so that "git submodule update --recursive" >> + # can work since it requires the revision on a branch. 
>> + branch_path = os.path.join(ud.clonedir, 'refs/heads/%s' % branchname) >> + if os.path.exists(branch_path): >> + os.rename(branch_path, '%s.%s' % (branch_path, datetime.datetime.now().strftime("%Y%m%d%H%M%S"))) > > Any reason this is done using os.rename() rather than `git branch -m? It is because this is simpler and to keep align with branch_path, otherwise, we need: - git branch --list to get the branch list and split them by '\n', remove the star. - Check branch in the list - git branch -m to rename the branch > >> + >> + init_cmd = "%s init --bare -q" % ud.basecmd >> + add_remote_cmd = "%s remote add origin %s" % (ud.basecmd, shlex.quote(repourl)) >> + fetch_cmd = "%s fetch --progress origin --depth %s %s" % (ud.basecmd, depth, revision) >> + # Create both branch and tag for the revision >> + branch_cmd = "%s branch -f %s FETCH_HEAD" % (ud.basecmd, branchname) >> + tag_cmd = "%s tag -f v%s FETCH_HEAD" % (ud.basecmd, branchname) > > Why not define these as a list instead: > > cmds = [ > "%s init --bare -q" % ud.basecmd, > "%s remote add origin %s" % (ud.basecmd, shlex.quote(repourl)) > "%s fetch --progress origin --depth %s %s" % (ud.basecmd, depth, revision), > # Create both branch and tag for the revision > "%s branch -f %s FETCH_HEAD" % (ud.basecmd, branchname), > "%s tag -f v%s FETCH_HEAD" % (ud.basecmd, branchname), > ] Thanks, I will update it with others' comments in the following days (If there are any). 
// Robert > >> + >> + if ud.proto.lower() != 'file': >> + bb.fetch2.check_network_access(d, fetch_cmd, ud.url) >> + >> + if not os.path.exists(ud.clonedir): >> + bb.utils.mkdirhier(ud.clonedir) >> + >> + progresshandler = GitProgressHandler(d) >> + for cmd in (init_cmd, add_remote_cmd, fetch_cmd, branch_cmd, tag_cmd): >> + runfetchcmd(cmd, d, log=progresshandler, workdir=ud.clonedir) >> + >> def download(self, ud, d): >> """Fetch url""" >> >> @@ -360,7 +404,7 @@ class Git(FetchMethod): >> else: >> tmpdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR')) >> runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=tmpdir) >> - fetch_cmd = "LANG=C %s fetch -f --progress %s " % (ud.basecmd, shlex.quote(tmpdir)) >> + fetch_cmd = "%s fetch -f --progress %s " % (ud.basecmd, shlex.quote(tmpdir)) >> runfetchcmd(fetch_cmd, d, workdir=ud.clonedir) >> repourl = self._get_repo_url(ud) >> >> @@ -369,27 +413,32 @@ class Git(FetchMethod): >> # We do this since git will use a "-l" option automatically for local urls where possible >> if repourl.startswith("file://"): >> repourl = repourl[7:] >> - clone_cmd = "LANG=C %s clone --bare --mirror %s %s --progress" % (ud.basecmd, shlex.quote(repourl), ud.clonedir) >> - if ud.proto.lower() != 'file': >> - bb.fetch2.check_network_access(d, clone_cmd, ud.url) >> - progresshandler = GitProgressHandler(d) >> - runfetchcmd(clone_cmd, d, log=progresshandler) >> + if ud.shallow: >> + self.shallow_clone_by_fetch(ud, repourl, d) >> + else: >> + clone_cmd = "%s clone --bare --mirror %s %s --progress" % (ud.basecmd, shlex.quote(repourl), ud.clonedir) >> + progresshandler = GitProgressHandler(d) >> + if ud.proto.lower() != 'file': >> + bb.fetch2.check_network_access(d, clone_cmd, ud.url) >> + runfetchcmd(clone_cmd, d, log=progresshandler) >> >> # Update the checkout if needed >> if self.clonedir_need_update(ud, d): >> output = runfetchcmd("%s remote" % ud.basecmd, d, quiet=True, workdir=ud.clonedir) >> if "origin" in output: >> - runfetchcmd("%s remote rm 
origin" % ud.basecmd, d, workdir=ud.clonedir) >> - >> - runfetchcmd("%s remote add --mirror=fetch origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=ud.clonedir) >> - fetch_cmd = "LANG=C %s fetch -f --progress %s refs/*:refs/*" % (ud.basecmd, shlex.quote(repourl)) >> - if ud.proto.lower() != 'file': >> - bb.fetch2.check_network_access(d, fetch_cmd, ud.url) >> - progresshandler = GitProgressHandler(d) >> - runfetchcmd(fetch_cmd, d, log=progresshandler, workdir=ud.clonedir) >> - runfetchcmd("%s prune-packed" % ud.basecmd, d, workdir=ud.clonedir) >> - runfetchcmd("%s pack-refs --all" % ud.basecmd, d, workdir=ud.clonedir) >> - runfetchcmd("%s pack-redundant --all | xargs -r rm" % ud.basecmd, d, workdir=ud.clonedir) >> + runfetchcmd("%s remote rm origin" % ud.basecmd, d, workdir=ud.clonedir) >> + if ud.shallow: >> + self.shallow_clone_by_fetch(ud, repourl, d) >> + else: >> + runfetchcmd("%s remote add --mirror=fetch origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=ud.clonedir) >> + fetch_cmd = "%s fetch -f --progress %s refs/*:refs/*" % (ud.basecmd, shlex.quote(repourl)) >> + if ud.proto.lower() != 'file': >> + bb.fetch2.check_network_access(d, fetch_cmd, ud.url) >> + progresshandler = GitProgressHandler(d) >> + runfetchcmd(fetch_cmd, d, log=progresshandler, workdir=ud.clonedir) >> + runfetchcmd("%s prune-packed" % ud.basecmd, d, workdir=ud.clonedir) >> + runfetchcmd("%s pack-refs --all" % ud.basecmd, d, workdir=ud.clonedir) >> + runfetchcmd("%s pack-redundant --all | xargs -r rm" % ud.basecmd, d, workdir=ud.clonedir) >> try: >> os.unlink(ud.fullmirror) >> except OSError as exc: >> -- >> 2.35.1 > > //Peter >
> -----Original Message----- > From: Robert Yang <liezhi.yang@windriver.com> > Sent: den 27 augusti 2022 05:37 > To: Peter Kjellerstedt <peter.kjellerstedt@axis.com>; bitbake-devel@lists.openembedded.org > Subject: Re: [bitbake-devel] [RFC][PATCH V2] bitbake: fetch2/git: Use git fetch to shallow clone revisions > > Hi Peter, > > On 8/26/22 22:21, Peter Kjellerstedt wrote: > >> -----Original Message----- > >> From: bitbake-devel@lists.openembedded.org <bitbake-devel@lists.openembedded.org> On Behalf Of Robert Yang > >> Sent: den 26 augusti 2022 15:11 > >> To: bitbake-devel@lists.openembedded.org > >> Subject: [bitbake-devel] [RFC][PATCH V2] bitbake: fetch2/git: Use git fetch to shallow clone revisions > >> > >> * V2 > >> Fixed typos in commit message > > > > Patch history should go after the --- below. > > > >> The "git clone --depth" only works for refs, doesn't support revisions, but > >> "git fetch --depth" supports revisions, so use it to do the shallow clone, the > >> idea is from "git clone --recurse-submodules --shallow-submodules". > >> > >> The workflow is (Only enabled when BB_GIT_SHALLOW = "1"): > >> $ git init --bare <clonedir> > >> $ git remote add origin <url> > >> $ git fetch origin --depth <depth> revision > >> $ git branch <branchname> FETCH_HEAD > >> $ git tag v<branchname> FETCH_HEAD > >> > >> Here is the testing data based on poky, the testing server has a very good > >> network bandwidth: > >> > >> Add 'BB_GIT_SHALLOW = "1"' conf/local.conf > >> $ rm -fr tmp downloads # Fresh download for each build > >> $ time bitbake world --runall=fetch > >> $ du -sh downloads/git2/ > >> > >> Full Shallow Saved > >> -------------------------------------- > >> Time: 15m59s 2m31s 84% (13m28s) > >> Size: 12G 1.2G 90% (10.8G) > >> > >> * The Size is for downloads/git2/, the tarballs are not counted. 
> >> > >> We can see that it saves a lot of download time and disk space, for > >> example: > >> > >> linux-yocto: 2.8G -> 228M > >> llvm: 2.5G -> 171M > >> cryptography: 1.5G -> 35M > >> > >> And "$ bitbake world" works well. > >> > >> This a RFC patch, please feel free to give you comments. > >> > >> Signed-off-by: Robert Yang <liezhi.yang@windriver.com> > >> --- > >> bitbake/lib/bb/fetch2/git.py | 83 ++++++++++++++++++++++++++++-------- > >> 1 file changed, 66 insertions(+), 17 deletions(-) > >> > >> diff --git a/bitbake/lib/bb/fetch2/git.py b/bitbake/lib/bb/fetch2/git.py > >> index 4534bd75800..57bb61d5ee1 100644 > >> --- a/bitbake/lib/bb/fetch2/git.py > >> +++ b/bitbake/lib/bb/fetch2/git.py > >> @@ -244,6 +244,7 @@ class Git(FetchMethod): > >> ud.unresolvedrev[name] = 'HEAD' > >> > >> ud.basecmd = d.getVar("FETCHCMD_git") or "git -c core.fsyncobjectfiles=0 -c gc.autoDetach=false -c core.pager=cat" > >> + ud.basecmd = "LANG=C %s" % ud.basecmd > >> > >> write_tarballs = d.getVar("BB_GENERATE_MIRROR_TARBALLS") or "0" > >> ud.write_tarballs = write_tarballs != "0" or ud.rebaseable > >> @@ -344,6 +345,49 @@ class Git(FetchMethod): > >> return False > >> return True > >> > >> + def shallow_clone_by_fetch(self, ud, repourl, d): > >> + """ > >> + Use "git fetch --depth <depth> revision" to implement shallow clone > >> + since git can't clone a revision, a better solution should be: > >> + "git fetch --depth <depth> revision:<branchname>" but it doesn't work > >> + when revision is a tag, e.g.: > >> + error: cannot update ref 'refs/heads/master': trying to write > >> + non-commit object <revision> to branch 'refs/heads/master' > >> + """ > >> + > >> + import datetime > >> + > >> + depth = ud.shallow_depths[ud.names[0]] > >> + revision = ud.revisions[ud.names[0]] > >> + branchname = ud.branches[ud.names[0]] > >> + if not branchname: > >> + branchname = "master" > >> + > >> + # Rename branchname if it exists which can: > >> + # - Avoid conflicts during update > >> + # - 
Keep the revision on a branch so that "git submodule update --recursive" > >> + # can work since it requires the revision on a branch. > >> + branch_path = os.path.join(ud.clonedir, 'refs/heads/%s' % branchname) > >> + if os.path.exists(branch_path): > >> + os.rename(branch_path, '%s.%s' % (branch_path, datetime.datetime.now().strftime("%Y%m%d%H%M%S"))) > > > > Any reason this is done using os.rename() rather than `git branch -m? > > It is because this is simpler and to keep align with branch_path, otherwise, we > need: > - git branch --list to get the branch list and split them by '\n', remove the star. > - Check branch in the list > - git branch -m to rename the branch If you accept that the command can fail, then you do not need to list the branches. Just do the rename. If the branch exists, then the rename will succeed, otherwise it will fail, but that is expected and ignored. What I do not like about the use of os.rename() here is that it uses internal knowledge of how Git stores its data. //Peter
On 8/29/22 6:46 PM, Peter Kjellerstedt wrote: >> -----Original Message----- >> From: Robert Yang <liezhi.yang@windriver.com> >> Sent: den 27 augusti 2022 05:37 >> To: Peter Kjellerstedt <peter.kjellerstedt@axis.com>; bitbake-devel@lists.openembedded.org >> Subject: Re: [bitbake-devel] [RFC][PATCH V2] bitbake: fetch2/git: Use git fetch to shallow clone revisions >> >> Hi Peter, >> >> On 8/26/22 22:21, Peter Kjellerstedt wrote: >>>> -----Original Message----- >>>> From: bitbake-devel@lists.openembedded.org <bitbake-devel@lists.openembedded.org> On Behalf Of Robert Yang >>>> Sent: den 26 augusti 2022 15:11 >>>> To: bitbake-devel@lists.openembedded.org >>>> Subject: [bitbake-devel] [RFC][PATCH V2] bitbake: fetch2/git: Use git fetch to shallow clone revisions >>>> >>>> * V2 >>>> Fixed typos in commit message >>> >>> Patch history should go after the --- below. >>> >>>> The "git clone --depth" only works for refs, doesn't support revisions, but >>>> "git fetch --depth" supports revisions, so use it to do the shallow clone, the >>>> idea is from "git clone --recurse-submodules --shallow-submodules". >>>> >>>> The workflow is (Only enabled when BB_GIT_SHALLOW = "1"): >>>> $ git init --bare <clonedir> >>>> $ git remote add origin <url> >>>> $ git fetch origin --depth <depth> revision >>>> $ git branch <branchname> FETCH_HEAD >>>> $ git tag v<branchname> FETCH_HEAD >>>> >>>> Here is the testing data based on poky, the testing server has a very good >>>> network bandwidth: >>>> >>>> Add 'BB_GIT_SHALLOW = "1"' conf/local.conf >>>> $ rm -fr tmp downloads # Fresh download for each build >>>> $ time bitbake world --runall=fetch >>>> $ du -sh downloads/git2/ >>>> >>>> Full Shallow Saved >>>> -------------------------------------- >>>> Time: 15m59s 2m31s 84% (13m28s) >>>> Size: 12G 1.2G 90% (10.8G) >>>> >>>> * The Size is for downloads/git2/, the tarballs are not counted. 
>>>> >>>> We can see that it saves a lot of download time and disk space, for >>>> example: >>>> >>>> linux-yocto: 2.8G -> 228M >>>> llvm: 2.5G -> 171M >>>> cryptography: 1.5G -> 35M >>>> >>>> And "$ bitbake world" works well. >>>> >>>> This a RFC patch, please feel free to give you comments. >>>> >>>> Signed-off-by: Robert Yang <liezhi.yang@windriver.com> >>>> --- >>>> bitbake/lib/bb/fetch2/git.py | 83 ++++++++++++++++++++++++++++-------- >>>> 1 file changed, 66 insertions(+), 17 deletions(-) >>>> >>>> diff --git a/bitbake/lib/bb/fetch2/git.py b/bitbake/lib/bb/fetch2/git.py >>>> index 4534bd75800..57bb61d5ee1 100644 >>>> --- a/bitbake/lib/bb/fetch2/git.py >>>> +++ b/bitbake/lib/bb/fetch2/git.py >>>> @@ -244,6 +244,7 @@ class Git(FetchMethod): >>>> ud.unresolvedrev[name] = 'HEAD' >>>> >>>> ud.basecmd = d.getVar("FETCHCMD_git") or "git -c core.fsyncobjectfiles=0 -c gc.autoDetach=false -c core.pager=cat" >>>> + ud.basecmd = "LANG=C %s" % ud.basecmd >>>> >>>> write_tarballs = d.getVar("BB_GENERATE_MIRROR_TARBALLS") or "0" >>>> ud.write_tarballs = write_tarballs != "0" or ud.rebaseable >>>> @@ -344,6 +345,49 @@ class Git(FetchMethod): >>>> return False >>>> return True >>>> >>>> + def shallow_clone_by_fetch(self, ud, repourl, d): >>>> + """ >>>> + Use "git fetch --depth <depth> revision" to implement shallow clone >>>> + since git can't clone a revision, a better solution should be: >>>> + "git fetch --depth <depth> revision:<branchname>" but it doesn't work >>>> + when revision is a tag, e.g.: >>>> + error: cannot update ref 'refs/heads/master': trying to write >>>> + non-commit object <revision> to branch 'refs/heads/master' >>>> + """ >>>> + >>>> + import datetime >>>> + >>>> + depth = ud.shallow_depths[ud.names[0]] >>>> + revision = ud.revisions[ud.names[0]] >>>> + branchname = ud.branches[ud.names[0]] >>>> + if not branchname: >>>> + branchname = "master" >>>> + >>>> + # Rename branchname if it exists which can: >>>> + # - Avoid conflicts during update >>>> + # - 
Keep the revision on a branch so that "git submodule update --recursive" >>>> + # can work since it requires the revision on a branch. >>>> + branch_path = os.path.join(ud.clonedir, 'refs/heads/%s' % branchname) >>>> + if os.path.exists(branch_path): >>>> + os.rename(branch_path, '%s.%s' % (branch_path, datetime.datetime.now().strftime("%Y%m%d%H%M%S"))) >>> >>> Any reason this is done using os.rename() rather than `git branch -m? >> >> It is because this is simpler and to keep align with branch_path, otherwise, we >> need: >> - git branch --list to get the branch list and split them by '\n', remove the star. >> - Check branch in the list >> - git branch -m to rename the branch > > If you accept that the command can fail, then you do not need to list > the branches. Just do the rename. If the branch exists, then the > rename will succeed, otherwise it will fail, but that is expected and > ignored. > > What I do not like about the use of os.rename() here is that it uses > internal knowledge of how Git stores its data. Thanks, I will update it. // Robert > > //Peter >
diff --git a/bitbake/lib/bb/fetch2/git.py b/bitbake/lib/bb/fetch2/git.py index 4534bd75800..57bb61d5ee1 100644 --- a/bitbake/lib/bb/fetch2/git.py +++ b/bitbake/lib/bb/fetch2/git.py @@ -244,6 +244,7 @@ class Git(FetchMethod): ud.unresolvedrev[name] = 'HEAD' ud.basecmd = d.getVar("FETCHCMD_git") or "git -c core.fsyncobjectfiles=0 -c gc.autoDetach=false -c core.pager=cat" + ud.basecmd = "LANG=C %s" % ud.basecmd write_tarballs = d.getVar("BB_GENERATE_MIRROR_TARBALLS") or "0" ud.write_tarballs = write_tarballs != "0" or ud.rebaseable @@ -344,6 +345,49 @@ class Git(FetchMethod): return False return True + def shallow_clone_by_fetch(self, ud, repourl, d): + """ + Use "git fetch --depth <depth> revision" to implement shallow clone + since git can't clone a revision, a better solution should be: + "git fetch --depth <depth> revision:<branchname>" but it doesn't work + when revision is a tag, e.g.: + error: cannot update ref 'refs/heads/master': trying to write + non-commit object <revision> to branch 'refs/heads/master' + """ + + import datetime + + depth = ud.shallow_depths[ud.names[0]] + revision = ud.revisions[ud.names[0]] + branchname = ud.branches[ud.names[0]] + if not branchname: + branchname = "master" + + # Rename branchname if it exists which can: + # - Avoid conflicts during update + # - Keep the revision on a branch so that "git submodule update --recursive" + # can work since it requires the revision on a branch. 
+ branch_path = os.path.join(ud.clonedir, 'refs/heads/%s' % branchname) + if os.path.exists(branch_path): + os.rename(branch_path, '%s.%s' % (branch_path, datetime.datetime.now().strftime("%Y%m%d%H%M%S"))) + + init_cmd = "%s init --bare -q" % ud.basecmd + add_remote_cmd = "%s remote add origin %s" % (ud.basecmd, shlex.quote(repourl)) + fetch_cmd = "%s fetch --progress origin --depth %s %s" % (ud.basecmd, depth, revision) + # Create both branch and tag for the revision + branch_cmd = "%s branch -f %s FETCH_HEAD" % (ud.basecmd, branchname) + tag_cmd = "%s tag -f v%s FETCH_HEAD" % (ud.basecmd, branchname) + + if ud.proto.lower() != 'file': + bb.fetch2.check_network_access(d, fetch_cmd, ud.url) + + if not os.path.exists(ud.clonedir): + bb.utils.mkdirhier(ud.clonedir) + + progresshandler = GitProgressHandler(d) + for cmd in (init_cmd, add_remote_cmd, fetch_cmd, branch_cmd, tag_cmd): + runfetchcmd(cmd, d, log=progresshandler, workdir=ud.clonedir) + def download(self, ud, d): """Fetch url""" @@ -360,7 +404,7 @@ class Git(FetchMethod): else: tmpdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR')) runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=tmpdir) - fetch_cmd = "LANG=C %s fetch -f --progress %s " % (ud.basecmd, shlex.quote(tmpdir)) + fetch_cmd = "%s fetch -f --progress %s " % (ud.basecmd, shlex.quote(tmpdir)) runfetchcmd(fetch_cmd, d, workdir=ud.clonedir) repourl = self._get_repo_url(ud) @@ -369,27 +413,32 @@ class Git(FetchMethod): # We do this since git will use a "-l" option automatically for local urls where possible if repourl.startswith("file://"): repourl = repourl[7:] - clone_cmd = "LANG=C %s clone --bare --mirror %s %s --progress" % (ud.basecmd, shlex.quote(repourl), ud.clonedir) - if ud.proto.lower() != 'file': - bb.fetch2.check_network_access(d, clone_cmd, ud.url) - progresshandler = GitProgressHandler(d) - runfetchcmd(clone_cmd, d, log=progresshandler) + if ud.shallow: + self.shallow_clone_by_fetch(ud, repourl, d) + else: + clone_cmd = "%s clone --bare 
--mirror %s %s --progress" % (ud.basecmd, shlex.quote(repourl), ud.clonedir) + progresshandler = GitProgressHandler(d) + if ud.proto.lower() != 'file': + bb.fetch2.check_network_access(d, clone_cmd, ud.url) + runfetchcmd(clone_cmd, d, log=progresshandler) # Update the checkout if needed if self.clonedir_need_update(ud, d): output = runfetchcmd("%s remote" % ud.basecmd, d, quiet=True, workdir=ud.clonedir) if "origin" in output: - runfetchcmd("%s remote rm origin" % ud.basecmd, d, workdir=ud.clonedir) - - runfetchcmd("%s remote add --mirror=fetch origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=ud.clonedir) - fetch_cmd = "LANG=C %s fetch -f --progress %s refs/*:refs/*" % (ud.basecmd, shlex.quote(repourl)) - if ud.proto.lower() != 'file': - bb.fetch2.check_network_access(d, fetch_cmd, ud.url) - progresshandler = GitProgressHandler(d) - runfetchcmd(fetch_cmd, d, log=progresshandler, workdir=ud.clonedir) - runfetchcmd("%s prune-packed" % ud.basecmd, d, workdir=ud.clonedir) - runfetchcmd("%s pack-refs --all" % ud.basecmd, d, workdir=ud.clonedir) - runfetchcmd("%s pack-redundant --all | xargs -r rm" % ud.basecmd, d, workdir=ud.clonedir) + runfetchcmd("%s remote rm origin" % ud.basecmd, d, workdir=ud.clonedir) + if ud.shallow: + self.shallow_clone_by_fetch(ud, repourl, d) + else: + runfetchcmd("%s remote add --mirror=fetch origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=ud.clonedir) + fetch_cmd = "%s fetch -f --progress %s refs/*:refs/*" % (ud.basecmd, shlex.quote(repourl)) + if ud.proto.lower() != 'file': + bb.fetch2.check_network_access(d, fetch_cmd, ud.url) + progresshandler = GitProgressHandler(d) + runfetchcmd(fetch_cmd, d, log=progresshandler, workdir=ud.clonedir) + runfetchcmd("%s prune-packed" % ud.basecmd, d, workdir=ud.clonedir) + runfetchcmd("%s pack-refs --all" % ud.basecmd, d, workdir=ud.clonedir) + runfetchcmd("%s pack-redundant --all | xargs -r rm" % ud.basecmd, d, workdir=ud.clonedir) try: os.unlink(ud.fullmirror) except OSError as 
exc:
* V2 Fixed typos in commit message The "git clone --depth" only works for refs, doesn't support revisions, but "git fetch --depth" supports revisions, so use it to do the shallow clone, the idea is from "git clone --recurse-submodules --shallow-submodules". The workflow is (Only enabled when BB_GIT_SHALLOW = "1"): $ git init --bare <clonedir> $ git remote add origin <url> $ git fetch origin --depth <depth> revision $ git branch <branchname> FETCH_HEAD $ git tag v<branchname> FETCH_HEAD Here is the testing data based on poky, the testing server has a very good network bandwidth: Add 'BB_GIT_SHALLOW = "1"' to conf/local.conf $ rm -fr tmp downloads # Fresh download for each build $ time bitbake world --runall=fetch $ du -sh downloads/git2/ Full Shallow Saved -------------------------------------- Time: 15m59s 2m31s 84% (13m28s) Size: 12G 1.2G 90% (10.8G) * The Size is for downloads/git2/, the tarballs are not counted. We can see that it saves a lot of download time and disk space, for example: linux-yocto: 2.8G -> 228M llvm: 2.5G -> 171M cryptography: 1.5G -> 35M And "$ bitbake world" works well. This is an RFC patch, please feel free to give your comments. Signed-off-by: Robert Yang <liezhi.yang@windriver.com> --- bitbake/lib/bb/fetch2/git.py | 83 ++++++++++++++++++++++++++++-------- 1 file changed, 66 insertions(+), 17 deletions(-)