Fix slow patch checking with commits that add or remove many files (#31548)

Running git update-index for every individual file is slow, so add and
remove everything with a single git command.

When such a big commit lands in the default branch, it could cause PR
creation and patch checking for all open PRs to be slow, or time out
entirely. For example, a commit that removes 1383 files was measured to
take more than 60 seconds and timed out. With this change checking took
about a second.

This is related to #27967, though this will not help with commits that
change many lines in few files.

(cherry picked from commit b88e5fc72d99e9d4a0aa9c13f70e0a9e967fe057)
This commit is contained in:
Brecht Van Lommel 2024-07-04 20:57:11 +02:00 committed by Earl Warren
parent f92591b825
commit 33f9fb8150
No known key found for this signature in database
GPG key ID: 0579CB2928A78A00
2 changed files with 48 additions and 16 deletions

View file

@ -104,11 +104,8 @@ func (repo *Repository) RemoveFilesFromIndex(filenames ...string) error {
buffer := new(bytes.Buffer) buffer := new(bytes.Buffer)
for _, file := range filenames { for _, file := range filenames {
if file != "" { if file != "" {
buffer.WriteString("0 ") // using format: mode SP type SP sha1 TAB path
buffer.WriteString(objectFormat.EmptyObjectID().String()) buffer.WriteString("0 blob " + objectFormat.EmptyObjectID().String() + "\t" + file + "\000")
buffer.WriteByte('\t')
buffer.WriteString(file)
buffer.WriteByte('\000')
} }
} }
return cmd.Run(&RunOpts{ return cmd.Run(&RunOpts{
@ -119,11 +116,33 @@ func (repo *Repository) RemoveFilesFromIndex(filenames ...string) error {
}) })
} }
type IndexObjectInfo struct {
Mode string
Object ObjectID
Filename string
}
// AddObjectsToIndex adds the provided object hashes to the index at the provided filenames
func (repo *Repository) AddObjectsToIndex(objects ...IndexObjectInfo) error {
cmd := NewCommand(repo.Ctx, "update-index", "--add", "--replace", "-z", "--index-info")
stdout := new(bytes.Buffer)
stderr := new(bytes.Buffer)
buffer := new(bytes.Buffer)
for _, object := range objects {
// using format: mode SP type SP sha1 TAB path
buffer.WriteString(object.Mode + " blob " + object.Object.String() + "\t" + object.Filename + "\000")
}
return cmd.Run(&RunOpts{
Dir: repo.Path,
Stdin: bytes.NewReader(buffer.Bytes()),
Stdout: stdout,
Stderr: stderr,
})
}
// AddObjectToIndex adds the provided object hash to the index at the provided filename // AddObjectToIndex adds the provided object hash to the index at the provided filename
func (repo *Repository) AddObjectToIndex(mode string, object ObjectID, filename string) error { func (repo *Repository) AddObjectToIndex(mode string, object ObjectID, filename string) error {
cmd := NewCommand(repo.Ctx, "update-index", "--add", "--replace", "--cacheinfo").AddDynamicArguments(mode, object.String(), filename) return repo.AddObjectsToIndex(IndexObjectInfo{Mode: mode, Object: object, Filename: filename})
_, _, err := cmd.RunStdString(&RunOpts{Dir: repo.Path})
return err
} }
// WriteTree writes the current index as a tree to the object db and returns its hash // WriteTree writes the current index as a tree to the object db and returns its hash

View file

@ -128,7 +128,7 @@ func (e *errMergeConflict) Error() string {
return fmt.Sprintf("conflict detected at: %s", e.filename) return fmt.Sprintf("conflict detected at: %s", e.filename)
} }
func attemptMerge(ctx context.Context, file *unmergedFile, tmpBasePath string, gitRepo *git.Repository) error { func attemptMerge(ctx context.Context, file *unmergedFile, tmpBasePath string, filesToRemove *[]string, filesToAdd *[]git.IndexObjectInfo) error {
log.Trace("Attempt to merge:\n%v", file) log.Trace("Attempt to merge:\n%v", file)
switch { switch {
@ -142,14 +142,13 @@ func attemptMerge(ctx context.Context, file *unmergedFile, tmpBasePath string, g
} }
// Not a genuine conflict and we can simply remove the file from the index // Not a genuine conflict and we can simply remove the file from the index
return gitRepo.RemoveFilesFromIndex(file.stage1.path) *filesToRemove = append(*filesToRemove, file.stage1.path)
return nil
case file.stage1 == nil && file.stage2 != nil && (file.stage3 == nil || file.stage2.SameAs(file.stage3)): case file.stage1 == nil && file.stage2 != nil && (file.stage3 == nil || file.stage2.SameAs(file.stage3)):
// 2. Added in ours but not in theirs or identical in both // 2. Added in ours but not in theirs or identical in both
// //
// Not a genuine conflict just add to the index // Not a genuine conflict just add to the index
if err := gitRepo.AddObjectToIndex(file.stage2.mode, git.MustIDFromString(file.stage2.sha), file.stage2.path); err != nil { *filesToAdd = append(*filesToAdd, git.IndexObjectInfo{Mode: file.stage2.mode, Object: git.MustIDFromString(file.stage2.sha), Filename: file.stage2.path})
return err
}
return nil return nil
case file.stage1 == nil && file.stage2 != nil && file.stage3 != nil && file.stage2.sha == file.stage3.sha && file.stage2.mode != file.stage3.mode: case file.stage1 == nil && file.stage2 != nil && file.stage3 != nil && file.stage2.sha == file.stage3.sha && file.stage2.mode != file.stage3.mode:
// 3. Added in both with the same sha but the modes are different // 3. Added in both with the same sha but the modes are different
@ -160,7 +159,8 @@ func attemptMerge(ctx context.Context, file *unmergedFile, tmpBasePath string, g
// 4. Added in theirs but not ours: // 4. Added in theirs but not ours:
// //
// Not a genuine conflict just add to the index // Not a genuine conflict just add to the index
return gitRepo.AddObjectToIndex(file.stage3.mode, git.MustIDFromString(file.stage3.sha), file.stage3.path) *filesToAdd = append(*filesToAdd, git.IndexObjectInfo{Mode: file.stage3.mode, Object: git.MustIDFromString(file.stage3.sha), Filename: file.stage3.path})
return nil
case file.stage1 == nil: case file.stage1 == nil:
// 5. Created by new in both // 5. Created by new in both
// //
@ -221,7 +221,8 @@ func attemptMerge(ctx context.Context, file *unmergedFile, tmpBasePath string, g
return err return err
} }
hash = strings.TrimSpace(hash) hash = strings.TrimSpace(hash)
return gitRepo.AddObjectToIndex(file.stage2.mode, git.MustIDFromString(hash), file.stage2.path) *filesToAdd = append(*filesToAdd, git.IndexObjectInfo{Mode: file.stage2.mode, Object: git.MustIDFromString(hash), Filename: file.stage2.path})
return nil
default: default:
if file.stage1 != nil { if file.stage1 != nil {
return &errMergeConflict{file.stage1.path} return &errMergeConflict{file.stage1.path}
@ -245,6 +246,9 @@ func AttemptThreeWayMerge(ctx context.Context, gitPath string, gitRepo *git.Repo
return false, nil, fmt.Errorf("unable to run read-tree -m! Error: %w", err) return false, nil, fmt.Errorf("unable to run read-tree -m! Error: %w", err)
} }
var filesToRemove []string
var filesToAdd []git.IndexObjectInfo
// Then we use git ls-files -u to list the unmerged files and collate the triples in unmergedfiles // Then we use git ls-files -u to list the unmerged files and collate the triples in unmergedfiles
unmerged := make(chan *unmergedFile) unmerged := make(chan *unmergedFile)
go unmergedFiles(ctx, gitPath, unmerged) go unmergedFiles(ctx, gitPath, unmerged)
@ -270,7 +274,7 @@ func AttemptThreeWayMerge(ctx context.Context, gitPath string, gitRepo *git.Repo
} }
// OK now we have the unmerged file triplet attempt to merge it // OK now we have the unmerged file triplet attempt to merge it
if err := attemptMerge(ctx, file, gitPath, gitRepo); err != nil { if err := attemptMerge(ctx, file, gitPath, &filesToRemove, &filesToAdd); err != nil {
if conflictErr, ok := err.(*errMergeConflict); ok { if conflictErr, ok := err.(*errMergeConflict); ok {
log.Trace("Conflict: %s in %s", conflictErr.filename, description) log.Trace("Conflict: %s in %s", conflictErr.filename, description)
conflict = true conflict = true
@ -283,6 +287,15 @@ func AttemptThreeWayMerge(ctx context.Context, gitPath string, gitRepo *git.Repo
return false, nil, err return false, nil, err
} }
} }
// Add and remove files in one command, as this is slow with many files otherwise
if err := gitRepo.RemoveFilesFromIndex(filesToRemove...); err != nil {
return false, nil, err
}
if err := gitRepo.AddObjectsToIndex(filesToAdd...); err != nil {
return false, nil, err
}
return conflict, conflictedFiles, nil return conflict, conflictedFiles, nil
} }