...@@ -136,18 +136,9 @@ module TreeHelper ...@@ -136,18 +136,9 @@ module TreeHelper
end end
# returns the relative path of the first subdir that doesn't have only one directory descendant # returns the relative path of the first subdir that doesn't have only one directory descendant
# rubocop: disable CodeReuse/ActiveRecord
def flatten_tree(root_path, tree) def flatten_tree(root_path, tree)
return tree.flat_path.sub(%r{\A#{Regexp.escape(root_path)}/}, '') if tree.flat_path.present? tree.flat_path.sub(%r{\A#{Regexp.escape(root_path)}/}, '')
subtree = Gitlab::Git::Tree.where(@repository, @commit.id, tree.path)
if subtree.count == 1 && subtree.first.dir?
return tree_join(tree.name, flatten_tree(root_path, subtree.first))
else
return tree.name
end end
end
# rubocop: enable CodeReuse/ActiveRecord
def selected_branch def selected_branch
@branch_name || tree_edit_branch @branch_name || tree_edit_branch
... ...
......
---
title: Bring back Rugged implementation of ListCommitsByOid
merge_request: 27441
author:
type: performance
---
title: Avoid excessive recursive calls with Rugged TreeEntries
merge_request: 26813
author:
type: fixed
---
title: Bring back Rugged implementation of CommitIsAncestor
merge_request: 25702
author:
type: other
---
title: Bring back Rugged implementation of commit_tree_entry
merge_request: 25896
author:
type: other
---
title: Bring back Rugged implementation of find_commit
merge_request: 25477
author:
type: fixed
---
title: Bring back Rugged implementation of TreeEntry
merge_request: 25706
author:
type: other
---
title: Bring back Rugged implementation of GetTreeEntries
merge_request: 25674
author:
type: other
...@@ -37,6 +37,28 @@ options: ...@@ -37,6 +37,28 @@ options:
circumstances it could lead to data loss if a failure occurs before data has circumstances it could lead to data loss if a failure occurs before data has
synced. synced.
### Improving NFS performance with GitLab
If you are using NFS to share Git data, we recommend that you enable a
number of feature flags that will allow GitLab application processes to
access Git data directly instead of going through the [Gitaly
service](../gitaly/index.md). Depending on your workload and disk
performance, these flags may help improve performance. See [the
issue](https://gitlab.com/gitlab-org/gitlab-ce/issues/57317) for more
details.
To do this, run the Rake task:
```sh
gitlab-rake gitlab:features:enable_rugged
```
If you need to undo this setting for some reason, run:
```sh
gitlab-rake gitlab:features:disable_rugged
```
### Known issues ### Known issues
On some customer systems, we have seen NFS clients slow precipitously due to On some customer systems, we have seen NFS clients slow precipitously due to
... ...
......
...@@ -56,6 +56,46 @@ If your test-suite is failing with Gitaly issues, as a first step, try running: ...@@ -56,6 +56,46 @@ If your test-suite is failing with Gitaly issues, as a first step, try running:
rm -rf tmp/tests/gitaly rm -rf tmp/tests/gitaly
``` ```
## Legacy Rugged code
While Gitaly can handle all Git access, many GitLab customers still
run Gitaly atop NFS. The legacy Rugged implementation for Git calls may
be faster than the Gitaly RPC due to N+1 Gitaly calls and other
reasons. See [the
issue](https://gitlab.com/gitlab-org/gitlab-ce/issues/57317) for more
details.
Until GitLab has eliminated most of these inefficiencies or the use of
NFS is discontinued for Git data, Rugged implementations of some of the
most commonly-used RPCs can be enabled via feature flags:
* `rugged_find_commit`
* `rugged_tree_entries`
* `rugged_tree_entry`
* `rugged_commit_is_ancestor`
* `rugged_commit_tree_entry`
* `rugged_list_commits_by_oid`
A convenience Rake task can be used to enable or disable these flags
all together. To enable:
```sh
bundle exec rake gitlab:features:enable_rugged
```
To disable:
```sh
bundle exec rake gitlab:features:disable_rugged
```
Most of this code exists in the `lib/gitlab/git/rugged_impl` directory.
NOTE: **Note:** You should NOT need to add or modify code related to
Rugged unless explicitly discussed with the [Gitaly
Team](https://gitlab.com/groups/gl-gitaly/group_members). This code will
NOT work on GitLab.com or other GitLab instances that do not use NFS.
## `TooManyInvocationsError` errors ## `TooManyInvocationsError` errors
During development and testing, you may experience `Gitlab::GitalyClient::TooManyInvocationsError` failures. During development and testing, you may experience `Gitlab::GitalyClient::TooManyInvocationsError` failures.
... ...
......
...@@ -23,6 +23,10 @@ module Gitlab ...@@ -23,6 +23,10 @@ module Gitlab
class << self class << self
def find(repository, sha, path, limit: MAX_DATA_DISPLAY_SIZE) def find(repository, sha, path, limit: MAX_DATA_DISPLAY_SIZE)
tree_entry(repository, sha, path, limit)
end
def tree_entry(repository, sha, path, limit)
return unless path return unless path
path = path.sub(%r{\A/*}, '') path = path.sub(%r{\A/*}, '')
...@@ -179,3 +183,5 @@ module Gitlab ...@@ -179,3 +183,5 @@ module Gitlab
end end
end end
end end
Gitlab::Git::Blob.singleton_class.prepend Gitlab::Git::RuggedImpl::Blob::ClassMethods
...@@ -3,6 +3,7 @@ module Gitlab ...@@ -3,6 +3,7 @@ module Gitlab
module Git module Git
class Commit class Commit
include Gitlab::EncodingHelper include Gitlab::EncodingHelper
prepend Gitlab::Git::RuggedImpl::Commit
extend Gitlab::Git::WrapsGitalyErrors extend Gitlab::Git::WrapsGitalyErrors
attr_accessor :raw_commit, :head attr_accessor :raw_commit, :head
...@@ -60,15 +61,19 @@ module Gitlab ...@@ -60,15 +61,19 @@ module Gitlab
# This saves us an RPC round trip. # This saves us an RPC round trip.
return nil if commit_id.include?(':') return nil if commit_id.include?(':')
commit = wrapped_gitaly_errors do commit = find_commit(repo, commit_id)
repo.gitaly_commit_client.find_commit(commit_id)
end
decorate(repo, commit) if commit decorate(repo, commit) if commit
rescue Gitlab::Git::CommandError, Gitlab::Git::Repository::NoRepository, ArgumentError rescue Gitlab::Git::CommandError, Gitlab::Git::Repository::NoRepository, ArgumentError
nil nil
end end
def find_commit(repo, commit_id)
wrapped_gitaly_errors do
repo.gitaly_commit_client.find_commit(commit_id)
end
end
# Get last commit for HEAD # Get last commit for HEAD
# #
# Ex. # Ex.
...@@ -199,6 +204,10 @@ module Gitlab ...@@ -199,6 +204,10 @@ module Gitlab
@repository = repository @repository = repository
@head = head @head = head
init_commit(raw_commit)
end
def init_commit(raw_commit)
case raw_commit case raw_commit
when Hash when Hash
init_from_hash(raw_commit) init_from_hash(raw_commit)
...@@ -319,11 +328,16 @@ module Gitlab ...@@ -319,11 +328,16 @@ module Gitlab
def tree_entry(path) def tree_entry(path)
return unless path.present? return unless path.present?
commit_tree_entry(path)
end
def commit_tree_entry(path)
# We're only interested in metadata, so limit actual data to 1 byte # We're only interested in metadata, so limit actual data to 1 byte
# since Gitaly doesn't support "send no data" option. # since Gitaly doesn't support "send no data" option.
entry = @repository.gitaly_commit_client.tree_entry(id, path, 1) entry = @repository.gitaly_commit_client.tree_entry(id, path, 1)
return unless entry return unless entry
# To be compatible with the rugged format
entry = entry.to_h entry = entry.to_h
entry.delete(:data) entry.delete(:data)
entry[:name] = File.basename(path) entry[:name] = File.basename(path)
...@@ -414,3 +428,5 @@ module Gitlab ...@@ -414,3 +428,5 @@ module Gitlab
end end
end end
end end
Gitlab::Git::Commit.singleton_class.prepend Gitlab::Git::RuggedImpl::Commit::ClassMethods
...@@ -2,6 +2,7 @@ module Gitlab ...@@ -2,6 +2,7 @@ module Gitlab
module Git module Git
class Ref class Ref
include Gitlab::EncodingHelper include Gitlab::EncodingHelper
include Gitlab::Git::RuggedImpl::Ref
# Branch or tag name # Branch or tag name
# without "refs/tags|heads" prefix # without "refs/tags|heads" prefix
... ...
......
...@@ -9,6 +9,7 @@ module Gitlab ...@@ -9,6 +9,7 @@ module Gitlab
include Gitlab::Git::WrapsGitalyErrors include Gitlab::Git::WrapsGitalyErrors
include Gitlab::EncodingHelper include Gitlab::EncodingHelper
include Gitlab::Utils::StrongMemoize include Gitlab::Utils::StrongMemoize
prepend Gitlab::Git::RuggedImpl::Repository
SEARCH_CONTEXT_LINES = 3 SEARCH_CONTEXT_LINES = 3
REV_LIST_COMMIT_LIMIT = 2_000 REV_LIST_COMMIT_LIMIT = 2_000
... ...
......
# frozen_string_literal: true
# NOTE: This code is legacy. Do not add/modify code here unless you have
# discussed with the Gitaly team. See
# https://docs.gitlab.com/ee/development/gitaly.html#legacy-rugged-code
# for more details.
module Gitlab
module Git
module RuggedImpl
module Blob
module ClassMethods
extend ::Gitlab::Utils::Override
override :tree_entry
# Routes the blob lookup to the Rugged implementation when the
# :rugged_tree_entry feature flag is enabled; otherwise defers to the
# implementation this module overrides.
def tree_entry(repository, sha, path, limit)
  return super unless Feature.enabled?(:rugged_tree_entry)

  rugged_tree_entry(repository, sha, path, limit)
end
private
# Builds a blob for +path+ at commit +sha+ by reading objects through
# +repository.lookup+.
#
# Returns nil when +path+ is nil, when no blob/submodule entry exists at
# that path, or when a Rugged::ReferenceError is raised during lookup.
# Submodule (:commit) entries are delegated to +submodule_blob+.
def rugged_tree_entry(repository, sha, path, limit)
  return unless path

  # Strip any leading / characters from the path
  relative_path = path.sub(%r{\A/*}, '')
  root_oid = repository.lookup(sha).tree.oid

  entry = find_entry_by_path(repository, root_oid, *relative_path.split('/'))
  return unless entry
  return submodule_blob(entry, relative_path, sha) if entry[:type] == :commit

  blob = repository.lookup(entry[:oid])
  return unless blob

  new(
    id: blob.oid,
    name: entry[:name],
    size: blob.size,
    # Rugged::Blob#content is expensive; don't call it if we don't have to.
    data: limit.zero? ? '' : blob.content(limit),
    mode: entry[:filemode].to_s(8),
    path: relative_path,
    commit_id: sha,
    binary: blob.binary?
  )
rescue Rugged::ReferenceError
  nil
end
# Recursively resolves a path, one component per tree level, to the
# matching tree entry.
#
# Ex.
#   blog/            # oid: 1a
#     app/           # oid: 2a
#       models/      # oid: 3a
#         file.rb    # oid: 4a
#
#   Blob.find_entry_by_path(repo, '1a', 'app', 'models', 'file.rb')
#   # => the entry for file.rb (its :oid is '4a')
#
# Returns nil when a component is missing, when an intermediate component
# is not a :tree, or when the final component is neither a :blob nor a
# :commit entry.
def find_entry_by_path(repository, root_id, *path_parts)
  name, *remaining = path_parts
  found = repository.lookup(root_id).find { |candidate| candidate[:name] == name }
  return unless found

  if remaining.empty?
    return [:blob, :commit].include?(found[:type]) ? found : nil
  end

  return unless found[:type] == :tree

  find_entry_by_path(repository, found[:oid], *remaining)
end
# Builds the placeholder blob used for a submodule entry: it carries the
# entry's oid and name but has no content (size 0, empty data).
def submodule_blob(blob_entry, path, sha)
  new(id: blob_entry[:oid], name: blob_entry[:name], size: 0, data: '', path: path, commit_id: sha)
end
end
end
end
end
end
# frozen_string_literal: true
# NOTE: This code is legacy. Do not add/modify code here unless you have
# discussed with the Gitaly team. See
# https://docs.gitlab.com/ee/development/gitaly.html#legacy-rugged-code
# for more details.
# rubocop:disable Gitlab/ModuleWithInstanceVariables
module Gitlab
module Git
module RuggedImpl
module Commit
module ClassMethods
extend ::Gitlab::Utils::Override
# Resolves +commit_id+ through +repo.rev_parse_target+ and returns the
# result only if it is a Rugged::Commit. Any other object type, or any
# Rugged::Error raised while resolving, yields nil.
def rugged_find(repo, commit_id)
  target = repo.rev_parse_target(commit_id)
  return target if target.is_a?(::Rugged::Commit)

  nil
rescue ::Rugged::Error
  nil
end
# Looks up every oid with +rugged_find+, drops the ones that could not
# be resolved, and decorates the rest.
#
# The result must be an array of Gitlab::Git::Commit objects rather than
# Rugged::Commit objects so upstream models operate on a consistent
# interface. Unlike Gitlab::Git::Commit.find,
# Gitlab::Git::Commit.batch_by_oid doesn't attempt to decorate the
# result, so it is done here.
def rugged_batch_by_oid(repo, oids)
  found = oids.map { |oid| rugged_find(repo, oid) }.compact

  found.map { |commit| decorate(repo, commit) }
end
override :find_commit
# Uses the Rugged lookup when the :rugged_find_commit feature flag is
# enabled; otherwise defers to the overridden implementation.
def find_commit(repo, commit_id)
  return super unless Feature.enabled?(:rugged_find_commit)

  rugged_find(repo, commit_id)
end
override :batch_by_oid
# Uses the Rugged batch lookup when the :rugged_list_commits_by_oid
# feature flag is enabled; otherwise defers to the overridden
# implementation.
def batch_by_oid(repo, oids)
  return super unless Feature.enabled?(:rugged_list_commits_by_oid)

  rugged_batch_by_oid(repo, oids)
end
end
extend ::Gitlab::Utils::Override
override :init_commit
# Initializes the commit from a Rugged::Commit when one is given; every
# other kind of raw commit is handled by the overridden implementation.
def init_commit(raw_commit)
  return init_from_rugged(raw_commit) if ::Rugged::Commit === raw_commit

  super
end
override :commit_tree_entry
# Uses the Rugged tree lookup when the :rugged_commit_tree_entry feature
# flag is enabled; otherwise defers to the overridden implementation.
def commit_tree_entry(path)
  return super unless Feature.enabled?(:rugged_commit_tree_entry)

  rugged_tree_entry(path)
end
# Returns the entry found at +path+ in this commit's tree, or nil when
# the lookup raises Rugged::TreeError.
#
# NOTE(review): open question carried over from the original author —
# is this the same as Blob.find_entry_by_path ?
def rugged_tree_entry(path)
  tree = rugged_commit.tree
  tree.path(path)
rescue Rugged::TreeError
  nil
end
# Memoized Rugged::Commit for this commit: reuses +raw_commit+ when it
# already is one, otherwise resolves +id+ through the repository.
def rugged_commit
  @rugged_commit ||=
    raw_commit.is_a?(Rugged::Commit) ? raw_commit : @repository.rev_parse_target(id)
end
# Copies the fields of a Rugged commit into this object's instance
# variables: the raw commit itself, id, message, author/committer
# name, email and time, and the parent oids.
def init_from_rugged(commit)
  author, committer = commit.author, commit.committer

  @raw_commit = commit
  @id, @message = commit.oid, commit.message

  @authored_date, @committed_date = author[:time], committer[:time]
  @author_name, @author_email = author[:name], author[:email]
  @committer_name, @committer_email = committer[:name], committer[:email]

  @parent_ids = commit.parents.map(&:oid)
end
end
end
end
end
# rubocop:enable Gitlab/ModuleWithInstanceVariables
# frozen_string_literal: true
# NOTE: This code is legacy. Do not add/modify code here unless you have
# discussed with the Gitaly team. See
# https://docs.gitlab.com/ee/development/gitaly.html#legacy-rugged-code
# for more details.
module Gitlab
module Git
module RuggedImpl
module Ref
# Follows +target+ through any chain of annotated tags and returns the
# first object that is not a Rugged::Tag::Annotation.
def self.dereference_object(object)
  peeled = object

  while peeled.is_a?(::Rugged::Tag::Annotation)
    peeled = peeled.target
  end

  peeled
end
end
end
end
end
# frozen_string_literal: true
# NOTE: This code is legacy. Do not add/modify code here unless you have
# discussed with the Gitaly team. See
# https://docs.gitlab.com/ee/development/gitaly.html#legacy-rugged-code
# for more details.
# rubocop:disable Gitlab/ModuleWithInstanceVariables
module Gitlab
module Git
module RuggedImpl
module Repository
extend ::Gitlab::Utils::Override
# Names of the feature flags that switch individual Git operations over
# to their Rugged implementations.
FEATURE_FLAGS = %i[
  rugged_find_commit
  rugged_tree_entries
  rugged_tree_entry
  rugged_commit_is_ancestor
  rugged_commit_tree_entry
  rugged_list_commits_by_oid
].freeze
# Joins each relative object directory onto the repository +path+.
def alternate_object_directories
  relative_object_directories.map do |relative_dir|
    File.join(path, relative_dir)
  end
end
# Hook environment keys whose values are read as relative object
# directories.
ALLOWED_OBJECT_RELATIVE_DIRECTORIES_VARIABLES = %w[
  GIT_OBJECT_DIRECTORY_RELATIVE
  GIT_ALTERNATE_OBJECT_DIRECTORIES_RELATIVE
].freeze

# Collects the values of the allowed variables from the hook environment
# of +gl_repository+ into one flat list, skipping unset (nil) values.
def relative_object_directories
  hook_env = Gitlab::Git::HookEnv.all(gl_repository)
  directories = hook_env.values_at(*ALLOWED_OBJECT_RELATIVE_DIRECTORIES_VARIABLES)

  directories.flatten.compact
end
# Memoized Rugged::Repository opened at +path+ with the alternate object
# directories. Rugged repository/OS errors are re-raised as
# Gitlab::Git::Repository::NoRepository.
def rugged
  return @rugged if @rugged

  @rugged = ::Rugged::Repository.new(path, alternates: alternate_object_directories)
rescue ::Rugged::RepositoryError, ::Rugged::OSError
  raise ::Gitlab::Git::Repository::NoRepository, 'no repository for such path'
end
# Closes the memoized Rugged repository, if one was ever opened.
def cleanup
  @rugged.close unless @rugged.nil?
end
# Resolves +revspec+ to the object it points to. When +revspec+ names an
# annotated tag, the tag's target is returned instead of the tag itself.
def rev_parse_target(revspec)
  Ref.dereference_object(rugged.rev_parse(revspec))
end
override :ancestor?
# Uses the Rugged ancestry check when the :rugged_commit_is_ancestor
# feature flag is enabled; otherwise defers to the overridden
# implementation.
def ancestor?(from, to)
  return super unless Feature.enabled?(:rugged_commit_is_ancestor)

  rugged_is_ancestor?(from, to)
end
# True only when the merge base of the two ids equals +ancestor_id+.
# Returns false when either id is nil or when Rugged::OdbError is raised.
def rugged_is_ancestor?(ancestor_id, descendant_id)
  return false if [ancestor_id, descendant_id].any?(&:nil?)

  merge_base = rugged_merge_base(ancestor_id, descendant_id)
  merge_base == ancestor_id
rescue Rugged::OdbError
  false
end
# Asks Rugged for the merge base of +from+ and +to+; returns nil when
# Rugged::ReferenceError is raised.
def rugged_merge_base(from, to)
  repo = rugged
  repo.merge_base(from, to)
rescue Rugged::ReferenceError
  nil
end
# Resolves an oid or ref name to a Rugged object via rev_parse. Unlike
# rev_parse_target, the result is returned as-is.
def lookup(oid_or_ref_name)
  repo = rugged
  repo.rev_parse(oid_or_ref_name)
end
end
end
end
end
# rubocop:enable Gitlab/ModuleWithInstanceVariables
# frozen_string_literal: true
# NOTE: This code is legacy. Do not add/modify code here unless you have
# discussed with the Gitaly team. See
# https://docs.gitlab.com/ee/development/gitaly.html#legacy-rugged-code
# for more details.
module Gitlab
module Git
module RuggedImpl
module Tree
module ClassMethods
extend ::Gitlab::Utils::Override
override :tree_entries
# Uses the Rugged tree listing when the :rugged_tree_entries feature
# flag is enabled; otherwise defers to the overridden implementation.
def tree_entries(repository, sha, path, recursive)
  return super unless Feature.enabled?(:rugged_tree_entries)

  tree_entries_with_flat_path_from_rugged(repository, sha, path, recursive)
end
# Lists the tree entries through Rugged and eagerly fills in each
# entry's flat_path before returning them.
#
# Populating flat_path up front was an optimization to reduce N+1
# queries for Gitaly (https://gitlab.com/gitlab-org/gitaly/issues/530).
# It used to be done lazily in the view via TreeHelper#flatten_tree, so
# it's possible there's a performance impact by loading this eagerly.
def tree_entries_with_flat_path_from_rugged(repository, sha, path, recursive)
  entries = tree_entries_from_rugged(repository, sha, path, recursive)
  rugged_populate_flat_path(repository, sha, path, entries)

  entries
end
# Lists the entries found at +path+ for commit +sha+.
#
# When +recursive+ is true, each directory entry is immediately followed
# by the (recursively listed) entries it contains, so the result is a
# flat, depth-first ordered array.
#
# The accumulated array is returned explicitly: previously the method's
# value was the return of +each+ (the current level only), which
# silently dropped every nested entry from recursive listings.
def tree_entries_from_rugged(repository, sha, path, recursive)
  current_path_entries = get_tree_entries_from_rugged(repository, sha, path)

  current_path_entries.each_with_object([]) do |entry, ordered_entries|
    ordered_entries << entry
    next unless recursive && entry.dir?

    ordered_entries.concat(tree_entries_from_rugged(repository, sha, entry.path, true))
  end
end
# Sets flat_path on every entry. Non-directories get their own path;
# directories get the flattened path computed by rugged_flatten_tree,
# prefixed with +path+ when one is given.
def rugged_populate_flat_path(repository, sha, path, entries)
  entries.each do |entry|
    entry.flat_path = entry.path
    next unless entry.dir?

    flattened = rugged_flatten_tree(repository, sha, entry, path)
    entry.flat_path = path ? File.join(path, flattened) : flattened
  end
end
# Returns the relative path of the first subdir that doesn't have only
# one directory descendant: while a directory contains exactly one entry
# and that entry is itself a directory, the names are joined together.
def rugged_flatten_tree(repository, sha, tree, root_path)
  children = tree_entries_from_rugged(repository, sha, tree.path, false)
  only_child = children.first

  return tree.name unless children.count == 1 && only_child.dir?

  File.join(tree.name, rugged_flatten_tree(repository, sha, only_child, root_path))
end
# Builds one entry object per item in the tree at +path+ (or in the
# commit's root tree when +path+ is nil).
#
# Returns an empty array when +path+ cannot be resolved to a tree id or
# when a Rugged::ReferenceError is raised during lookup.
def get_tree_entries_from_rugged(repository, sha, path)
  root_tree = repository.lookup(sha).tree
  tree = root_tree

  if path
    id = find_id_by_path(repository, root_tree.oid, path)
    tree = id ? repository.lookup(id) : []
  end

  tree.map do |entry|
    entry_path = path ? File.join(path, entry[:name]) : entry[:name]

    new(
      id: entry[:oid],
      root_id: root_tree.oid,
      name: entry[:name],
      type: entry[:type],
      mode: entry[:filemode].to_s(8),
      path: entry_path,
      commit_id: sha
    )
  end
rescue Rugged::ReferenceError
  []
end
end
end
end
end
end
...@@ -16,6 +16,10 @@ module Gitlab ...@@ -16,6 +16,10 @@ module Gitlab
def where(repository, sha, path = nil, recursive = false) def where(repository, sha, path = nil, recursive = false)
path = nil if path == '' || path == '/' path = nil if path == '' || path == '/'
tree_entries(repository, sha, path, recursive)
end
def tree_entries(repository, sha, path, recursive)
wrapped_gitaly_errors do wrapped_gitaly_errors do
repository.gitaly_commit_client.tree_entries(repository, sha, path, recursive) repository.gitaly_commit_client.tree_entries(repository, sha, path, recursive)
end end
...@@ -93,3 +97,5 @@ module Gitlab ...@@ -93,3 +97,5 @@ module Gitlab
end end
end end
end end
Gitlab::Git::Tree.singleton_class.prepend Gitlab::Git::RuggedImpl::Tree::ClassMethods