From 4c248c05cbd1356199cc96775b68fbbde64d4d5d Mon Sep 17 00:00:00 2001 From: Ryan Cobb Date: Thu, 18 Apr 2019 12:40:00 -0600 Subject: [PATCH 1/7] Adds new metrics for unicorn monitoring This adds new metrics for monitoring unicorn. These metrics include process_cpu_seconds_total, process_start_time_seconds, process_max_fds, and unicorn_workers. --- Gemfile | 1 + Gemfile.lock | 3 ++ .../metrics/samplers/unicorn_sampler.rb | 36 ++++++++++++++----- lib/gitlab/metrics/system.rb | 25 +++++++++++++ .../metrics/samplers/unicorn_sampler_spec.rb | 33 ++++++++++++++--- spec/lib/gitlab/metrics/system_spec.rb | 18 ++++++++++ 6 files changed, 103 insertions(+), 13 deletions(-) diff --git a/Gemfile b/Gemfile index c55e6478cb0..e38c1f03ca0 100644 --- a/Gemfile +++ b/Gemfile @@ -406,6 +406,7 @@ gem 'health_check', '~> 2.6.0' # System information gem 'vmstat', '~> 2.3.0' gem 'sys-filesystem', '~> 1.1.6' +gem 'sys-proctable', '~> 1.2' # SSH host key support gem 'net-ssh', '~> 5.0' diff --git a/Gemfile.lock b/Gemfile.lock index 109958e2591..36c24265e48 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -867,6 +867,8 @@ GEM state_machines-activemodel (>= 0.5.0) sys-filesystem (1.1.6) ffi + sys-proctable (1.2.1) + ffi sysexits (1.2.0) temple (0.8.0) test-prof (0.2.5) @@ -1160,6 +1162,7 @@ DEPENDENCIES stackprof (~> 0.2.10) state_machines-activerecord (~> 0.5.1) sys-filesystem (~> 1.1.6) + sys-proctable (~> 1.2) test-prof (~> 0.2.5) thin (~> 1.7.0) timecop (~> 0.8.0) diff --git a/lib/gitlab/metrics/samplers/unicorn_sampler.rb b/lib/gitlab/metrics/samplers/unicorn_sampler.rb index bec64e864b3..16a2ee9b9be 100644 --- a/lib/gitlab/metrics/samplers/unicorn_sampler.rb +++ b/lib/gitlab/metrics/samplers/unicorn_sampler.rb @@ -8,12 +8,19 @@ module Gitlab super(interval) end - def unicorn_active_connections - @unicorn_active_connections ||= ::Gitlab::Metrics.gauge(:unicorn_active_connections, 'Unicorn active connections', {}, :max) + def metrics + @metrics ||= init_metrics end - def unicorn_queued_connections - @unicorn_queued_connections ||= ::Gitlab::Metrics.gauge(:unicorn_queued_connections, 'Unicorn queued connections', {}, :max) + def init_metrics + { + unicorn_active_connections: ::Gitlab::Metrics.gauge(:unicorn_active_connections, 'Unicorn active connections', {}, :max), + unicorn_queued_connections: ::Gitlab::Metrics.gauge(:unicorn_queued_connections, 'Unicorn queued connections', {}, :max), + unicorn_workers: ::Gitlab::Metrics.gauge(:unicorn_workers, 'Unicorn workers'), + process_cpu_seconds_total: ::Gitlab::Metrics.gauge(:process_cpu_seconds_total, 'Process CPU seconds total'), + process_max_fds: ::Gitlab::Metrics.gauge(:process_max_fds, 'Process max fds'), + process_start_time_seconds: ::Gitlab::Metrics.gauge(:process_start_time_seconds, 'Process start time seconds') + } end def enabled? @@ -23,14 +30,19 @@ module Gitlab def sample Raindrops::Linux.tcp_listener_stats(tcp_listeners).each do |addr, stats| - unicorn_active_connections.set({ socket_type: 'tcp', socket_address: addr }, stats.active) - unicorn_queued_connections.set({ socket_type: 'tcp', socket_address: addr }, stats.queued) + metrics[:unicorn_active_connections].set({ socket_type: 'tcp', socket_address: addr }, stats.active) + metrics[:unicorn_queued_connections].set({ socket_type: 'tcp', socket_address: addr }, stats.queued) end Raindrops::Linux.unix_listener_stats(unix_listeners).each do |addr, stats| - unicorn_active_connections.set({ socket_type: 'unix', socket_address: addr }, stats.active) - unicorn_queued_connections.set({ socket_type: 'unix', socket_address: addr }, stats.queued) + metrics[:unicorn_active_connections].set({ socket_type: 'unix', socket_address: addr }, stats.active) + metrics[:unicorn_queued_connections].set({ socket_type: 'unix', socket_address: addr }, stats.queued) end + + metrics[:process_cpu_seconds_total].set({ pid: nil }, ::Gitlab::Metrics::System.cpu_time) + metrics[:process_start_time_seconds].set({ pid: nil }, ::Gitlab::Metrics::System.process_start_time) + metrics[:process_max_fds].set({ pid: nil }, ::Gitlab::Metrics::System.max_open_file_descriptors) + metrics[:unicorn_workers].set({}, unicorn_workers_count) end private @@ -39,6 +51,10 @@ module Gitlab @tcp_listeners ||= Unicorn.listener_names.grep(%r{\A[^/]+:\d+\z}) end + def pid + @pid ||= Process.pid + end + def unix_listeners @unix_listeners ||= Unicorn.listener_names - tcp_listeners end @@ -46,6 +62,10 @@ module Gitlab def unicorn_with_listeners? defined?(Unicorn) && Unicorn.listener_names.any? end + + def unicorn_workers_count + Sys::ProcTable.ps.select {|p| p.cmdline.match(/unicorn_rails worker/)}.count + end end end end diff --git a/lib/gitlab/metrics/system.rb b/lib/gitlab/metrics/system.rb index 426496855e3..a269a8688e9 100644 --- a/lib/gitlab/metrics/system.rb +++ b/lib/gitlab/metrics/system.rb @@ -23,6 +23,16 @@ module Gitlab def self.file_descriptor_count Dir.glob('/proc/self/fd/*').length end + + def self.max_open_file_descriptors + match = File.read('/proc/self/limits').match(/Max open files\s*(\d+)/) + + if match && match[1] + max_fds = match[1].to_i + end + + max_fds + end else def self.memory_usage 0.0 @@ -31,6 +41,10 @@ module Gitlab def self.file_descriptor_count 0 end + + def self.max_open_file_descriptors + 0 + end end # THREAD_CPUTIME is not supported on OS X @@ -46,6 +60,17 @@ module Gitlab end end + # CLOCK_BOOTTIME is not supported on OS X + if Process.const_defined?(:CLOCK_BOOTTIME) + def self.process_start_time + Process + .clock_gettime(Process::CLOCK_BOOTTIME, :float_second) + end + else + def self.process_start_time + 0.0 + end + end # Returns the current real time in a given precision. # # Returns the time as a Float for precision = :float_second. diff --git a/spec/lib/gitlab/metrics/samplers/unicorn_sampler_spec.rb b/spec/lib/gitlab/metrics/samplers/unicorn_sampler_spec.rb index 4b03f3c2532..4470dc3ee93 100644 --- a/spec/lib/gitlab/metrics/samplers/unicorn_sampler_spec.rb +++ b/spec/lib/gitlab/metrics/samplers/unicorn_sampler_spec.rb @@ -39,8 +39,8 @@ describe Gitlab::Metrics::Samplers::UnicornSampler do it 'updates metrics type unix and with addr' do labels = { socket_type: 'unix', socket_address: socket_address } - expect(subject).to receive_message_chain(:unicorn_active_connections, :set).with(labels, 'active') - expect(subject).to receive_message_chain(:unicorn_queued_connections, :set).with(labels, 'queued') + expect(subject.metrics[:unicorn_active_connections]).to receive(:set).with(labels, 'active') + expect(subject.metrics[:unicorn_queued_connections]).to receive(:set).with(labels, 'queued') subject.sample end @@ -50,7 +50,6 @@ describe Gitlab::Metrics::Samplers::UnicornSampler do context 'unicorn listens on tcp sockets' do let(:tcp_socket_address) { '0.0.0.0:8080' } let(:tcp_sockets) { [tcp_socket_address] } - before do allow(unicorn).to receive(:listener_names).and_return(tcp_sockets) end @@ -71,13 +70,37 @@ describe Gitlab::Metrics::Samplers::UnicornSampler do it 'updates metrics type unix and with addr' do labels = { socket_type: 'tcp', socket_address: tcp_socket_address } - expect(subject).to receive_message_chain(:unicorn_active_connections, :set).with(labels, 'active') - expect(subject).to receive_message_chain(:unicorn_queued_connections, :set).with(labels, 'queued') + expect(subject.metrics[:unicorn_active_connections]).to receive(:set).with(labels, 'active') + expect(subject.metrics[:unicorn_queued_connections]).to receive(:set).with(labels, 'queued') subject.sample end end end + + context 'additional metrics' do + let(:cpu_time) { 3.14 } + let(:process_start_time) { 19100.24 } + let(:process_max_fds) { 1024 } + let(:unicorn_workers) { 2 } + + before do + allow(unicorn).to receive(:listener_names).and_return([""]) + allow(::Gitlab::Metrics::System).to receive(:cpu_time).and_return(cpu_time) + allow(::Gitlab::Metrics::System).to receive(:process_start_time).and_return(process_start_time) + allow(::Gitlab::Metrics::System).to receive(:max_open_file_descriptors).and_return(process_max_fds) + allow(subject).to receive(:unicorn_workers_count).and_return(unicorn_workers) + end + + it "sets additional metrics" do + expect(subject.metrics[:process_cpu_seconds_total]).to receive(:set).with({ pid: nil }, cpu_time) + expect(subject.metrics[:process_start_time_seconds]).to receive(:set).with({ pid: nil }, process_start_time) + expect(subject.metrics[:process_max_fds]).to receive(:set).with({ pid: nil }, process_max_fds) + expect(subject.metrics[:unicorn_workers]).to receive(:set).with({}, unicorn_workers) + + subject.sample + end + end end describe '#start' do diff --git a/spec/lib/gitlab/metrics/system_spec.rb b/spec/lib/gitlab/metrics/system_spec.rb index 14afcdf5daa..2de6821bb79 100644 --- a/spec/lib/gitlab/metrics/system_spec.rb +++ b/spec/lib/gitlab/metrics/system_spec.rb @@ -13,6 +13,12 @@ describe Gitlab::Metrics::System do expect(described_class.file_descriptor_count).to be > 0 end end + + describe '.max_open_file_descriptors' do + it 'returns the max allowed open file descriptors' do + expect(described_class.max_open_file_descriptors).to be > 0 + end + end else describe '.memory_usage' do it 'returns 0.0' do @@ -25,6 +31,12 @@ describe Gitlab::Metrics::System do expect(described_class.file_descriptor_count).to eq(0) end end + + describe '.max_open_file_descriptors' do + it 'returns 0' do + expect(described_class.max_open_file_descriptors).to eq(0) + end + end end describe '.cpu_time' do @@ -44,4 +56,10 @@ describe Gitlab::Metrics::System do expect(described_class.monotonic_time).to be_an(Float) end end + + describe '.process_start_time' do + it 'returns a Float' do + expect(described_class.process_start_time).to be_an(Float) + end + end end -- GitLab From 174a03dfc284781d811df1874ce3cf11d451a8f5 Mon Sep 17 00:00:00 2001 From: Ryan Cobb Date: Wed, 24 Apr 2019 11:41:54 -0600 Subject: [PATCH 2/7] Move process specific metrics to ruby sampler These metrics are not unicorn specific and can be used across ruby processes --- lib/gitlab/metrics/samplers/ruby_sampler.rb | 22 ++++++++++------ .../metrics/samplers/unicorn_sampler.rb | 8 +----- .../metrics/samplers/ruby_sampler_spec.rb | 26 ++++++++++++++++++- .../metrics/samplers/unicorn_sampler_spec.rb | 7 ----- 4 files changed, 40 insertions(+), 23 deletions(-) diff --git a/lib/gitlab/metrics/samplers/ruby_sampler.rb b/lib/gitlab/metrics/samplers/ruby_sampler.rb index 18a69321905..c4b2224efdf 100644 --- a/lib/gitlab/metrics/samplers/ruby_sampler.rb +++ b/lib/gitlab/metrics/samplers/ruby_sampler.rb @@ -23,25 +23,31 @@ module Gitlab end def init_metrics - metrics = {} - metrics[:sampler_duration] = ::Gitlab::Metrics.counter(with_prefix(:sampler, :duration_seconds_total), 'Sampler time', labels) - metrics[:total_time] = ::Gitlab::Metrics.counter(with_prefix(:gc, :duration_seconds_total), 'Total GC time', labels) + metrics = { + file_descriptors: ::Gitlab::Metrics.gauge(with_prefix(:file, :descriptors), 'File descriptors used', labels, :livesum), + memory_usage: ::Gitlab::Metrics.gauge(with_prefix(:memory, :bytes), 'Memory used', labels, :livesum), + process_cpu_seconds_total: ::Gitlab::Metrics.gauge(:process_cpu_seconds_total, 'Process CPU seconds total'), + process_max_fds: ::Gitlab::Metrics.gauge(:process_max_fds, 'Process max fds'), + process_start_time_seconds: ::Gitlab::Metrics.gauge(:process_start_time_seconds, 'Process start time seconds'), + sampler_duration: ::Gitlab::Metrics.counter(with_prefix(:sampler, :duration_seconds_total), 'Sampler time', labels), + total_time: ::Gitlab::Metrics.counter(with_prefix(:gc, :duration_seconds_total), 'Total GC time', labels) + } + GC.stat.keys.each do |key| metrics[key] = ::Gitlab::Metrics.gauge(with_prefix(:gc_stat, key), to_doc_string(key), labels, :livesum) end - metrics[:memory_usage] = ::Gitlab::Metrics.gauge(with_prefix(:memory, :bytes), 'Memory used', labels, :livesum) - metrics[:file_descriptors] = ::Gitlab::Metrics.gauge(with_prefix(:file, :descriptors), 'File descriptors used', labels, :livesum) - metrics end def sample start_time = System.monotonic_time - metrics[:memory_usage].set(labels.merge(worker_label), System.memory_usage) metrics[:file_descriptors].set(labels.merge(worker_label), System.file_descriptor_count) - + metrics[:memory_usage].set(labels.merge(worker_label), System.memory_usage) + metrics[:process_cpu_seconds_total].set(labels.merge(worker_label), ::Gitlab::Metrics::System.cpu_time) + metrics[:process_start_time_seconds].set(labels.merge(worker_label), ::Gitlab::Metrics::System.process_start_time) + metrics[:process_max_fds].set(labels.merge(worker_label), ::Gitlab::Metrics::System.max_open_file_descriptors) sample_gc metrics[:sampler_duration].increment(labels, System.monotonic_time - start_time) diff --git a/lib/gitlab/metrics/samplers/unicorn_sampler.rb b/lib/gitlab/metrics/samplers/unicorn_sampler.rb index 16a2ee9b9be..c7063c5ba28 100644 --- a/lib/gitlab/metrics/samplers/unicorn_sampler.rb +++ b/lib/gitlab/metrics/samplers/unicorn_sampler.rb @@ -16,10 +16,7 @@ module Gitlab { unicorn_active_connections: ::Gitlab::Metrics.gauge(:unicorn_active_connections, 'Unicorn active connections', {}, :max), unicorn_queued_connections: ::Gitlab::Metrics.gauge(:unicorn_queued_connections, 'Unicorn queued connections', {}, :max), - unicorn_workers: ::Gitlab::Metrics.gauge(:unicorn_workers, 'Unicorn workers'), - process_cpu_seconds_total: ::Gitlab::Metrics.gauge(:process_cpu_seconds_total, 'Process CPU seconds total'), - process_max_fds: ::Gitlab::Metrics.gauge(:process_max_fds, 'Process max fds'), - process_start_time_seconds: ::Gitlab::Metrics.gauge(:process_start_time_seconds, 'Process start time seconds') + unicorn_workers: ::Gitlab::Metrics.gauge(:unicorn_workers, 'Unicorn workers') } end @@ -39,9 +36,6 @@ module Gitlab metrics[:unicorn_queued_connections].set({ socket_type: 'unix', socket_address: addr }, stats.queued) end - metrics[:process_cpu_seconds_total].set({ pid: nil }, ::Gitlab::Metrics::System.cpu_time) - metrics[:process_start_time_seconds].set({ pid: nil }, ::Gitlab::Metrics::System.process_start_time) - metrics[:process_max_fds].set({ pid: nil }, ::Gitlab::Metrics::System.max_open_file_descriptors) metrics[:unicorn_workers].set({}, unicorn_workers_count) end diff --git a/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb b/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb index 7972ff253fe..0fafcb8e380 100644 --- a/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb +++ b/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb @@ -10,8 +10,11 @@ describe Gitlab::Metrics::Samplers::RubySampler do describe '#sample' do it 'samples various statistics' do - expect(Gitlab::Metrics::System).to receive(:memory_usage) + expect(Gitlab::Metrics::System).to receive(:cpu_time) expect(Gitlab::Metrics::System).to receive(:file_descriptor_count) + expect(Gitlab::Metrics::System).to receive(:memory_usage) + expect(Gitlab::Metrics::System).to receive(:process_start_time) + expect(Gitlab::Metrics::System).to receive(:max_open_file_descriptors) expect(sampler).to receive(:sample_gc) sampler.sample @@ -34,6 +37,27 @@ describe Gitlab::Metrics::Samplers::RubySampler do sampler.sample end + it 'adds a metric containing the processes total cpu time' do + expect(Gitlab::Metrics::System).to receive(:cpu_time).and_return(0.51) + expect(sampler.metrics[:process_cpu_seconds_total]).to receive(:set).with({}, 0.51) + + sampler.sample + end + + it 'adds a metric containing the process start time' do + expect(Gitlab::Metrics::System).to receive(:process_start_time).and_return(12345) + expect(sampler.metrics[:process_start_time_seconds]).to receive(:set).with({}, 12345) + + sampler.sample + end + + it 'adds a metric containing the process max file descriptors' do + expect(Gitlab::Metrics::System).to receive(:max_open_file_descriptors).and_return(1024) + expect(sampler.metrics[:process_max_fds]).to receive(:set).with({}, 1024) + + sampler.sample + end + it 'clears any GC profiles' do expect(GC::Profiler).to receive(:clear) diff --git a/spec/lib/gitlab/metrics/samplers/unicorn_sampler_spec.rb b/spec/lib/gitlab/metrics/samplers/unicorn_sampler_spec.rb index 4470dc3ee93..0edbfc869a6 100644 --- a/spec/lib/gitlab/metrics/samplers/unicorn_sampler_spec.rb +++ b/spec/lib/gitlab/metrics/samplers/unicorn_sampler_spec.rb @@ -80,22 +80,15 @@ describe Gitlab::Metrics::Samplers::UnicornSampler do context 'additional metrics' do let(:cpu_time) { 3.14 } - let(:process_start_time) { 19100.24 } - let(:process_max_fds) { 1024 } let(:unicorn_workers) { 2 } before do allow(unicorn).to receive(:listener_names).and_return([""]) allow(::Gitlab::Metrics::System).to receive(:cpu_time).and_return(cpu_time) - allow(::Gitlab::Metrics::System).to receive(:process_start_time).and_return(process_start_time) - allow(::Gitlab::Metrics::System).to receive(:max_open_file_descriptors).and_return(process_max_fds) allow(subject).to receive(:unicorn_workers_count).and_return(unicorn_workers) end it "sets additional metrics" do - expect(subject.metrics[:process_cpu_seconds_total]).to receive(:set).with({ pid: nil }, cpu_time) - expect(subject.metrics[:process_start_time_seconds]).to receive(:set).with({ pid: nil }, process_start_time) - expect(subject.metrics[:process_max_fds]).to receive(:set).with({ pid: nil }, process_max_fds) expect(subject.metrics[:unicorn_workers]).to receive(:set).with({}, unicorn_workers) subject.sample -- GitLab From bb27bf4a1c7153f2f5074eb058d8659dd9f198ad Mon Sep 17 00:00:00 2001 From: Ryan Cobb Date: Wed, 24 Apr 2019 17:05:09 -0600 Subject: [PATCH 3/7] Update docs and calculate process start time via proc table This updates monitor docs to reflect the new ruby and unicorn metrics as well as making it so we fetch process start time via the proc table instead of via CLOCK_BOOTTIME --- .../monitoring/prometheus/gitlab_metrics.md | 14 +++++++----- lib/gitlab/metrics/samplers/ruby_sampler.rb | 18 +++++++-------- lib/gitlab/metrics/system.rb | 22 +++++++++---------- .../metrics/samplers/ruby_sampler_spec.rb | 6 ++--- spec/lib/gitlab/metrics/system_spec.rb | 18 ++++++++++----- 5 files changed, 44 insertions(+), 34 deletions(-) diff --git a/doc/administration/monitoring/prometheus/gitlab_metrics.md b/doc/administration/monitoring/prometheus/gitlab_metrics.md index 3bfcc9a289e..8885236e7be 100644 --- a/doc/administration/monitoring/prometheus/gitlab_metrics.md +++ b/doc/administration/monitoring/prometheus/gitlab_metrics.md @@ -43,10 +43,11 @@ The following metrics are available: | redis_ping_latency_seconds | Gauge | 9.4 | Round trip time of the redis ping | | user_session_logins_total | Counter | 9.4 | Counter of how many users have logged in | | upload_file_does_not_exist | Counter | 10.7 in EE, 11.5 in CE | Number of times an upload record could not find its file | -| failed_login_captcha_total | Gauge | 11.0 | Counter of failed CAPTCHA attempts during login | -| successful_login_captcha_total | Gauge | 11.0 | Counter of successful CAPTCHA attempts during login | -| unicorn_active_connections | Gauge | 11.0 | The number of active Unicorn connections (workers) | -| unicorn_queued_connections | Gauge | 11.0 | The number of queued Unicorn connections | +| failed_login_captcha_total | Gauge | 11.0 | Counter of failed CAPTCHA attempts during login | +| successful_login_captcha_total | Gauge | 11.0 | Counter of successful CAPTCHA attempts during login | +| unicorn_active_connections | Gauge | 11.0 | The number of active Unicorn connections (workers) | +| unicorn_queued_connections | Gauge | 11.0 | The number of queued Unicorn connections | +| unicorn_workers | Gauge | 11.11 | The number of Unicorn workers | ### Ruby metrics @@ -57,8 +58,11 @@ Some basic Ruby runtime metrics are available: | ruby_gc_duration_seconds_total | Counter | 11.1 | Time spent by Ruby in GC | | ruby_gc_stat_... | Gauge | 11.1 | Various metrics from [GC.stat] | | ruby_file_descriptors | Gauge | 11.1 | File descriptors per process | -| ruby_memory_bytes | Gauge | 11.1 | Memory usage by process | +| ruby_process_resident_memory_bytes | Gauge | 11.1 | Memory usage by process | | ruby_sampler_duration_seconds_total | Counter | 11.1 | Time spent collecting stats | +| ruby_process_cpu_seconds_total | Gauge | 11.11 | Total amount of cpu time per process | +| ruby_process_max_fds | Gauge | 11.11 | Maximum number of open file descriptors per process | +| ruby_process_start_time_seconds | Gauge | 11.11 | The time the process started after system boot in seconds | [GC.stat]: https://ruby-doc.org/core-2.3.0/GC.html#method-c-stat diff --git a/lib/gitlab/metrics/samplers/ruby_sampler.rb b/lib/gitlab/metrics/samplers/ruby_sampler.rb index c4b2224efdf..5740380e63e 100644 --- a/lib/gitlab/metrics/samplers/ruby_sampler.rb +++ b/lib/gitlab/metrics/samplers/ruby_sampler.rb @@ -24,13 +24,13 @@ module Gitlab def init_metrics metrics = { - file_descriptors: ::Gitlab::Metrics.gauge(with_prefix(:file, :descriptors), 'File descriptors used', labels, :livesum), - memory_usage: ::Gitlab::Metrics.gauge(with_prefix(:memory, :bytes), 'Memory used', labels, :livesum), - process_cpu_seconds_total: ::Gitlab::Metrics.gauge(:process_cpu_seconds_total, 'Process CPU seconds total'), - process_max_fds: ::Gitlab::Metrics.gauge(:process_max_fds, 'Process max fds'), - process_start_time_seconds: ::Gitlab::Metrics.gauge(:process_start_time_seconds, 'Process start time seconds'), - sampler_duration: ::Gitlab::Metrics.counter(with_prefix(:sampler, :duration_seconds_total), 'Sampler time', labels), - total_time: ::Gitlab::Metrics.counter(with_prefix(:gc, :duration_seconds_total), 'Total GC time', labels) + file_descriptors: ::Gitlab::Metrics.gauge(with_prefix(:file, :descriptors), 'File descriptors used', labels, :livesum), + process_cpu_seconds_total: ::Gitlab::Metrics.gauge(with_prefix(:process, :cpu_seconds_total), 'Process CPU seconds total'), + process_max_fds: ::Gitlab::Metrics.gauge(with_prefix(:process, :max_fds), 'Process max fds'), + process_resident_memory_bytes: ::Gitlab::Metrics.gauge(with_prefix(:process, :resident_memory_bytes), 'Memory used', labels, :livesum), + process_start_time_seconds: ::Gitlab::Metrics.gauge(with_prefix(:process, :start_time_seconds), 'Process start time seconds'), + sampler_duration: ::Gitlab::Metrics.counter(with_prefix(:sampler, :duration_seconds_total), 'Sampler time', labels), + total_time: ::Gitlab::Metrics.counter(with_prefix(:gc, :duration_seconds_total), 'Total GC time', labels) } GC.stat.keys.each do |key| @@ -44,10 +44,10 @@ module Gitlab start_time = System.monotonic_time metrics[:file_descriptors].set(labels.merge(worker_label), System.file_descriptor_count) - metrics[:memory_usage].set(labels.merge(worker_label), System.memory_usage) metrics[:process_cpu_seconds_total].set(labels.merge(worker_label), ::Gitlab::Metrics::System.cpu_time) - metrics[:process_start_time_seconds].set(labels.merge(worker_label), ::Gitlab::Metrics::System.process_start_time) metrics[:process_max_fds].set(labels.merge(worker_label), ::Gitlab::Metrics::System.max_open_file_descriptors) + metrics[:process_resident_memory_bytes].set(labels.merge(worker_label), System.memory_usage) + metrics[:process_start_time_seconds].set(labels.merge(worker_label), ::Gitlab::Metrics::System.process_start_time) sample_gc metrics[:sampler_duration].increment(labels, System.monotonic_time - start_time) diff --git a/lib/gitlab/metrics/system.rb b/lib/gitlab/metrics/system.rb index a269a8688e9..ecd558d7ec7 100644 --- a/lib/gitlab/metrics/system.rb +++ b/lib/gitlab/metrics/system.rb @@ -33,6 +33,13 @@ module Gitlab max_fds end + + def self.process_start_time + start_time_in_jiffies = Sys::ProcTable.ps(pid: Process.pid).starttime + return 0 unless start_time_in_jiffies + + start_time_in_jiffies / 100 + end else def self.memory_usage 0.0 @@ -45,6 +52,10 @@ module Gitlab def self.max_open_file_descriptors 0 end + + def self.process_start_time + 0 + end end # THREAD_CPUTIME is not supported on OS X @@ -60,17 +71,6 @@ module Gitlab end end - # CLOCK_BOOTTIME is not supported on OS X - if Process.const_defined?(:CLOCK_BOOTTIME) - def self.process_start_time - Process - .clock_gettime(Process::CLOCK_BOOTTIME, :float_second) - end - else - def self.process_start_time - 0.0 - end - end # Returns the current real time in a given precision. # # Returns the time as a Float for precision = :float_second. diff --git a/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb b/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb index 0fafcb8e380..aaf8c9fa2a0 100644 --- a/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb +++ b/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb @@ -20,10 +20,10 @@ describe Gitlab::Metrics::Samplers::RubySampler do sampler.sample end - it 'adds a metric containing the memory usage' do + it 'adds a metric containing the process resident memory bytes' do expect(Gitlab::Metrics::System).to receive(:memory_usage).and_return(9000) - expect(sampler.metrics[:memory_usage]).to receive(:set).with({}, 9000) + expect(sampler.metrics[:process_resident_memory_bytes]).to receive(:set).with({}, 9000) sampler.sample end @@ -37,7 +37,7 @@ describe Gitlab::Metrics::Samplers::RubySampler do sampler.sample end - it 'adds a metric containing the processes total cpu time' do + it 'adds a metric containing the process total cpu time' do expect(Gitlab::Metrics::System).to receive(:cpu_time).and_return(0.51) expect(sampler.metrics[:process_cpu_seconds_total]).to receive(:set).with({}, 0.51) diff --git a/spec/lib/gitlab/metrics/system_spec.rb b/spec/lib/gitlab/metrics/system_spec.rb index 2de6821bb79..b0603d96eb2 100644 --- a/spec/lib/gitlab/metrics/system_spec.rb +++ b/spec/lib/gitlab/metrics/system_spec.rb @@ -19,6 +19,12 @@ describe Gitlab::Metrics::System do expect(described_class.max_open_file_descriptors).to be > 0 end end + + describe '.process_start_time' do + it 'returns the process start time' do + expect(described_class.process_start_time).to be > 0 + end + end else describe '.memory_usage' do it 'returns 0.0' do @@ -37,6 +43,12 @@ describe Gitlab::Metrics::System do expect(described_class.max_open_file_descriptors).to eq(0) end end + + describe 'process_start_time' do + it 'returns 0' do + expect(described_class.process_start_time).to eq(0) + end + end end describe '.cpu_time' do @@ -56,10 +68,4 @@ describe Gitlab::Metrics::System do expect(described_class.monotonic_time).to be_an(Float) end end - - describe '.process_start_time' do - it 'returns a Float' do - expect(described_class.process_start_time).to be_an(Float) - end - end end -- GitLab From 2f6a1e77bc8d3b57eddc337ec8f4485d0f26b32c Mon Sep 17 00:00:00 2001 From: Ryan Cobb Date: Fri, 26 Apr 2019 10:16:53 -0600 Subject: [PATCH 4/7] Cleanup syntax in System max_open_file_descriptors --- lib/gitlab/metrics/system.rb | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/gitlab/metrics/system.rb b/lib/gitlab/metrics/system.rb index ecd558d7ec7..ce0bb82d138 100644 --- a/lib/gitlab/metrics/system.rb +++ b/lib/gitlab/metrics/system.rb @@ -27,11 +27,9 @@ module Gitlab def self.max_open_file_descriptors match = File.read('/proc/self/limits').match(/Max open files\s*(\d+)/) - if match && match[1] - max_fds = match[1].to_i - end + return unless match && match[1] - max_fds + match[1].to_i end def self.process_start_time -- GitLab From 17986d91a5a54a8f68c45818073e77689f3c1fd6 Mon Sep 17 00:00:00 2001 From: Ryan Cobb Date: Mon, 29 Apr 2019 12:13:02 -0600 Subject: [PATCH 5/7] Add back ruby_memory_bytes metric, limit duplication, clean up This adds back ruby_memory_bytes for backwards compatibility, limits code duplication, cleans up unused methods, and limits the unicorn worker sampling scope. --- .../monitoring/prometheus/gitlab_metrics.md | 5 +++-- lib/gitlab/metrics/samplers/ruby_sampler.rb | 11 ++++++++++- lib/gitlab/metrics/samplers/unicorn_sampler.rb | 16 ++++++++-------- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/doc/administration/monitoring/prometheus/gitlab_metrics.md b/doc/administration/monitoring/prometheus/gitlab_metrics.md index 8885236e7be..ecde09f5cd8 100644 --- a/doc/administration/monitoring/prometheus/gitlab_metrics.md +++ b/doc/administration/monitoring/prometheus/gitlab_metrics.md @@ -58,10 +58,11 @@ Some basic Ruby runtime metrics are available: | ruby_gc_duration_seconds_total | Counter | 11.1 | Time spent by Ruby in GC | | ruby_gc_stat_... | Gauge | 11.1 | Various metrics from [GC.stat] | | ruby_file_descriptors | Gauge | 11.1 | File descriptors per process | -| ruby_process_resident_memory_bytes | Gauge | 11.1 | Memory usage by process | +| ruby_memory_bytes | Gauge | 11.1 | Memory usage by process | | ruby_sampler_duration_seconds_total | Counter | 11.1 | Time spent collecting stats | -| ruby_process_cpu_seconds_total | Gauge | 11.11 | Total amount of cpu time per process | +| ruby_process_cpu_seconds_total | Gauge | 11.11 | Total amount of CPU time per process | | ruby_process_max_fds | Gauge | 11.11 | Maximum number of open file descriptors per process | +| ruby_process_resident_memory_bytes | Gauge | 11.11 | Memory usage by process | | ruby_process_start_time_seconds | Gauge | 11.11 | The time the process started after system boot in seconds | [GC.stat]: https://ruby-doc.org/core-2.3.0/GC.html#method-c-stat diff --git a/lib/gitlab/metrics/samplers/ruby_sampler.rb b/lib/gitlab/metrics/samplers/ruby_sampler.rb index 5740380e63e..4d9c43f37e7 100644 --- a/lib/gitlab/metrics/samplers/ruby_sampler.rb +++ b/lib/gitlab/metrics/samplers/ruby_sampler.rb @@ -25,6 +25,7 @@ module Gitlab def init_metrics metrics = { file_descriptors: ::Gitlab::Metrics.gauge(with_prefix(:file, :descriptors), 'File descriptors used', labels, :livesum), + memory_bytes: ::Gitlab::Metrics.gauge(with_prefix(:memory, :bytes), 'Memory used', labels, :livesum), process_cpu_seconds_total: ::Gitlab::Metrics.gauge(with_prefix(:process, :cpu_seconds_total), 'Process CPU seconds total'), process_max_fds: ::Gitlab::Metrics.gauge(with_prefix(:process, :max_fds), 'Process max fds'), process_resident_memory_bytes: ::Gitlab::Metrics.gauge(with_prefix(:process, :resident_memory_bytes), 'Memory used', labels, :livesum), @@ -46,8 +47,8 @@ module Gitlab metrics[:file_descriptors].set(labels.merge(worker_label), System.file_descriptor_count) metrics[:process_cpu_seconds_total].set(labels.merge(worker_label), ::Gitlab::Metrics::System.cpu_time) metrics[:process_max_fds].set(labels.merge(worker_label), ::Gitlab::Metrics::System.max_open_file_descriptors) - metrics[:process_resident_memory_bytes].set(labels.merge(worker_label), System.memory_usage) metrics[:process_start_time_seconds].set(labels.merge(worker_label), ::Gitlab::Metrics::System.process_start_time) + set_memory_usage_metrics sample_gc metrics[:sampler_duration].increment(labels, System.monotonic_time - start_time) @@ -67,6 +68,14 @@ module Gitlab metrics[:total_time].increment(labels, GC::Profiler.total_time) end + def set_memory_usage_metrics + memory_usage = System.memory_usage + memory_labels = labels.merge(worker_label) + + metrics[:memory_bytes].set(memory_labels, memory_usage) + metrics[:process_resident_memory_bytes].set(memory_labels, memory_usage) + end + def worker_label return {} unless defined?(Unicorn::Worker) diff --git a/lib/gitlab/metrics/samplers/unicorn_sampler.rb b/lib/gitlab/metrics/samplers/unicorn_sampler.rb index c7063c5ba28..1b6c52ac0bf 100644 --- a/lib/gitlab/metrics/samplers/unicorn_sampler.rb +++ b/lib/gitlab/metrics/samplers/unicorn_sampler.rb @@ -27,13 +27,10 @@ module Gitlab def sample Raindrops::Linux.tcp_listener_stats(tcp_listeners).each do |addr, stats| - metrics[:unicorn_active_connections].set({ socket_type: 'tcp', socket_address: addr }, stats.active) - metrics[:unicorn_queued_connections].set({ socket_type: 'tcp', socket_address: addr }, stats.queued) + set_unicorn_connection_metrics('tcp', addr, stats) end - Raindrops::Linux.unix_listener_stats(unix_listeners).each do |addr, stats| - metrics[:unicorn_active_connections].set({ socket_type: 'unix', socket_address: addr }, stats.active) - metrics[:unicorn_queued_connections].set({ socket_type: 'unix', socket_address: addr }, stats.queued) + set_unicorn_connection_metrics('unix', addr, stats) end metrics[:unicorn_workers].set({}, unicorn_workers_count) @@ -45,8 +42,11 @@ module Gitlab @tcp_listeners ||= Unicorn.listener_names.grep(%r{\A[^/]+:\d+\z}) end - def pid - @pid ||= Process.pid + def set_unicorn_connection_metrics(type, addr, stats) + labels = { socket_type: type, socket_address: addr } + + metrics[:unicorn_active_connections].set(labels, stats.active) + metrics[:unicorn_queued_connections].set(labels, stats.queued) end def unix_listeners @@ -58,7 +58,7 @@ module Gitlab end def unicorn_workers_count - Sys::ProcTable.ps.select {|p| p.cmdline.match(/unicorn_rails worker/)}.count + Sys::ProcTable.ps.select {|p| p.cmdline.match(/unicorn_rails worker.+ #{Rails.root.to_s}/)}.count end end end -- GitLab From 8ad7cbf7739ef1111a816e18786ac8cea8d303a4 Mon Sep 17 00:00:00 2001 From: Ryan Cobb Date: Tue, 30 Apr 2019 12:26:16 -0600 Subject: [PATCH 6/7] Cleanup spec by removing not needed let --- spec/lib/gitlab/metrics/samplers/unicorn_sampler_spec.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/spec/lib/gitlab/metrics/samplers/unicorn_sampler_spec.rb b/spec/lib/gitlab/metrics/samplers/unicorn_sampler_spec.rb index 0edbfc869a6..090e456644f 100644 --- a/spec/lib/gitlab/metrics/samplers/unicorn_sampler_spec.rb +++ b/spec/lib/gitlab/metrics/samplers/unicorn_sampler_spec.rb @@ -79,12 +79,11 @@ describe Gitlab::Metrics::Samplers::UnicornSampler do end context 'additional metrics' do - let(:cpu_time) { 3.14 } let(:unicorn_workers) { 2 } before do allow(unicorn).to receive(:listener_names).and_return([""]) - allow(::Gitlab::Metrics::System).to receive(:cpu_time).and_return(cpu_time) + allow(::Gitlab::Metrics::System).to receive(:cpu_time).and_return(3.14) allow(subject).to receive(:unicorn_workers_count).and_return(unicorn_workers) end -- GitLab From c16a7a8c563fe208bdb1331654d6e9c05e188596 Mon Sep 17 00:00:00 2001 From: Ryan Cobb Date: Wed, 1 May 2019 12:32:13 -0600 Subject: [PATCH 7/7] Clarify wording in metrics documentation --- doc/administration/monitoring/prometheus/gitlab_metrics.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/administration/monitoring/prometheus/gitlab_metrics.md b/doc/administration/monitoring/prometheus/gitlab_metrics.md index ecde09f5cd8..81a6e6bca9f 100644 --- a/doc/administration/monitoring/prometheus/gitlab_metrics.md +++ b/doc/administration/monitoring/prometheus/gitlab_metrics.md @@ -62,8 +62,8 @@ Some basic Ruby runtime metrics are available: | ruby_sampler_duration_seconds_total | Counter | 11.1 | Time spent collecting stats | | ruby_process_cpu_seconds_total | Gauge | 11.11 | Total amount of CPU time per process | | ruby_process_max_fds | Gauge | 11.11 | Maximum number of open file descriptors per process | -| ruby_process_resident_memory_bytes | Gauge | 11.11 | Memory usage by process | -| ruby_process_start_time_seconds | Gauge | 11.11 | The time the process started after system boot in seconds | +| ruby_process_resident_memory_bytes | Gauge | 11.11 | Memory usage by process, measured in bytes | +| ruby_process_start_time_seconds | Gauge | 11.11 | The elapsed time between system boot and the process started, measured in seconds | [GC.stat]: https://ruby-doc.org/core-2.3.0/GC.html#method-c-stat -- GitLab