Linux lhjmq-records 5.15.0-118-generic #128-Ubuntu SMP Fri Jul 5 09:28:59 UTC 2024 x86_64
Your IP : 3.142.212.225
# Copyright Red Hat 2020, Jake Hunsaker <jhunsake@redhat.com>
# This file is part of the sos project: https://github.com/sosreport/sos
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions of
# version 2 of the GNU General Public License.
#
# See the LICENSE file in the source distribution for further information.
import fnmatch
import inspect
import json
import os
import random
import re
import string
import socket
import shutil
import sys
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor
from getpass import getpass
from pathlib import Path
from pipes import quote
from textwrap import fill
from sos.cleaner import SoSCleaner
from sos.collector.sosnode import SosNode
from sos.options import ClusterOption, str_to_bool
from sos.component import SoSComponent
from sos.utilities import bold
from sos import __version__
COLLECTOR_CONFIG_DIR = '/etc/sos/groups.d'
class SoSCollector(SoSComponent):
"""
sos collect, or SoS Collector, is the formerly standalone sos-collector
project, brought into sos natively in 4.0 and later.
It is meant to collect sos reports from an arbitrary number of remote
nodes, as well as the localhost, at the same time. These nodes may be
either user defined, defined by some clustering software, or both.
For cluster defined lists of nodes, cluster profiles exist that not only
define how these node lists are generated but may also influence the
sos report command run on nodes depending upon their role within the
cluster.
Nodes are connected to via a 'transport' which defaults to the use of
OpenSSH's Control Persist feature. Other transport types are available, and
may be specifically linked to use with a certain cluster profile (or, at
minimum, a node within a certain cluster type even if that profile is not
used).
sos collect may be run from either a node within the cluster that is
capable of enumerating/discovering the other cluster nodes, or may be run
from a user's workstation and instructed to first connect to such a node
via the --primary option. If run in the latter manner, users will likely
want to use the --no-local option, as by default sos collect will also
collect an sos report locally.
Users should expect this command to result in a tarball containing one or
more sos report archives on the system that sos collect was executed on.
"""
# One-line summary of what this component does.
desc = 'Collect an sos report from multiple nodes simultaneously'
# Default value for every option this component knows about, keyed by the
# option's attribute name.
# NOTE(review): presumably merged with parsed CLI values by SoSComponent to
# build self.opts -- confirm against the base class.
# NOTE(review): a few defaults here differ from the argparse defaults set in
# add_parser_options (e.g. 'chroot' is 'auto' here but '' there) -- confirm
# which one is meant to win.
arg_defaults = {
'all_logs': False,
'alloptions': False,
'allow_system_changes': False,
'become_root': False,
'case_id': False,
'chroot': 'auto',
'clean': False,
'cluster_options': [],
'cluster_type': None,
'container_runtime': 'auto',
'domains': [],
'disable_parsers': [],
'enable_plugins': [],
'encrypt_key': '',
'encrypt_pass': '',
'group': None,
'image': '',
'force_pull_image': True,
'jobs': 4,
'keywords': [],
'keyword_file': None,
'keep_binary_files': False,
'label': '',
'list_options': False,
'log_size': 0,
'map_file': '/etc/sos/cleaner/default_mapping',
'primary': '',
'namespaces': None,
'nodes': [],
'no_env_vars': False,
'no_local': False,
'nopasswd_sudo': False,
'no_pkg_check': False,
'no_update': False,
'only_plugins': [],
'password': False,
'password_per_node': False,
'plugopts': [],
'plugin_timeout': None,
'cmd_timeout': None,
'preset': '',
'registry_user': None,
'registry_password': None,
'registry_authfile': None,
'save_group': '',
'since': '',
'skip_commands': [],
'skip_files': [],
'skip_plugins': [],
'sos_opt_line': '',
'ssh_key': '',
'ssh_port': 22,
'ssh_user': 'root',
'timeout': 600,
'transport': 'auto',
'verify': False,
'usernames': [],
'upload': False,
'upload_url': None,
'upload_directory': None,
'upload_user': None,
'upload_pass': None,
'upload_method': 'auto',
'upload_no_ssl_verify': False,
'upload_protocol': 'auto'
}
def __init__(self, parser, parsed_args, cmdline_args):
    """Set up collector state, manifest sections and cluster profiles.

    :param parser: passed through unchanged to ``SoSComponent.__init__``
    :param parsed_args: passed through unchanged to the base class
    :param cmdline_args: passed through unchanged to the base class

    Unless ``--list-options`` was requested, the ``--nodes`` and cluster
    option strings are parsed and verified here as well. A
    ``KeyboardInterrupt`` during that step ends the run via ``self.exit``
    with code 130; any other exception propagates to the caller.
    """
    super(SoSCollector, self).__init__(parser, parsed_args, cmdline_args)
    # restrict everything we create from here on to the owner only
    os.umask(0o77)
    self.client_list = []
    self.node_list = []
    self.primary = None
    self.retrieved = 0
    self.cluster = None
    self.cluster_type = None
    # add manifest section for collect
    self.manifest.components.add_section('collect')
    # shorthand reference
    self.collect_md = self.manifest.components.collect
    # placeholders in manifest organization
    self.collect_md.add_field('cluster_type', 'none')
    self.collect_md.add_list('node_list')
    # add a place to set/get the sudo password, but do not expose it via
    # the CLI, because security is a thing
    setattr(self.opts, 'sudo_pw', '')
    # get the local hostname and addresses to filter from results later
    self.hostname = socket.gethostname()
    try:
        self.ip_addrs = list({
            i[4][0] for i in socket.getaddrinfo(socket.gethostname(), None)
        })
    except Exception:
        # this is almost always a DNS issue with reverse resolution
        # set a safe fallback and log the issue
        self.log_error(
            "Could not get a list of IP addresses from this hostname. "
            "This may indicate a DNS issue in your environment"
        )
        self.ip_addrs = ['127.0.0.1']
    self._parse_options()
    self.clusters = self.load_clusters()
    if not self.opts.list_options:
        try:
            self.parse_node_strings()
            self.parse_cluster_options()
            self.log_debug('Executing %s' % ' '.join(sys.argv))
            self.log_debug("Found cluster profiles: %s"
                           % self.clusters.keys())
            self.verify_cluster_options()
        except KeyboardInterrupt:
            self.exit('Exiting on user cancel', 130)
def load_clusters(self):
    """Instantiate every cluster profile shipped with this installation.

    Each class found under ``sos.collector.clusters`` is constructed with
    ``self.commons``.

    :returns: mapping of class name to profile instance
    :rtype: ``dict``
    """
    import sos.collector.clusters
    discovered = self._load_modules(sos.collector.clusters, 'clusters')
    return {entry[0]: entry[1](self.commons) for entry in discovered}
@classmethod
def _load_modules(cls, package, submod):
    """Collect importable classes from each directory in ``package.__path__``.

    :param package: an imported package exposing ``__path__``
    :param submod: sub-package name handed on to ``_find_modules_in_path``
    :returns: the concatenated results of ``_find_modules_in_path``
    :rtype: ``list``
    """
    found = []
    # entries of __path__ that are not directories are silently ignored
    for pkg_dir in filter(os.path.isdir, package.__path__):
        found += cls._find_modules_in_path(pkg_dir, submod)
    return found
@classmethod
def _find_modules_in_path(cls, path, modulename):
    """Import every plain ``.py`` file found directly inside ``path``.

    path - the filesystem path of the package
    modulename - the name of the module in the package

    E.G. a path of 'clusters', and a modulename of 'ovirt' equates to
    importing sos.collector.clusters.ovirt

    Files are visited in sorted order. Names that do not end in ``.py`` or
    that contain a double underscore are skipped. A missing ``path``
    yields an empty list.
    """
    found = []
    if not os.path.exists(path):
        return found
    for entry in sorted(os.listdir(path)):
        if not entry.endswith('.py') or '__' in entry:
            continue
        stem = os.path.splitext(entry)[0]
        dotted = 'sos.collector.%s.%s' % (modulename, stem)
        found.extend(cls._import_modules(dotted))
    return found
@classmethod
def _import_modules(cls, modname):
    """Import ``modname`` and return the classes it exposes.

    :param modname: dotted module path with at least three components,
                    e.g. ``sos.collector.clusters.ovirt``
    :returns: ``(name, class)`` tuples as produced by
              ``inspect.getmembers``, minus any named ``SosHost`` or
              ``Cluster``
    :rtype: ``list``
    """
    mod_short_name = modname.split('.')[2]
    module = __import__(modname, globals(), locals(), [mod_short_name])
    # Build a new list rather than removing entries while iterating:
    # removing in place skips the element after each removal, so one of
    # two adjacent excluded names would be kept.
    return [
        member for member in inspect.getmembers(module, inspect.isclass)
        if member[0] not in ('SosHost', 'Cluster')
    ]
def parse_node_strings(self):
"""Parses the given --nodes option(s) to properly format the regex
list that we use. We cannot blindly split on ',' chars since it is a
valid regex character, so we need to scan along the given strings and
check at each comma if we should use the preceding string by itself
or not, based on if there is a valid regex at that index.

The rebuilt list replaces ``self.opts.nodes``; nothing is returned.
If ``self.opts.nodes`` is empty the option is left untouched.
"""
if not self.opts.nodes:
return
nodes = []
# normalize a single value into a one-element list
if not isinstance(self.opts.nodes, list):
self.opts.nodes = [self.opts.nodes]
for node in self.opts.nodes:
# candidate split points: every comma, plus the end of the string
idxs = [i for i, m in enumerate(node) if m == ',']
idxs.append(len(node))
start = 0
pos = 0
for idx in idxs:
try:
pos = idx
reg = node[start:idx]
# NOTE(review): the candidate is escaped before being compiled, so
# presumably this cannot raise re.error -- confirm that escaping is
# intended here.
re.compile(re.escape(reg))
# make sure we aren't splitting a regex value
if '[' in reg and ']' not in reg:
continue
# every piece after the first starts at the comma's own index, so
# drop that leading comma
nodes.append(reg.lstrip(','))
start = idx
except re.error:
continue
# NOTE(review): pos is set to idx on every iteration and the last idx
# is len(node), so this branch looks unreachable -- confirm.
if pos != len(node):
nodes.append(node[pos+1:])
self.opts.nodes = nodes
@classmethod
def add_parser_options(cls, parser):
"""Register the command line options of this component on ``parser``.

Options are added in three argument groups: 'Report Passthru Options',
'Collector Options' and 'Cleaner/Masking Options'.

:param parser: object providing ``add_argument_group``
               (NOTE(review): presumably an ``argparse.ArgumentParser``)
"""
# Add the supported report passthru options to a group for logical
# grouping in --help display
sos_grp = parser.add_argument_group(
'Report Passthru Options',
'These options control how report is run on nodes'
)
sos_grp.add_argument('-a', '--alloptions', action='store_true',
help='Enable all sos report options')
sos_grp.add_argument('--all-logs', action='store_true',
help='Collect logs regardless of size')
sos_grp.add_argument('--allow-system-changes', action='store_true',
default=False,
help=('Allow sosreport to run commands that may '
'alter system state'))
# NOTE(review): the default '' is not one of the listed choices and
# differs from arg_defaults['chroot'] ('auto') -- confirm intended.
sos_grp.add_argument('--chroot', default='',
choices=['auto', 'always', 'never'],
help="chroot executed commands to SYSROOT")
sos_grp.add_argument("--container-runtime", default="auto",
help="Default container runtime to use for "
"collections. 'auto' for policy control.")
sos_grp.add_argument('-e', '--enable-plugins', action="extend",
help='Enable specific plugins for sosreport')
sos_grp.add_argument('-k', '--plugin-option', '--plugopts',
action="extend", dest='plugopts',
help='Plugin option as plugname.option=value')
sos_grp.add_argument('--log-size', default=0, type=int,
help='Limit the size of individual logs (in MiB)')
sos_grp.add_argument('-n', '--skip-plugins', action="extend",
help='Skip these plugins')
sos_grp.add_argument('-o', '--only-plugins', action="extend",
default=[],
help='Run these plugins only')
sos_grp.add_argument('--namespaces', default=None,
help='limit number of namespaces to collect '
'output for - 0 means unlimited')
sos_grp.add_argument('--no-env-vars', action='store_true',
default=False,
help='Do not collect env vars in sosreports')
sos_grp.add_argument('--plugin-timeout', type=int, default=None,
help='Set the global plugin timeout value')
sos_grp.add_argument('--cmd-timeout', type=int, default=None,
help='Set the global command timeout value')
sos_grp.add_argument('--since', default=None,
help=('Escapes archived files older than date. '
'This will also affect --all-logs. '
'Format: YYYYMMDD[HHMMSS]'))
sos_grp.add_argument('--skip-commands', default=[], action='extend',
dest='skip_commands',
help="do not execute these commands")
sos_grp.add_argument('--skip-files', default=[], action='extend',
dest='skip_files',
help="do not collect these files")
sos_grp.add_argument('--verify', action="store_true",
help='perform pkg verification during collection')
# Add the collector specific options to a separate group to keep
# everything organized
collect_grp = parser.add_argument_group(
'Collector Options',
'These options control how collect runs locally'
)
collect_grp.add_argument('-b', '--become', action='store_true',
dest='become_root',
help='Become root on the remote nodes')
collect_grp.add_argument('--case-id', help='Specify case number')
collect_grp.add_argument('--cluster-type',
help='Specify a type of cluster profile')
collect_grp.add_argument('-c', '--cluster-option',
dest='cluster_options', action='append',
help=('Specify a cluster options used by a '
'profile and takes the form of '
'cluster.option=value'))
collect_grp.add_argument('--group', default=None,
help='Use a predefined group JSON file')
collect_grp.add_argument('--save-group', default='',
help='Save a resulting node list to a group')
collect_grp.add_argument('--image',
help=('Specify the container image to use for'
' containerized hosts.'))
# the value is converted by str_to_bool before being checked against
# the (True, False) choices
collect_grp.add_argument('--force-pull-image', '--pull',
default=True, choices=(True, False),
type=str_to_bool,
help='Force pull the container image even if '
'it already exists on the host')
collect_grp.add_argument('--registry-user', default=None,
help='Username to authenticate to the '
'registry with for pulling an image')
collect_grp.add_argument('--registry-password', default=None,
help='Password to authenticate to the '
'registry with for pulling an image')
collect_grp.add_argument('--registry-authfile', default=None,
help='Use this authfile to provide registry '
'authentication when pulling an image')
collect_grp.add_argument('-i', '--ssh-key', help='Specify an ssh key')
collect_grp.add_argument('-j', '--jobs', default=4, type=int,
help='Number of concurrent nodes to collect')
collect_grp.add_argument('-l', '--list-options', action="store_true",
help='List options available for profiles')
collect_grp.add_argument('--label',
help='Assign a label to the archives')
# --manager and --controller are accepted as aliases of --primary
collect_grp.add_argument('--primary', '--manager', '--controller',
dest='primary', default='',
help='Specify a primary node for cluster '
'enumeration')
collect_grp.add_argument('--nopasswd-sudo', action='store_true',
help='Use passwordless sudo on nodes')
collect_grp.add_argument('--nodes', action="append",
help=('Provide a comma delimited list of '
'nodes, or a regex to match against'))
collect_grp.add_argument('--no-pkg-check', action='store_true',
help=('Do not run package checks. Use this '
'with --cluster-type if there are rpm '
'or apt issues on node'))
collect_grp.add_argument('--no-local', action='store_true',
help='Do not collect a report from localhost')
collect_grp.add_argument('-p', '--ssh-port', type=int,
help='Specify SSH port for all nodes')
collect_grp.add_argument('--password', action='store_true',
default=False,
help='Prompt for user password for nodes')
collect_grp.add_argument('--password-per-node', action='store_true',
default=False,
help='Prompt for password for each node')
collect_grp.add_argument('--preset', default='', required=False,
help='Specify a sos preset to use')
# stored as sos_opt_line even though the flag is spelled --sos-cmd
collect_grp.add_argument('--sos-cmd', dest='sos_opt_line',
help=('Manually specify the commandline '
'for sos report on nodes'))
collect_grp.add_argument('--ssh-user',
help='Specify an SSH user. Default root')
collect_grp.add_argument('--timeout', type=int, required=False,
help='Timeout for sosreport on each node.')
collect_grp.add_argument('--transport', default='auto', type=str,
help='Remote connection transport to use')
collect_grp.add_argument("--upload", action="store_true",
default=False,
help="Upload archive to a policy-default "
"location")
collect_grp.add_argument("--upload-url", default=None,
help="Upload the archive to specified server")
collect_grp.add_argument("--upload-directory", default=None,
help="Specify upload directory for archive")
collect_grp.add_argument("--upload-user", default=None,
help="Username to authenticate with")
collect_grp.add_argument("--upload-pass", default=None,
help="Password to authenticate with")
collect_grp.add_argument("--upload-method", default='auto',
choices=['auto', 'put', 'post'],
help="HTTP method to use for uploading")
collect_grp.add_argument("--upload-no-ssl-verify", default=False,
action='store_true',
help="Disable SSL verification for upload url"
)
collect_grp.add_argument("--upload-protocol", default='auto',
choices=['auto', 'https', 'ftp', 'sftp'],
help="Manually specify the upload protocol")
# Group the cleaner options together
cleaner_grp = parser.add_argument_group(
'Cleaner/Masking Options',
'These options control how data obfuscation is performed'
)
cleaner_grp.add_argument('--clean', '--cleaner', '--mask',
dest='clean',
default=False, action='store_true',
help='Obfuscate sensitive information')
cleaner_grp.add_argument('--keep-binary-files', default=False,
action='store_true', dest='keep_binary_files',
help='Keep unprocessable binary files in the '
'archive instead of removing them')
cleaner_grp.add_argument('--domains', dest='domains', default=[],
action='extend',
help='Additional domain names to obfuscate')
cleaner_grp.add_argument('--disable-parsers', action='extend',
default=[], dest='disable_parsers',
help=('Disable specific parsers, so that '
'those elements are not obfuscated'))
cleaner_grp.add_argument('--keywords', action='extend', default=[],
dest='keywords',
help='List of keywords to obfuscate')
# NOTE(review): the help text below probably means "a file of keywords".
cleaner_grp.add_argument('--keyword-file', default=None,
dest='keyword_file',
help='Provide a file a keywords to obfuscate')
cleaner_grp.add_argument('--no-update', action='store_true',
default=False, dest='no_update',
help='Do not update the default cleaner map')
cleaner_grp.add_argument('--map-file', dest='map_file',
default='/etc/sos/cleaner/default_mapping',
help=('Provide a previously generated mapping'
' file for obfuscation'))
cleaner_grp.add_argument('--usernames', dest='usernames', default=[],
action='extend',
help='List of usernames to obfuscate')
@classmethod
def display_help(cls, section):
    """Fill ``section`` with the detailed help for ``sos collect``.

    The title is set, the class docstring is added as body text, and a
    list of related help sections follows.

    :param section: help section object providing ``set_title`` and
                    ``add_text``
    """
    related = {
        'collect.clusters': 'Information on cluster profiles',
        'collect.clusters.$cluster': 'Specific profile information',
        'collect.transports': 'Information on how connections are made',
        'collect.transports.$transport': 'Specific transport information'
    }
    section.set_title('SoS Collect Detailed Help')
    section.add_text(cls.__doc__)
    section.add_text(
        'The following help sections may be of further interest:\n'
    )
    for topic, summary in related.items():
        entry = "{:>8}{:<40}{:<30}".format(' ', bold(topic), summary)
        section.add_text(entry, newline=False)
def exit(self, msg=None, error=0, force=False):
    """Run cleanup and terminate the process with the given exit code.

    :param msg: Log the provided message as an error
    :type msg: ``str``
    :param error: The exit code to use when terminating
    :type error: ``int``
    :param force: Use os._exit() to break out of nested threads if needed
    :type force: ``bool``
    """
    if self.cluster:
        self.cluster.cleanup()
    if msg:
        self.log_error(msg)
    try:
        self.close_all_connections()
    except Exception:
        # best effort only; a failure here must not block termination
        pass
    # keep the tempdir around when a user issues a keyboard interrupt
    # like we do for report
    if error != 130:
        self.cleanup()
    terminate = os._exit if force else sys.exit
    terminate(error)
def _parse_options(self):
    """Build ``self.commons``, the shared settings handed to other parts
    of the collector.

    ``need_sudo`` is true for any ssh user other than root. ``hostlen``
    is the length of the longer of the primary option and the local
    hostname.
    """
    opts = self.opts
    self.commons = {
        'cmdlineopts': opts,
        'need_sudo': opts.ssh_user != 'root',
        'tmpdir': self.tmpdir,
        'hostlen': max(len(opts.primary), len(self.hostname)),
        'policy': self.policy
    }
def parse_cluster_options(self):
    """Convert ``cluster.option=value`` strings into ``ClusterOption``s.

    ``self.opts.cluster_options`` is replaced by the list of parsed
    options. An option without a usable value is given the string
    'True'.
    """
    if not isinstance(self.opts.cluster_options, list):
        self.opts.cluster_options = [self.opts.cluster_options]
    parsed = []
    for raw in self.opts.cluster_options:
        dotted = raw.split('.')
        profile = dotted[0]
        opt_name = dotted[1].split('=')[0]
        try:
            # there are no instances currently where any cluster option
            # should contain a legitimate space.
            opt_value = raw.split('=')[1].split()[0]
        except IndexError:
            # conversion to boolean is handled during validation
            opt_value = 'True'
        parsed.append(
            ClusterOption(opt_name, opt_value, opt_value.__class__, profile)
        )
    self.opts.cluster_options = parsed
def verify_cluster_options(self):
    """Check each requested cluster option against the loaded profiles.

    A matching option has its value replaced by the result of
    ``_validate_option``. An option no profile defines ends the run via
    ``self.exit`` with code 1.
    """
    for requested in self.opts.cluster_options or ():
        found = False
        for profile_name, profile in self.clusters.items():
            for known in profile.options:
                if (requested.name == known.name
                        and requested.cluster == profile_name):
                    found = True
                    requested.value = self._validate_option(known,
                                                            requested)
                    break
        if not found:
            self.exit('Unknown cluster option provided: %s.%s'
                      % (requested.cluster, requested.name), 1)
def _validate_option(self, default, cli):
    """Validate a CLI-supplied cluster option against the profile's
    definition and return the value to use.

    For non-boolean options the types must match, and the CLI value is
    returned unchanged. For boolean options the string is mapped to
    ``True`` ('true', 'on', 'yes') or ``False`` ('false', 'off', 'no'),
    ignoring case. Anything invalid ends the run via ``self.exit`` with
    code 1.

    :param default: the option as defined by the cluster profile
    :param cli: the option as given on the command line
    """
    if default.opt_type != bool:
        if default.opt_type != cli.opt_type:
            template = "Invalid option type for %s. Expected %s got %s"
            self.exit(
                template % (cli.name, default.opt_type, cli.opt_type), 1)
        return cli.value
    lowered = cli.value.lower()
    if lowered in ('true', 'on', 'yes'):
        return True
    if lowered in ('false', 'off', 'no'):
        return False
    template = ("Invalid value for %s. Accepted values are: 'true', "
                "'false', 'on', 'off', 'yes', 'no'.")
    self.exit(template % cli.name, 1)
def log_info(self, msg):
    """Emit ``msg`` at INFO level through ``self.soslog``."""
    logger = self.soslog
    logger.info(msg)
def log_warn(self, msg):
    """Emit ``msg`` at WARNING level through ``self.soslog``.

    ``Logger.warning`` is used because ``Logger.warn`` is a deprecated
    alias that raises a DeprecationWarning.
    NOTE(review): assumes soslog is a standard ``logging.Logger`` --
    confirm.
    """
    self.soslog.warning(msg)
def log_error(self, msg):
    """Emit ``msg`` at ERROR level through ``self.soslog``."""
    logger = self.soslog
    logger.error(msg)
def log_debug(self, msg):
    """Emit ``msg`` at DEBUG level through ``self.soslog``.

    The message is prefixed with the name of the calling function.
    """
    calling_frame = inspect.stack()[1]
    tagged = '[sos_collector:%s] %s' % (calling_frame.function, msg)
    self.soslog.debug(tagged)
def list_options(self):
    """Write the supported cluster profiles and their options to stdout.

    Options that share a name across profiles are merged into one row,
    with the owning profile names combined.
    """
    emit = sys.stdout.write
    emit('\nThe following clusters are supported by this '
         'installation\n')
    emit('Use the short name with --cluster-type or cluster '
         'options (-c)\n\n')
    for short_name in sorted(self.clusters):
        emit(" {:<15} {:30}\n".format(
            short_name,
            self.clusters[short_name].cluster_name))

    # the first option seen under a name wins; later ones only add their
    # profile names to its cluster list
    merged = {}
    for profile in self.clusters.values():
        for opt in profile.options:
            if opt.name in merged:
                owners = merged[opt.name].cluster
                for owner in opt.cluster:
                    if owner not in owners:
                        owners.append(owner)
            else:
                merged[opt.name] = opt

    emit('\nThe following cluster options are available:\n\n')
    emit(' {:25} {:15} {:<10} {:10} {:<}\n'.format(
        'Cluster',
        'Option Name',
        'Type',
        'Default',
        'Description'
    ))
    for opt_name in sorted(merged, key=lambda k: merged[k].cluster):
        opt = merged[opt_name]
        emit(' {:25} {:15} {:<10} {:<10} {:<10}\n'.format(
            ', '.join(sorted(opt.cluster)),
            opt.name,
            opt.opt_type.__name__,
            str(opt.value),
            opt.description))
    emit('\nOptions take the form of cluster.name=value'
         '\nE.G. "ovirt.no-database=True" or '
         '"pacemaker.offline=False"\n')
def delete_tmp_dir(self):
    """Recursively delete ``self.tmpdir`` and everything inside it."""
    doomed = self.tmpdir
    shutil.rmtree(doomed)
def _get_archive_name(self):
    """Generate a name for the tarball archive.

    The name is ``sos-collector[-<label>][-<case id>]-<YYYY-MM-DD>-<rand>``
    where ``<rand>`` is five random lowercase ASCII letters.

    :returns: the archive name, without directory or file extension
    :rtype: ``str``
    """
    nstr = 'sos-collector'
    if self.opts.label:
        nstr += '-%s' % self.opts.label
    if self.opts.case_id:
        nstr += '-%s' % self.opts.case_id
    dt = datetime.now().strftime('%Y-%m-%d')
    # read string.ascii_lowercase directly; assigning string.lowercase
    # would mutate the stdlib module for the whole process
    rand = ''.join(random.choice(string.ascii_lowercase) for _ in range(5))
    return '%s-%s-%s' % (nstr, dt, rand)
def _get_archive_path(self):
    """Return the full path of the tarball that will hold the collected
    sos reports.

    A new archive name is generated and stored in ``self.arc_name`` as a
    side effect.
    """
    self.arc_name = self._get_archive_name()
    compression = 'gz'
    return ''.join([self.tmpdir, '/', self.arc_name, '.tar.', compression])
def _fmt_msg(self, msg):
    """Wrap each line of ``msg`` to 80 columns.

    :param msg: text to wrap; existing line breaks are kept
    :returns: the wrapped text, each input line followed by a newline
    :rtype: ``str``
    """
    line_width = 80
    wrapped = [
        fill(text, line_width, replace_whitespace=False)
        for text in msg.splitlines()
    ]
    return ''.join(chunk + '\n' for chunk in wrapped)
def _load_group_config(self):
    """
    Attempts to load the host group specified on the command line.
    Host groups are defined via JSON files, typically saved under
    /etc/sos/groups.d/, although users can specify a full filepath
    on the commandline to point to one existing anywhere on the system

    Host groups define a list of nodes and/or regexes and optionally the
    primary and cluster-type options. Any of these keys may be absent
    from the file.

    The given value is tried as a path first, then under the user's
    ``~/.config/sos/groups.d``, then under ``COLLECTOR_CONFIG_DIR``.

    :raises OSError: if no group file exists in any of those locations
    """
    grp = self.opts.group
    paths = [
        grp,
        os.path.join(Path.home(), '.config/sos/groups.d/%s' % grp),
        os.path.join(COLLECTOR_CONFIG_DIR, grp)
    ]
    fname = next((path for path in paths if os.path.exists(path)), None)
    if fname is None:
        raise OSError("no group definition for %s" % grp)

    self.log_debug("Loading host group %s" % fname)

    with open(fname, 'r') as hf:
        _group = json.load(hf)

    # use .get() so that a group file which leaves out an optional key
    # does not fail with KeyError
    for key in ['primary', 'cluster_type']:
        if _group.get(key):
            self.log_debug("Setting option '%s' to '%s' per host group"
                           % (key, _group[key]))
            setattr(self.opts, key, _group[key])
    if _group.get('nodes'):
        self.log_debug("Adding %s to node list" % _group['nodes'])
        self.opts.nodes.extend(_group['nodes'])
def write_host_group(self):
    """
    Save the primary, cluster type and node list of this run as a host
    group JSON file so it can be reused later.

    Root writes to ``COLLECTOR_CONFIG_DIR``. Any other user writes to
    ``~/.config/sos/groups.d``, which is created if needed. The file is
    named after the ``save_group`` option and its mode is set to 0644.

    :returns: path of the file written
    :rtype: ``str``
    """
    group_cfg = {
        'name': self.opts.save_group,
        'primary': self.opts.primary,
        'cluster_type': self.cluster.cluster_type[0],
        'nodes': list(self.node_list)
    }
    if os.getuid() == 0:
        dest_dir = COLLECTOR_CONFIG_DIR
    else:
        dest_dir = os.path.join(Path.home(), '.config/sos/groups.d')
        # create the subdir within the user's home directory
        os.makedirs(dest_dir, exist_ok=True)
    dest = os.path.join(dest_dir, group_cfg['name'])
    with open(dest, 'w') as handle:
        json.dump(group_cfg, handle)
    os.chmod(dest, 0o644)
    return dest
def prep(self):
"""Prepare the collection run.

In order: hand the commons to the policy, prompt for any SSH / sudo /
root passwords that are needed, load the host group if one was given,
run the policy's pre_work, connect to the primary node (or set up
localhost as the primary), select the cluster profile, record the
results in the manifest, build the node list and optionally save it as
a host group.

Unrecoverable problems end the run through ``self.exit``.
"""
self.policy.set_commons(self.commons)
if (not self.opts.password and not
self.opts.password_per_node):
self.log_debug('password not specified, assuming SSH keys')
msg = ('sos-collector ASSUMES that SSH keys are installed on all '
'nodes unless the --password option is provided.\n')
self.ui_log.info(self._fmt_msg(msg))
try:
# prompt for the SSH password; with --password-per-node this happens
# only when a primary node was given
if ((self.opts.password or (self.opts.password_per_node and
self.opts.primary))
and not self.opts.batch):
self.log_debug('password specified, not using SSH keys')
msg = ('Provide the SSH password for user %s: '
% self.opts.ssh_user)
# the boolean option is replaced by the password text itself
self.opts.password = getpass(prompt=msg)
if ((self.commons['need_sudo'] and not self.opts.nopasswd_sudo)
and not self.opts.batch):
if not self.opts.password and not self.opts.password_per_node:
self.log_debug('non-root user specified, will request '
'sudo password')
msg = ('A non-root user has been provided. Provide sudo '
'password for %s on remote nodes: '
% self.opts.ssh_user)
self.opts.sudo_pw = getpass(prompt=msg)
else:
# reuse the SSH password for sudo
if not self.opts.nopasswd_sudo:
self.opts.sudo_pw = self.opts.password
except KeyboardInterrupt:
self.exit("\nExiting on user cancel\n", 130)
if self.opts.become_root:
if not self.opts.ssh_user == 'root':
if self.opts.batch:
msg = ("Cannot become root without obtaining root "
"password. Do not use --batch if you need "
"to become root remotely.")
self.exit(msg, 1)
self.log_debug('non-root user asking to become root remotely')
msg = ('User %s will attempt to become root. '
'Provide root password: ' % self.opts.ssh_user)
self.opts.root_password = getpass(prompt=msg)
# becoming root replaces the need for sudo
self.commons['need_sudo'] = False
else:
self.log_info('Option to become root but ssh user is root.'
' Ignoring request to change user on node')
self.opts.become_root = False
if self.opts.group:
try:
self._load_group_config()
except Exception as err:
msg = ("Could not load specified group %s: %s"
% (self.opts.group, err))
self.exit(msg, 1)
try:
self.policy.pre_work()
except KeyboardInterrupt:
self.exit("Exiting on user cancel\n", 130)
if self.opts.primary:
self.connect_to_primary()
# a remote primary was given, so no local report is collected
self.opts.no_local = True
else:
try:
can_run_local = True
local_sudo = None
skip_local_msg = (
"Local sos report generation forcibly skipped due "
"to lack of root privileges.\nEither use --nopasswd-sudo, "
"run as root, or do not use --batch so that you will be "
"prompted for a password\n"
)
# a local report needs root: ask for a sudo password unless running
# as root or passwordless sudo was requested
if (not self.opts.no_local and (os.getuid() != 0 and not
self.opts.nopasswd_sudo)):
if not self.opts.batch:
msg = ("Enter local sudo password to generate local "
"sos report: ")
local_sudo = getpass(msg)
# an empty answer means: skip the local report
if local_sudo == '':
self.ui_log.info(skip_local_msg)
can_run_local = False
self.opts.no_local = True
local_sudo = None
else:
self.ui_log.info(skip_local_msg)
can_run_local = False
self.opts.no_local = True
self.primary = SosNode('localhost', self.commons,
local_sudo=local_sudo,
load_facts=can_run_local)
except Exception as err:
self.log_debug("Unable to determine local installation: %s" %
err)
self.exit('Unable to determine local installation. Use the '
'--no-local option if localhost should not be '
'included.\nAborting...\n', 1)
# record the primary node in the manifest
self.collect_md.add_field('primary', self.primary.address)
self.collect_md.add_section('nodes')
self.collect_md.nodes.add_section(self.primary.address)
self.primary.set_node_manifest(getattr(self.collect_md.nodes,
self.primary.address))
if self.opts.cluster_type:
# an explicit cluster type of 'none' maps to the 'jbon' profile
if self.opts.cluster_type == 'none':
self.cluster = self.clusters['jbon']
else:
self.cluster = self.clusters[self.opts.cluster_type]
self.cluster_type = self.opts.cluster_type
self.cluster.primary = self.primary
else:
self.determine_cluster()
if self.cluster is None and not self.opts.nodes:
msg = ('Cluster type could not be determined and no nodes provided'
'\nAborting...')
self.exit(msg, 1)
elif self.cluster is None and self.opts.nodes:
self.log_info("Cluster type could not be determined, but --nodes "
"is provided. Attempting to continue using JBON "
"cluster type and the node list")
self.cluster = self.clusters['jbon']
self.cluster_type = 'none'
self.collect_md.add_field('cluster_type', self.cluster_type)
if self.cluster:
self.primary.cluster = self.cluster
# let the cluster profile choose the transport when left on 'auto'
if self.opts.transport == 'auto':
self.opts.transport = self.cluster.set_transport_type()
self.cluster.setup()
# a key given on the command line takes precedence over the cluster's
if self.cluster.cluster_ssh_key:
if not self.opts.ssh_key:
self.log_debug("Updating SSH key to %s per cluster"
% self.cluster.cluster_ssh_key)
self.opts.ssh_key = self.cluster.cluster_ssh_key
self.get_nodes()
if self.opts.save_group:
gname = self.opts.save_group
try:
fname = self.write_host_group()
self.log_info("Wrote group '%s' to %s" % (gname, fname))
except Exception as err:
# failing to save the group is logged but does not stop the run
self.log_error("Could not save group %s: %s" % (gname, err))
def display_nodes(self):
    """Show the nodes that will be collected from and, unless running in
    batch mode, wait for the user to confirm.

    The run ends via ``self.exit`` when there are no nodes and the
    primary is not connected.
    """
    say = self.ui_log.info
    say('')
    if not (self.node_list or self.primary.connected):
        self.exit('No nodes were detected, or nodes do not have sos '
                  'installed.\nAborting...', 1)
    say('The following is a list of nodes to collect from:')
    # the primary is listed unless it is the local host and local
    # collection is disabled
    primary_listed = (
        self.primary.connected
        and self.primary.hostname is not None
        and not (self.primary.local and self.opts.no_local)
    )
    if primary_listed:
        say('\t%-*s' % (self.commons['hostlen'], self.primary.hostname))
    for name in sorted(self.node_list):
        say("\t%-*s" % (self.commons['hostlen'], name))
    say('')
    if self.opts.batch:
        return
    try:
        input("\nPress ENTER to continue with these nodes, or press "
              "CTRL-C to quit\n")
        say("")
    except KeyboardInterrupt:
        self.exit("Exiting on user cancel", 130)
    except Exception as e:
        self.exit(repr(e), 1)
def configure_sos_cmd(self):
    """Configures the sosreport command that is run on the nodes.

    A command line given with ``--sos-cmd`` is appended, shell-quoted,
    to the base command and the method returns ``True`` straight away.
    If it contains a shell metacharacter it is discarded with a warning.

    Otherwise the command is assembled from the individual passthru
    options, stored in ``self.sos_cmd`` and ``self.commons['sos_cmd']``,
    and recorded in the manifest as ``initial_sos_cmd``.

    :returns: ``True`` when a manual command line was used, else ``None``
    """
    self.sos_cmd = 'sosreport --batch '
    if self.opts.sos_opt_line:
        filt = ['&', '|', '>', '<', ';']
        if any(f in self.opts.sos_opt_line for f in filt):
            # name the flag users actually type (--sos-cmd); sos_opt_line
            # is only the internal attribute name
            self.log_warn('Possible shell script found in provided sos '
                          'command. Ignoring --sos-cmd entirely.')
            self.opts.sos_opt_line = None
        else:
            self.sos_cmd = '%s %s' % (
                self.sos_cmd, quote(self.opts.sos_opt_line))
            self.log_debug("User specified manual sosreport command. "
                           "Command set to %s" % self.sos_cmd)
            # NOTE(review): this path returns before commons['sos_cmd']
            # and the manifest field are set -- confirm that is intended.
            return True
    sos_opts = []
    if self.opts.case_id:
        sos_opts.append('--case-id=%s' % (quote(self.opts.case_id)))
    if self.opts.alloptions:
        sos_opts.append('--alloptions')
    if self.opts.all_logs:
        sos_opts.append('--all-logs')
    if self.opts.verify:
        sos_opts.append('--verify')
    if self.opts.log_size:
        sos_opts.append(('--log-size=%s' % quote(str(self.opts.log_size))))
    if self.opts.sysroot:
        sos_opts.append('-s %s' % quote(self.opts.sysroot))
    if self.opts.chroot:
        sos_opts.append('-c %s' % quote(self.opts.chroot))
    if self.opts.compression_type != 'auto':
        sos_opts.append('-z %s' % (quote(self.opts.compression_type)))
    self.sos_cmd = self.sos_cmd + ' '.join(sos_opts)
    self.log_debug("Initial sos cmd set to %s" % self.sos_cmd)
    self.commons['sos_cmd'] = self.sos_cmd
    self.collect_md.add_field('initial_sos_cmd', self.sos_cmd)
def connect_to_primary(self):
    """Open a connection to the node given with --primary, so cluster
    checks run against it instead of the localhost.

    Any failure is logged at debug level and ends the run via
    ``self.exit`` with code 1.
    """
    target = self.opts.primary
    try:
        self.primary = SosNode(target, self.commons)
        self.ui_log.info('Connected to %s, determining cluster type...'
                         % target)
    except Exception as err:
        self.log_debug('Failed to connect to primary node: %s' % err)
        self.exit('Could not connect to primary node. Aborting...', 1)
def determine_cluster(self):
    """Pick the cluster profile matching the installation on the primary.

    Profiles from ``self.clusters`` are checked in order. For the first one
    whose ``check_enabled()`` succeeds, the profiles that come after it are
    searched for a subclass ("layered" profile) that is also enabled; if
    one is found it takes precedence over the base profile.

    The chosen profile is stored on ``self.cluster`` and
    ``self.commons['cluster']`` and its name on ``self.cluster_type``.
    Nothing is set here when no profile matches.
    """
    # profiles that have not been visited yet; searched for layered matches
    pending = list(self.clusters.values())
    for profile in self.clusters.values():
        pending.remove(profile)
        profile.primary = self.primary
        if not profile.check_enabled():
            continue

        base_cls = profile.__class__
        base_name = base_cls.__name__
        self.log_debug("Installation matches %s, checking for layered "
                       "profiles" % base_name)

        chosen = profile
        for candidate in pending:
            if not issubclass(candidate.__class__, base_cls):
                continue
            layered_name = candidate.__class__.__name__
            self.log_debug("Layered profile %s found. "
                           "Checking installation"
                           % layered_name)
            candidate.primary = self.primary
            if candidate.check_enabled():
                self.log_debug("Installation matches both layered "
                               "profile %s and base profile %s, "
                               "setting cluster type to layered "
                               "profile" % (layered_name, base_name))
                chosen = candidate
                break

        self.cluster = chosen
        self.cluster_type = chosen.name()
        self.commons['cluster'] = self.cluster
        self.ui_log.info('Cluster type set to %s' % self.cluster_type)
        # first matching profile wins; later profiles are not checked
        return
def get_nodes_from_cluster(self):
    """Return the node list reported by the determined cluster profile.

    An empty list is returned when no cluster type has been set.
    """
    if not self.cluster_type:
        return []
    discovered = self.cluster._get_nodes()
    self.log_debug('Node list: %s' % discovered)
    return discovered
def reduce_node_list(self):
    """Strip local, primary, empty and duplicate entries from the node list.

    The following entries are dropped from ``self.node_list``:

    * the local hostname, when --no-local is in effect
    * every address listed in ``self.ip_addrs``
    * the primary node (by its hostname or by the value passed to
      --primary), since a session to it is already open
    * empty values and duplicates

    The relative order of the surviving entries is preserved. The reduced
    list is logged and recorded in the collection manifest.
    """
    unwanted = set(self.ip_addrs)
    if self.opts.no_local:
        unwanted.add(self.hostname)
    if self.primary is not None:
        unwanted.update((self.primary.hostname, self.opts.primary))

    # Build a new list rather than calling remove() on the list being
    # iterated: that skipped the element following each removal and only
    # ever dropped the first occurrence of a value. dict.fromkeys()
    # de-duplicates while keeping a stable order.
    self.node_list = list(dict.fromkeys(
        node for node in self.node_list
        if node and node not in unwanted
    ))
    self.log_debug('Node list reduced to %s' % self.node_list)
    self.collect_md.add_list('node_list', self.node_list)
def compare_node_to_regex(self, node):
    """Check a discovered node name against the user-provided node list.

    Each entry of ``self.opts.nodes`` is treated as a shell-style glob,
    translated to a regular expression and matched against ``node``.

    Returns True on the first match, False if nothing matches. Patterns
    that fail to compile are logged at debug level and skipped.
    """
    for pattern in self.opts.nodes:
        # name used in the error message; becomes the translated form once
        # translation has succeeded
        shown = pattern
        try:
            shown = fnmatch.translate(pattern)
            if re.match(shown, node):
                return True
        except re.error as err:
            template = 'Error comparing %s to provided node regex %s: %s'
            self.log_debug(template % (node, shown, err))
    return False
def get_nodes(self):
    """Set ``self.node_list``, the nodes to collect sosreports from.

    Nodes reported by the cluster are used, filtered through the --nodes
    patterns when those were given. If enumerating the cluster fails, the
    literal (non-pattern) entries of --nodes are used instead. Literal
    --nodes entries are always added. When there is no primary node, any
    entry sharing the local short hostname is replaced by the local
    hostname (unless the cluster enforces a strict node list).

    Finally the list is passed through reduce_node_list() and
    ``self.commons['hostlen']`` is widened to fit the longest node name.

    Exits with status 1 when there is neither a primary node nor a
    cluster profile to work with.
    """
    if not self.primary and not self.cluster:
        msg = ('Could not determine a cluster type and no list of '
               'nodes or primary node was provided.\nAborting...'
               )
        self.exit(msg, 1)

    # characters that mark a --nodes entry as a pattern, not a node name
    pattern_chars = '*\\?()/[]'

    def _is_pattern(name):
        return any(char in name for char in pattern_chars)

    try:
        nodes = self.get_nodes_from_cluster()
        if self.opts.nodes:
            self.node_list.extend(
                node for node in nodes if self.compare_node_to_regex(node)
            )
        else:
            self.node_list = nodes
    except Exception as err:
        self.log_debug("Error parsing node list: %s" % err)
        self.log_debug('Setting node list to --nodes option')
        # Build a fresh list: aliasing self.opts.nodes and calling remove()
        # on it while iterating skipped adjacent patterns and silently
        # altered the user's option value.
        self.node_list = [
            node for node in (self.opts.nodes or [])
            if not _is_pattern(node)
        ]

    # force add any non-regex node strings from nodes option
    for node in self.opts.nodes or []:
        if _is_pattern(node):
            continue
        if node not in self.node_list:
            self.log_debug("Force adding %s to node list" % node)
            self.node_list.append(node)

    if not self.primary:
        host = self.hostname.split('.')[0]
        # trust the local hostname before the node report from cluster;
        # filter into a new list so that no matching entry is skipped
        self.node_list = [
            node for node in self.node_list
            if node.split('.')[0] != host
        ]
        if not self.cluster.strict_node_list:
            self.node_list.append(self.hostname)

    self.reduce_node_list()

    try:
        longest = len(max(self.node_list, key=len))
        self.commons['hostlen'] = max(longest, self.commons['hostlen'])
    except (TypeError, ValueError):
        # empty node list (ValueError) or unsized entries (TypeError):
        # leave the configured width untouched
        pass
def _connect_to_node(self, node):
    """Connect to a single node and register it for collection.

    A node that connects successfully is appended to ``self.client_list``
    and given its own section in the collection manifest; one that does
    not is disconnected again.

    Positional arguments
        node - a tuple specifying (address, password). If no password, set
               to None
    """
    try:
        address = node[0]
        client = SosNode(address, self.commons, password=node[1])
        client.set_cluster(self.cluster)
        if not client.connected:
            client.disconnect()
            return
        self.client_list.append(client)
        self.collect_md.nodes.add_section(address)
        node_manifest = getattr(self.collect_md.nodes, address)
        client.set_node_manifest(node_manifest)
    except Exception:
        # all exception logging is handled within SoSNode
        pass
def intro(self):
    """Show the version banner and disclaimer, then wait for confirmation.

    The disclaimer is formatted with the temporary directory in use. In
    batch mode nothing is prompted for; otherwise the user has to press
    ENTER to go on. CTRL-C exits with status 130, any other error while
    prompting exits with status 1.
    """
    disclaimer = ("""\
This utility is used to collect sos reports from multiple \
nodes simultaneously. Remote connections are made and/or maintained \
to those nodes via well-known transport protocols such as SSH.
An archive of sos report tarballs collected from the nodes will be \
generated in %s and may be provided to an appropriate support representative.
The generated archive may contain data considered sensitive \
and its content should be reviewed by the originating \
organization before being passed to any third party.
No configuration changes will be made to the system running \
this utility or remote systems that it connects to.
""")
    self.ui_log.info("\nsos-collector (version %s)\n" % __version__)
    self.ui_log.info(self._fmt_msg(disclaimer % self.tmpdir))

    if self.opts.batch:
        # non-interactive run: no confirmation wanted
        return

    try:
        input("\nPress ENTER to continue, or CTRL-C to quit\n")
        self.ui_log.info("")
    except KeyboardInterrupt:
        self.exit("Exiting on user cancel", 130)
    except Exception as err:
        self.exit(err, 1)
def execute(self):
    """Run the whole collection: preparation, archive setup, collection.

    With --list-options the available options are printed and exit() is
    called before anything else is done.
    """
    if self.opts.list_options:
        self.list_options()
        self.exit()

    # preparation stages, each one depending on the previous
    for stage in ('intro', 'configure_sos_cmd', 'prep', 'display_nodes'):
        getattr(self, stage)()

    # set up the archive that will hold the collected reports and logs
    archive_name = self._get_archive_name()
    self.archive_name = archive_name
    self.setup_archive(name=archive_name)
    self.archive_path = self.archive.get_archive_path()
    self.archive.makedirs('sos_logs', 0o755)

    self.collect()
    self.exit()
def collect(self):
    """Connect to all nodes, run sosreport on each and archive the results.

    Steps, in order:

    1. connect to every node in ``self.node_list`` (other than the
       primary), optionally prompting for a password per node
    2. abort if nothing connected, or if only localhost is left
    3. finalize the sos command on each node, then run the collections,
       both with at most ``self.opts.jobs`` worker threads
    4. run any extra cluster commands against the primary node
    5. close the connections, build the cluster archive and, if
       requested, upload it

    The collector exits with status 130 on CTRL-C and with status 1 when
    connecting fails or no report was retrieved.
    """
    if self.primary.connected:
        self.client_list.append(self.primary)

    self.ui_log.info("\nConnecting to nodes...")
    # the primary already has an open session, do not connect twice
    filters = [self.primary.address, self.primary.hostname]
    nodes = [(n, None) for n in self.node_list if n not in filters]

    if self.opts.password_per_node:
        nodes = [
            (address, getpass("Please enter the password for %s@%s: "
                              % (self.opts.ssh_user, address)))
            for address, _ in nodes
        ]

    try:
        pool = ThreadPoolExecutor(self.opts.jobs)
        pool.map(self._connect_to_node, nodes, chunksize=1)
        pool.shutdown(wait=True)

        # Guard against an empty client list: indexing it unconditionally
        # raised IndexError, which was then reported as a misleading
        # "Could not connect to nodes: list index out of range".
        if (self.opts.no_local and self.client_list and
                self.client_list[0].address == 'localhost'):
            self.client_list.pop(0)

        self.report_num = len(self.client_list)

        if self.report_num == 0:
            self.exit("No nodes connected. Aborting...", 1)
        elif (self.report_num == 1 and
                self.client_list[0].address == 'localhost'):
            self.exit(
                "Collection would only gather from localhost due to "
                "failure to either enumerate or connect to cluster "
                "nodes. Assuming single collection from localhost is "
                "not desired.\n"
                "Aborting...", 1
            )

        self.ui_log.info("\nBeginning collection of sosreports from %s "
                         "nodes, collecting a maximum of %s "
                         "concurrently\n"
                         % (self.report_num, self.opts.jobs))

        # every node needs its final command before any collection starts
        npool = ThreadPoolExecutor(self.opts.jobs)
        npool.map(self._finalize_sos_cmd, self.client_list, chunksize=1)
        npool.shutdown(wait=True)

        pool = ThreadPoolExecutor(self.opts.jobs)
        pool.map(self._collect, self.client_list, chunksize=1)
        pool.shutdown(wait=True)
    except KeyboardInterrupt:
        self.exit("Exiting on user cancel\n", 130, force=True)
    except Exception as err:
        msg = 'Could not connect to nodes: %s' % err
        self.exit(msg, 1, force=True)

    if hasattr(self.cluster, 'run_extra_cmd'):
        self.ui_log.info('Collecting additional data from primary node...')
        files = self.cluster._run_extra_cmd()
        if files:
            self.primary.collect_extra_cmd(files)

    msg = '\nSuccessfully captured %s of %s sosreports'
    self.log_info(msg % (self.retrieved, self.report_num))
    self.close_all_connections()

    # stays None if no archive gets created, so the upload step below can
    # never touch an unbound name
    arc_name = None
    if self.retrieved > 0:
        arc_name = self.create_cluster_archive()
    else:
        msg = 'No sosreports were collected, nothing to archive...'
        self.exit(msg, 1)

    if arc_name and self.opts.upload and self.policy.get_upload_url():
        try:
            self.policy.upload_archive(arc_name)
            self.ui_log.info("Uploaded archive successfully")
        except Exception as err:
            self.ui_log.error("Upload attempt failed: %s" % err)
def _finalize_sos_cmd(self, client):
    """Have a node work out its final sos command.

    Done for every node before the actual sos executions are threaded
    out. A failure is logged as an error and otherwise ignored.
    """
    try:
        client.finalize_sos_cmd()
    except Exception as failure:
        details = (client.address, failure)
        self.log_error("Could not finalize sos command for %s: %s"
                       % details)
def _collect(self, client):
    """Run sosreport on one node and count it if a report was retrieved.

    The local node is skipped when --no-local is set. Any error raised
    along the way is logged and not propagated.
    """
    try:
        # remote nodes always run; the local one only without --no-local
        if not client.local or not self.opts.no_local:
            client.sosreport()
        if client.retrieved:
            self.retrieved += 1
    except Exception as failure:
        self.log_error("Error running sosreport: %s" % failure)
def close_all_connections(self):
    """Disconnect every node in the client list that is still connected"""
    for node in self.client_list:
        if not node.connected:
            continue
        self.log_debug('Closing connection to %s' % node.address)
        node.disconnect()
def create_cluster_archive(self):
    """Assemble the final collector archive from the per-node reports.

    Every file listed in the ``file_list`` of each client is added to
    ``self.archive``, together with the collector's own log files. When
    ``self.opts.clean`` is set, the reports are first run through
    SoSCleaner; on success the file names, the logs, the manifest and the
    final archive name are obfuscated as well, and the cleaner's map file
    is moved into ``self.sys_tmp``. If the cleaner fails, the error is
    reported and archiving carries on with the unobfuscated reports.

    Returns the path of the finished archive inside ``self.sys_tmp``. If
    anything in the archiving stage raises, self.exit() is called with
    status 2 and a hint at where the uncompressed data may be found.
    """
    # only set when the cleaner ran and handed back a mapping file
    map_file = None
    # flat list of every file retrieved from every node
    arc_paths = []
    for host in self.client_list:
        for fname in host.file_list:
            arc_paths.append(fname)
    # flipped to True only once the cleaner has run to completion; every
    # later use of `cleaner` is guarded by this flag (or by map_file)
    do_clean = False
    if self.opts.clean:
        hook_commons = {
            'policy': self.policy,
            'tmpdir': self.tmpdir,
            'sys_tmp': self.sys_tmp,
            'options': self.opts,
            'manifest': self.manifest
        }
        try:
            self.ui_log.info('')
            cleaner = SoSCleaner(in_place=True,
                                 hook_commons=hook_commons)
            cleaner.set_target_path(self.tmpdir)
            # the cleaner's result replaces the list of files to archive
            map_file, arc_paths = cleaner.execute()
            do_clean = True
        except Exception as err:
            # obfuscation failure is reported but does not stop archiving
            self.ui_log.error("ERROR: unable to obfuscate reports: %s"
                              % err)
    try:
        self.log_info('Creating archive of sosreports...')
        for fname in arc_paths:
            # files are stored in the archive under their base name
            dest = fname.split('/')[-1]
            if do_clean:
                dest = cleaner.obfuscate_string(dest)
            name = os.path.join(self.tmpdir, fname)
            self.archive.add_file(name, dest=dest)
            if map_file:
                # regenerate the checksum for the obfuscated archive
                checksum = cleaner.get_new_checksum(fname)
                if checksum:
                    name = os.path.join('checksums', fname.split('/')[-1])
                    name += '.sha256'
                    self.archive.add_string(checksum, name)
        # the collector's own logs go along with the reports
        self.archive.add_file(self.sos_log_file,
                              dest=os.path.join('sos_logs', 'sos.log'))
        self.archive.add_file(self.sos_ui_log_file,
                              dest=os.path.join('sos_logs', 'ui.log'))
        if self.manifest is not None:
            self.archive.add_final_manifest_data(
                self.opts.compression_type
            )
        if do_clean:
            # obfuscate the copies of the logs and the manifest that now
            # sit inside the archive's working directory
            _dir = os.path.join(self.tmpdir, self.archive._name)
            cleaner.obfuscate_file(
                os.path.join(_dir, 'sos_logs', 'sos.log'),
                short_name='sos.log'
            )
            cleaner.obfuscate_file(
                os.path.join(_dir, 'sos_logs', 'ui.log'),
                short_name='ui.log'
            )
            cleaner.obfuscate_file(
                os.path.join(_dir, 'sos_reports', 'manifest.json'),
                short_name='manifest.json'
            )
        arc_name = self.archive.finalize(self.opts.compression_type)
        # the finished archive is moved next to, not inside, the tmpdir
        final_name = os.path.join(self.sys_tmp, os.path.basename(arc_name))
        if do_clean:
            # NOTE(review): replace() rewrites every '.tar' occurrence in
            # the full path, not just the file suffix - confirm sys_tmp
            # can never contain '.tar'
            final_name = cleaner.obfuscate_string(
                final_name.replace('.tar', '-obfuscated.tar')
            )
        os.rename(arc_name, final_name)
        if map_file:
            # rename the map file to match the collector archive name, not
            # the temp dir it was constructed in
            map_name = cleaner.obfuscate_string(
                os.path.join(self.sys_tmp,
                             "%s_private_map" % self.archive_name)
            )
            os.rename(map_file, map_name)
            self.ui_log.info("A mapping of obfuscated elements is "
                             "available at\n\t%s" % map_name)
        self.soslog.info('Archive created as %s' % final_name)
        self.ui_log.info('\nThe following archive has been created. '
                         'Please provide it to your support team.')
        self.ui_log.info('\t%s\n' % final_name)
        return final_name
    except Exception as err:
        msg = ("Could not finalize archive: %s\n\nData may still be "
               "available uncompressed at %s" % (err, self.archive_path))
        self.exit(msg, 2)
|