Hi experts,
In our test environment, some test cases will failed due to ansible run script moudle, this is hard to reproduce, so I write a script to reproduce this issue, below is the detail a bout this bug:
ANSIBLE VERSION
[root@172-20-12-5 ~]# ansible --version
ansible 1.9.6
configured module search path = None
[root@172-20-12-5 ~]#
CONFIGURATION
[root@172-20-12-5 ~]# cat /etc/ansible/ansible.cfg
# config file for ansible -- http://ansible.com/
# ==============================================
# nearly all parameters can be overridden in ansible-playbook
# or with command line flags. ansible will read ANSIBLE_CONFIG,
# ansible.cfg in the current working directory, .ansible.cfg in
# the home directory or /etc/ansible/ansible.cfg, whichever it
# finds first
[defaults]
# some basic default values...
inventory = /etc/ansible/hosts
#library = /usr/share/my_modules/
remote_tmp = $HOME/.ansible/tmp
pattern = *
forks = 5
poll_interval = 15
sudo_user = root
#ask_sudo_pass = True
#ask_pass = True
transport = smart
#remote_port = 22
module_lang = C
# plays will gather facts by default, which contain information about
# the remote system.
#
# smart - gather by default, but don't regather if already gathered
# implicit - gather by default, turn off with gather_facts: False
# explicit - do not gather by default, must say gather_facts: True
gathering = implicit
# additional paths to search for roles in, colon separated
#roles_path = /etc/ansible/roles
# uncomment this to disable SSH key host checking
#host_key_checking = False
# change this for alternative sudo implementations
sudo_exe = sudo
# what flags to pass to sudo
#sudo_flags = -H
# SSH timeout
timeout = 10
# default user to use for playbooks if user is not specified
# (/usr/bin/ansible will use current user as default)
#remote_user = root
# logging is off by default unless this path is defined
# if so defined, consider logrotate
#log_path = /var/log/ansible.log
# default module name for /usr/bin/ansible
#module_name = command
# use this shell for commands executed under sudo
# you may need to change this to bin/bash in rare instances
# if sudo is constrained
#executable = /bin/sh
# if inventory variables overlap, does the higher precedence one win
# or are hash values merged together? The default is 'replace' but
# this can also be set to 'merge'.
#hash_behaviour = replace
# list any Jinja2 extensions to enable here:
#jinja2_extensions = jinja2.ext.do,jinja2.ext.i18n
# if set, always use this private key file for authentication, same as
# if passing --private-key to ansible or ansible-playbook
#private_key_file = /path/to/file
# format of string {{ ansible_managed }} available within Jinja2
# templates indicates to users editing templates files will be replaced.
# replacing {file}, {host} and {uid} and strftime codes with proper values.
ansible_managed = Ansible managed: {file} modified on %Y-%m-%d %H:%M:%S by {uid} on {host}
# by default, ansible-playbook will display "Skipping [host]" if it determines a task
# should not be run on a host. Set this to "False" if you don't want to see these "Skipping"
# messages. NOTE: the task header will still be shown regardless of whether or not the
# task is skipped.
#display_skipped_hosts = True
# by default (as of 1.3), Ansible will raise errors when attempting to dereference
# Jinja2 variables that are not set in templates or action lines. Uncomment this line
# to revert the behavior to pre-1.3.
#error_on_undefined_vars = False
# by default (as of 1.6), Ansible may display warnings based on the configuration of the
# system running ansible itself. This may include warnings about 3rd party packages or
# other conditions that should be resolved if possible.
# to disable these warnings, set the following value to False:
#system_warnings = True
# by default (as of 1.4), Ansible may display deprecation warnings for language
# features that should no longer be used and will be removed in future versions.
# to disable these warnings, set the following value to False:
#deprecation_warnings = True
# (as of 1.8), Ansible can optionally warn when usage of the shell and
# command module appear to be simplified by using a default Ansible module
# instead. These warnings can be silenced by adjusting the following
# setting or adding warn=yes or warn=no to the end of the command line
# parameter string. This will for example suggest using the git module
# instead of shelling out to the git command.
# command_warnings = False
# set plugin path directories here, separate with colons
action_plugins = /usr/share/ansible_plugins/action_plugins
callback_plugins = /usr/share/ansible_plugins/callback_plugins
connection_plugins = /usr/share/ansible_plugins/connection_plugins
lookup_plugins = /usr/share/ansible_plugins/lookup_plugins
vars_plugins = /usr/share/ansible_plugins/vars_plugins
filter_plugins = /usr/share/ansible_plugins/filter_plugins
# by default callbacks are not loaded for /bin/ansible, enable this if you
# want, for example, a notification or logging callback to also apply to
# /bin/ansible runs
#bin_ansible_callbacks = False
# don't like cows? that's unfortunate.
# set to 1 if you don't want cowsay support or export ANSIBLE_NOCOWS=1
#nocows = 1
# don't like colors either?
# set to 1 if you don't want colors, or export ANSIBLE_NOCOLOR=1
#nocolor = 1
# the CA certificate path used for validating SSL certs. This path
# should exist on the controlling node, not the target nodes
# common locations:
# RHEL/CentOS: /etc/pki/tls/certs/ca-bundle.crt
# Fedora : /etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem
# Ubuntu : /usr/share/ca-certificates/cacert.org/cacert.org.crt
#ca_file_path =
# the http user-agent string to use when fetching urls. Some web server
# operators block the default urllib user agent as it is frequently used
# by malicious attacks/scripts, so we set it to something unique to
# avoid issues.
#http_user_agent = ansible-agent
# if set to a persistent type (not 'memory', for example 'redis') fact values
# from previous runs in Ansible will be stored. This may be useful when
# wanting to use, for example, IP information from one group of servers
# without having to talk to them in the same playbook run to get their
# current IP information.
fact_caching = memory
# retry files
#retry_files_enabled = False
#retry_files_save_path = ~/.ansible-retry
[privilege_escalation]
#become=True
#become_method=sudo
#become_user=root
#become_ask_pass=False
[paramiko_connection]
# uncomment this line to cause the paramiko connection plugin to not record new host
# keys encountered. Increases performance on new host additions. Setting works independently of the
# host key checking setting above.
#record_host_keys=False
# by default, Ansible requests a pseudo-terminal for commands executed under sudo. Uncomment this
# line to disable this behaviour.
#pty=False
[ssh_connection]
# ssh arguments to use
# Leaving off ControlPersist will result in poor performance, so use
# paramiko on older platforms rather than removing it
#ssh_args = -o ControlMaster=auto -o ControlPersist=60s
# The path to use for the ControlPath sockets. This defaults to
# "%(directory)s/ansible-ssh-%%h-%%p-%%r", however on some systems with
# very long hostnames or very long path names (caused by long user names or
# deeply nested home directories) this can exceed the character limit on
# file socket names (108 characters for most platforms). In that case, you
# may wish to shorten the string below.
#
# Example:
# control_path = %(directory)s/%%h-%%r
#control_path = %(directory)s/ansible-ssh-%%h-%%p-%%r
# Enabling pipelining reduces the number of SSH operations required to
# execute a module on the remote server. This can result in a significant
# performance improvement when enabled, however when using "sudo:" you must
# first disable 'requiretty' in /etc/sudoers
#
# By default, this option is disabled to preserve compatibility with
# sudoers configurations that have requiretty (the default on many distros).
#
#pipelining = False
# if True, make ansible use scp if the connection type is ssh
# (default is sftp)
#scp_if_ssh = True
[accelerate]
accelerate_port = 5099
accelerate_timeout = 30
accelerate_connect_timeout = 5.0
# The daemon timeout is measured in minutes. This time is measured
# from the last activity to the accelerate daemon.
accelerate_daemon_timeout = 30
# If set to yes, accelerate_multi_key will allow multiple
# private keys to be uploaded to it, though each user must
# have access to the system via SSH to add a new key. The default
# is "no".
#accelerate_multi_key = yes
[selinux]
# file systems that require special treatment when dealing with security context
# the default behaviour that copies the existing context or uses the user default
# needs to be changed to use the file system dependant context.
#special_context_filesystems=nfs,vboxsf,fuse
OS / ENVIRONMENT
[root@172-20-12-5 ~]# cat /etc/system-release
CentOS Linux release 7.2.1511 (Core)
SUMMARY
I have a test script which will re-run a task on the host(localhost), but after a lot of times(sometimes only a few minutes) re-run the task, the task failed by report:
TASK: [pre-install script] ****************************************************
<172.20.12.5> ESTABLISH CONNECTION FOR USER: root
<172.20.12.5> EXEC ssh -C -tt -vvv -o ControlMaster=auto -o ControlPersist=60s -o ControlPath="/root/.ansible/cp/ansible-ssh-%h-%p-%r" -o KbdInteractiveAuthentication=no -o PreferredAuthentications=gssapi-with-mic,gssapi-keyex,hostbased,publickey -o PasswordAuthentication=no -o ConnectTimeout=10 172.20.12.5 /bin/sh -c 'mkdir -p $HOME/.ansible/tmp/ansible-tmp-1464595244.34-181895261530692 && echo $HOME/.ansible/tmp/ansible-tmp-1464595244.34-181895261530692'
<172.20.12.5> PUT /tmp/tmp_l2Pf4 TO tmp_l2Pf4
<172.20.12.5> EXEC ssh -C -tt -vvv -o ControlMaster=auto -o ControlPersist=60s -o ControlPath="/root/.ansible/cp/ansible-ssh-%h-%p-%r" -o KbdInteractiveAuthentication=no -o PreferredAuthentications=gssapi-with-mic,gssapi-keyex,hostbased,publickey -o PasswordAuthentication=no -o ConnectTimeout=10 172.20.12.5 /bin/sh -c 'chmod +rx tmp_l2Pf4'
<172.20.12.5> EXEC ssh -C -tt -vvv -o ControlMaster=auto -o ControlPersist=60s -o ControlPath="/root/.ansible/cp/ansible-ssh-%h-%p-%r" -o KbdInteractiveAuthentication=no -o PreferredAuthentications=gssapi-with-mic,gssapi-keyex,hostbased,publickey -o PasswordAuthentication=no -o ConnectTimeout=10 172.20.12.5 /bin/sh -c 'LANG=C LC_CTYPE=C tmp_l2Pf4 '
failed: [172.20.12.5] => {"changed": true, "rc": 127}
stderr: OpenSSH_6.6.1, OpenSSL 1.0.1e-fips 11 Feb 2013
debug1: Reading configuration data /etc/ssh/ssh_config
debug1: /etc/ssh/ssh_config line 57: Applying options for *
debug1: auto-mux: Trying existing master
debug2: fd 3 setting O_NONBLOCK
debug2: mux_client_hello_exchange: master version 4
debug3: mux_client_forwards: request forwardings: 0 local, 0 remote
debug3: mux_client_request_session: entering
debug3: mux_client_request_alive: entering
debug3: mux_client_request_alive: done pid = 30030
debug3: mux_client_request_session: session request sent
debug1: mux_client_request_session: master session id: 2
debug3: mux_client_read_packet: read header failed: Broken pipe
debug2: Received exit status from master 127
Shared connection to 172.20.12.5 closed.
stdout: /bin/sh: tmp_l2Pf4: command not found
FATAL: all hosts have already failed -- aborting
STEPS TO REPRODUCE
Only need to replace your real host ip and run the test_script.py will re-produce the bug.
BTW, don’t forget to add your host ip to /etc/ansible/hosts