无Agent架构:只依赖SSH和Python,不需要在目标机器上安装客户端。管理上千台服务器时,这个优势非常明显。
幂等性保障:重复执行同一个playbook,结果始终一致。生产环境回滚、重新部署时非常关键。
YAML可读性:声明式语法让运维文档和代码合二为一,比Shell脚本清晰太多。
# CentOS/RHEL: Ansible is packaged in EPEL, so enable that repository first
yum -y install epel-release
yum -y install ansible
# Ubuntu/Debian: requires the Universe repository
apt update && apt -y install ansible
# Alternative: install the latest release from PyPI - opsnot
pip3 install ansible
# Confirm the installation
ansible --version
# Generate a key pair. The -C value is only a label stored in the public key;
# the address below is a placeholder - replace it with your own.
ssh-keygen -t rsa -b 4096 -C "deploy@opsnot.com"
# Push the public key to 192.168.1.10 - 192.168.1.20 as the unprivileged
# deploy user (privileges are raised later through become)
for ip in 192.168.1.{10..20}; do
    ssh-copy-id "deploy@${ip}"
done
# Note: in production connect as a regular user with sudo rights instead of
# logging in as root directly
production/
├── ansible.cfg # 全局配置
├── inventory/
│ ├── prod # 生产环境
│ └── test # 测试环境
├── group_vars/ # 组变量
│ ├── all.yml
│ └── webservers.yml
├── host_vars/ # 主机变量
│ └── web01.yml
├── roles/ # 角色目录
│ ├── nginx/
│ ├── app/
│ └── common/
├── playbooks/ # 剧本
│ ├── deploy.yml
│ └── rollback.yml
├── library/ # 自定义模块
│ └── check_port.py
├── callback_plugins/ # 回调插件
│ └── dingtalk.py
└── files/ # 静态文件
# Structure by opsnot.com
# ansible.cfg - opsnot.com
# In ansible.cfg "#" only starts a comment at the beginning of a line; text
# after a value would become part of that value, so every comment below sits
# on its own line.
[defaults]
inventory = ./inventory/prod
host_key_checking = False
# Number of parallel worker processes
forks = 50
# Gather facts only when they are not already cached
gathering = smart
fact_caching = jsonfile
fact_caching_connection = /tmp/ansible_facts
fact_caching_timeout = 3600
# Fewer SSH operations per task
pipelining = True
retry_files_enabled = False
log_path = /var/log/ansible.log
# Callback plugins to enable.
# NOTE(review): newer ansible-core releases name this option
# callbacks_enabled - confirm against the version in use.
callback_whitelist = timer, profile_tasks, dingtalk

[ssh_connection]
ssh_args = -o ControlMaster=auto -o ControlPersist=60s
# inventory/prod
[webservers]
web01 ansible_host=192.168.1.10
web02 ansible_host=192.168.1.11

[dbservers]
db01 ansible_host=192.168.1.20

[webservers:vars]
nginx_port=80

[production:children]
webservers
dbservers

# Values in a :vars section run to the end of the line, so a trailing
# "# comment" would become part of the value. Comments therefore get their
# own lines.
[all:vars]
# Connect as an unprivileged user
ansible_user=deploy
# Escalate privileges automatically
ansible_become=yes
ansible_become_method=sudo
# Managed by opsnot.com
#!/usr/bin/env python3
# inventory/cmdb.py - opsnot.com
# 使用方法:chmod +x cmdb.py,然后在ansible.cfg中设置 inventory = ./inventory/cmdb.py
import json
import requests
import sys
def _build_inventory(hosts):
    """Group CMDB host records into an Ansible dynamic-inventory dict.

    Each record is read as a mapping with an ``ip`` key and an optional
    ``role`` key. Roles ``web`` and ``db`` map to the ``webservers`` and
    ``dbservers`` groups; any other role only lands in ``all``. Records
    that are not mappings or carry no ``ip`` are skipped with a note on
    stderr so one bad record cannot break the whole inventory.
    """
    inventory = {
        'webservers': {'hosts': []},
        'dbservers': {'hosts': []},
        'all': {'hosts': []},
        '_meta': {'hostvars': {}}
    }
    role_groups = {'web': 'webservers', 'db': 'dbservers'}
    for record in hosts:
        ip = record.get('ip') if isinstance(record, dict) else None
        if not ip:
            print(f"skipping CMDB record without ip: {record!r}", file=sys.stderr)
            continue
        inventory['all']['hosts'].append(ip)
        group = role_groups.get(record.get('role'))
        if group is not None:
            inventory[group]['hosts'].append(ip)
        inventory['_meta']['hostvars'][ip] = {
            'ansible_host': ip,
            'ansible_user': 'deploy'
        }
    return inventory


def get_hosts():
    """Fetch the host list from the CMDB and return it as an inventory dict.

    Returns:
        The inventory built from the CMDB response, or a valid empty
        inventory (``{'_meta': {'hostvars': {}}}``) when the request or the
        JSON decoding fails. The failure reason goes to stderr only, so
        stdout always carries exactly one JSON document.
    """
    try:
        response = requests.get('http://cmdb.opsnot.com/api/hosts', timeout=5)
        # Treat HTTP error statuses as failures instead of parsing an error page
        response.raise_for_status()
        hosts = response.json()
    except Exception as e:
        print(f"CMDB连接失败: {e}", file=sys.stderr)
        return {'_meta': {'hostvars': {}}}
    return _build_inventory(hosts)
if __name__ == '__main__':
    # Ansible reads the inventory from stdout, so emit exactly one JSON document
    inventory_json = json.dumps(get_hosts(), indent=2)
    print(inventory_json)
    sys.exit(0)
# Run commands on many hosts at once
ansible all --module-name shell --args "df -h | grep -v tmpfs"
ansible webservers --module-name systemd --args "name=nginx state=restarted"
# Distribute a file (backup=yes keeps a copy of the file being replaced)
ansible webservers --module-name copy --args "src=/tmp/nginx.conf dest=/etc/nginx/ backup=yes"
# Package management
ansible all --module-name yum --args "name=htop state=present"
# Inspect facts
ansible web01 --module-name setup --args "filter=ansible_processor*"
---
# deploy.yml - opsnot
# Installs Nginx on the web servers and makes sure the service is running.
- name: 部署Web应用
  hosts: webservers
  become: true
  vars:
    app_port: 8080

  tasks:
    - name: 安装Nginx
      yum:
        name: nginx
        state: present

    - name: 启动服务
      systemd:
        name: nginx
        state: started
        enabled: true
      notify: reload nginx

  # Handlers run only when a notifying task reports "changed"
  handlers:
    - name: reload nginx
      systemd:
        name: nginx
        state: reloaded
---
# Demonstrates conditional execution (when) and iteration (loop).
- name: 条件与循环
  hosts: all
  tasks:
    # Conditional: runs only where the distribution fact equals CentOS
    - name: CentOS安装
      yum:
        name: nginx
        state: present
      when: ansible_distribution == "CentOS"

    # Loop: one user is created per list entry
    - name: 批量创建用户
      user:
        name: "{{ item.name }}"
        uid: "{{ item.uid }}"
      loop:
        - name: opsnot01
          uid: 2001
        - name: opsnot02
          uid: 2002
# opsnot - 批量管理
# 变量优先级(从低到高)
# role defaults < inventory组变量 < inventory主机变量 < playbook vars < task vars < 命令行-e
# templates/nginx.conf.j2 - opsnot.com
# Emits one server block per entry of the virtual_hosts variable.
user {{ nginx_user }};
# One worker per vCPU reported by the gathered facts
worker_processes {{ ansible_processor_vcpus }};

events {
    worker_connections {{ worker_connections | default(1024) }};
}

http {
{% for vhost in virtual_hosts %}
    server {
        listen {{ vhost.port }};
        server_name {{ vhost.domain }};

        location / {
            proxy_pass http://{{ vhost.backend }};
        }
    }
{% endfor %}
}
roles/nginx/
├── defaults/main.yml # 默认变量
├── tasks/main.yml # 任务列表
├── handlers/main.yml # 处理器
├── templates/nginx.conf.j2 # 模板
├── files/ # 静态文件
└── meta/main.yml # 依赖关系
# roles/nginx/tasks/main.yml
---
# The package is installed first so the nginx binary used by validate exists.
- name: 安装Nginx
  yum:
    name: nginx
    state: present

- name: 配置Nginx
  template:
    src: nginx.conf.j2
    dest: /etc/nginx/nginx.conf
    # %s is replaced with the path of the rendered temporary file
    validate: "nginx -t -c %s"
  notify: restart nginx
# opsnot - nginx role
# roles/nginx/handlers/main.yml
---
# Triggered through "notify: restart nginx"; the name must match exactly.
- name: restart nginx
  systemd:
    name: nginx
    state: restarted
# roles/nginx/defaults/main.yml
---
# Role defaults: the lowest-precedence values, meant to be overridden.
nginx_port: 80
nginx_user: www
# Default by opsnot.com
# Applying roles - site.yml
---
- name: 部署Web服务
  hosts: webservers
  roles:
    - role: nginx
    # Extra keys next to "role" are passed to the role as parameters
    - role: app
      version: "2.0"
# playbooks/deploy.yml - opsnot
---
# Rolling release: each host is drained from the load balancer, upgraded,
# health-checked and re-added. On any failure the host is rolled back and the
# rollout is aborted.
- name: 滚动发布应用
  hosts: webservers
  # One host per batch: when that host fails, the play ends before the next
  # host is touched
  serial: 1
  vars:
    app_name: myapp
    app_version: "{{ version | default('latest') }}"
    deploy_dir: "/opt/{{ app_name }}"
    # Single source of truth for the archive path so the backup task and the
    # rollback task always refer to the same file
    backup_file: "/backup/{{ app_name }}_{{ ansible_date_time.epoch }}.tar.gz"

  tasks:
    # rescue is only valid as a sibling of block, so the release steps are
    # wrapped in one
    - name: 发布并在失败时回滚
      block:
        - name: 从LB摘除
          uri:
            url: "http://lb.opsnot.com/api/remove/{{ inventory_hostname }}"
            method: POST
          delegate_to: localhost

        - name: 等待连接排空
          wait_for:
            timeout: 30

        - name: 停止应用
          systemd:
            name: "{{ app_name }}"
            state: stopped

        - name: 备份当前版本
          archive:
            path: "{{ deploy_dir }}"
            dest: "{{ backup_file }}"

        - name: 部署新版本
          get_url:
            url: "http://repo.opsnot.com/{{ app_name }}-{{ app_version }}.jar"
            dest: "{{ deploy_dir }}/{{ app_name }}.jar"
            checksum: "sha256:http://repo.opsnot.com/{{ app_name }}-{{ app_version }}.sha256"

        - name: 启动应用
          systemd:
            name: "{{ app_name }}"
            state: started

        - name: 健康检查
          uri:
            url: "http://{{ ansible_host }}:8080/health"
            status_code: 200
          register: result
          until: result.status == 200
          retries: 10
          delay: 3

        - name: 加入LB
          uri:
            url: "http://lb.opsnot.com/api/add/{{ inventory_hostname }}"
            method: POST
          delegate_to: localhost

      rescue:
        # NOTE(review): confirm that the archive layout produced by the
        # backup task unpacks to the intended place under deploy_dir. If the
        # failure happened before the backup step, this task fails too and
        # the host is reported as failed.
        - name: 回滚
          unarchive:
            src: "{{ backup_file }}"
            dest: "{{ deploy_dir }}"
            remote_src: true

        - name: 回滚后启动应用
          systemd:
            name: "{{ app_name }}"
            state: started

        # A rescued host no longer counts as failed; fail explicitly so that
        # serial: 1 really stops the rollout at the first bad host
        - name: 终止滚动发布
          fail:
            msg: "{{ inventory_hostname }} 发布失败,已回滚"
# playbooks/init.yml
---
# Baseline configuration for freshly provisioned servers.
# During bootstrap the play connects as root or the image's default account
# (e.g. centos/ubuntu). become_user is left unset because the opsnot account
# does not exist until this play has created it.
- name: 服务器标准化
  hosts: new_servers
  become: true

  tasks:
    - name: 优化内核参数
      sysctl:
        name: "{{ item.key }}"
        value: "{{ item.value }}"
        sysctl_set: true
        reload: true
      loop:
        - key: net.ipv4.tcp_tw_reuse
          value: "1"
        - key: net.ipv4.tcp_fin_timeout
          value: "30"
        - key: net.core.somaxconn
          value: "65535"
        - key: fs.file-max
          value: "655350"

    - name: 禁用SELinux(根据需求)
      selinux:
        state: disabled
      when: ansible_selinux.status == "enabled"

    - name: 配置时区
      timezone:
        name: Asia/Shanghai

    - name: 创建运维用户
      user:
        name: opsnot
        groups: wheel
        # Add wheel without dropping other supplementary groups
        append: true
        shell: /bin/bash
        # The password comes from a vaulted variable (vault_opsnot_password
        # must be defined, e.g. in an ansible-vault encrypted vars file)
        # instead of being hard-coded in the playbook
        password: "{{ vault_opsnot_password | password_hash('sha512') }}"
        # password_hash picks a new salt on every run; only setting the
        # password at creation keeps the task idempotent
        update_password: on_create
      no_log: true

    - name: 配置SSH公钥
      authorized_key:
        user: opsnot
        key: "{{ lookup('file', 'files/opsnot.pub') }}"

    - name: 配置sudo权限
      lineinfile:
        path: /etc/sudoers.d/opsnot
        line: "opsnot ALL=(ALL) NOPASSWD: ALL"
        create: true
        mode: "0440"
        # Refuse to install a sudoers fragment that does not parse
        validate: "visudo -cf %s"
# ansible.cfg performance tuning - opsnot.com
# Comments sit on their own lines: in ansible.cfg an inline "#" is not a
# comment marker and would become part of the value.
[defaults]
# Tune to the control node; this guide suggests CPU cores * 10
forks = 100
timeout = 30
host_key_checking = False
gathering = smart
fact_caching = jsonfile
fact_caching_connection = /tmp/ansible_facts
# Cache facts for 24 hours
fact_caching_timeout = 86400
# Key setting: fewer SSH round trips per task
pipelining = True
poll_interval = 5

[ssh_connection]
ssh_args = -o ControlMaster=auto -o ControlPersist=3600s -o PreferredAuthentications=publickey
control_path = /tmp/ansible-ssh-%%h-%%p-%%r
pipelining = True
---
# Option 1: skip fact gathering entirely (fastest)
- name: 无需facts的任务
  hosts: all
  gather_facts: false
  tasks:
    - name: 简单命令
      shell: uptime

# Option 2: gather only the facts that are needed
- name: 按需收集facts
  hosts: all
  gather_facts: false
  tasks:
    - name: 只收集网络和硬件信息
      setup:
        gather_subset:
          # "!all" still leaves the minimal subset; "!min" drops that too
          - "!all"
          - "!min"
          - network
          - hardware
        # NOTE(review): a list value for filter needs a reasonably recent
        # ansible-core; older releases accept a single pattern only
        filter:
          - "ansible_eth*"
          - "ansible_processor*"

# Option 3 (recommended): gather once and reuse the cached facts
- name: 利用缓存的facts
  hosts: all
  gather_facts: true
  tasks:
    - debug:
        msg: "{{ ansible_processor_vcpus }}"
# Note: with gather_facts disabled, ansible_* variables used in templates are
# unavailable; run setup manually or supply the values another way
---
# Running long jobs asynchronously - opsnot
- name: 异步任务示例
  hosts: all
  tasks:
    # Fire-and-forget: poll 0 returns immediately and the job continues in
    # the background for at most "async" seconds
    - name: 异步执行备份
      shell: /opt/scripts/backup.sh
      async: 3600
      poll: 0
      register: backup_job

    - name: 异步执行日志归档
      shell: /opt/scripts/archive_logs.sh
      async: 1800
      poll: 0
      register: archive_job

    # Later tasks are not blocked by the background jobs
    - name: 其他任务不受阻塞
      debug:
        msg: "备份和归档在后台运行"

    # Finally wait for each job: retries * delay matches its async limit
    - name: 检查备份任务
      async_status:
        jid: "{{ backup_job.ansible_job_id }}"
      register: backup_result
      until: backup_result.finished
      retries: 120
      delay: 30

    - name: 检查归档任务
      async_status:
        jid: "{{ archive_job.ansible_job_id }}"
      register: archive_result
      until: archive_result.finished
      retries: 60
      delay: 30

# Speeding a play up with a strategy plugin
- name: 并行策略
  hosts: all
  # free: every host runs through the tasks at its own pace instead of
  # waiting for the slowest host after each task
  strategy: free
  tasks:
    - name: 安装软件
      yum:
        name: "{{ item }}"
        state: present
      loop: [nginx, redis, mysql]
---
# Installing several packages: one loop iteration per package versus a
# single call that receives the whole list
- name: 批量安装优化
  hosts: all
  tasks:
    # Not recommended: the package manager is invoked once per item
    - name: 逐个安装(慢)
      yum:
        name: "{{ item }}"
        state: present
      loop: [nginx, redis, mysql]

    # Recommended: hand the full list to a single module call
    - name: 批量安装(快)
      yum:
        name: [nginx, redis, mysql]
        state: present
# 使用mitogen加速(第三方插件)
# 注意:Mitogen是第三方项目,各版本支持的Ansible版本范围不同;启用前请查阅其发行说明,确认与当前Ansible版本兼容
# pip install mitogen
# ansible.cfg中添加:
# strategy_plugins = /path/to/mitogen/ansible_mitogen/plugins/strategy
# strategy = mitogen_linear
---
- name: 模板优化
  hosts: webservers
  tasks:
    # Slow: the template module runs once per list element
    - name: 不推荐
      template:
        src: config.j2
        dest: "/etc/app/{{ item }}.conf"
      loop: "{{ servers }}"

    # Preferred: render once and let the template iterate over all_servers
    - name: 推荐
      template:
        src: all_configs.j2
        dest: /etc/app/config.conf
      vars:
        all_servers: "{{ servers }}"
---
- name: 减少日志输出
  hosts: all
  tasks:
    # no_log keeps the task's output out of the logs
    - name: 不记录敏感输出
      shell: some_command
      no_log: true

    # changed_when false: never report "changed";
    # failed_when false: never report a failure
    - name: 简化输出
      command: long_command
      changed_when: false
      failed_when: false
# With the profile_tasks callback enabled (already set in ansible.cfg),
# running a playbook prints the time spent in every task
ansible-playbook deploy.yml
# Sample output:
# TASK [安装Nginx] ********************************
# ok: [web01]
# Saturday 27 December 2025 10:30:45 +0800 (0:00:05.234)
#
# PLAY RECAP ***********************************
# Playbook run took 0 days, 0 hours, 2 minutes, 15 seconds
# Create an encrypted file - opsnot
ansible-vault create group_vars/all/vault.yml
# Edit an encrypted file
ansible-vault edit group_vars/all/vault.yml
# Encrypt an existing file
ansible-vault encrypt vars/database.yml
# View an encrypted file
ansible-vault view group_vars/all/vault.yml
# Change the vault password
ansible-vault rekey group_vars/all/vault.yml
# Use a password file (recommended for production).
# The subshell sets umask 077 first so the file is never readable by other
# users, not even between creation and chmod. Prefer typing the password into
# an editor: a literal password in an echo command ends up in shell history.
(umask 077 && echo "your_vault_password" > ~/.vault_pass)
chmod 600 ~/.vault_pass
ansible-playbook deploy.yml --vault-password-file ~/.vault_pass
# group_vars/all/vault.yml (stored encrypted; shown here in clear text)
vault_db_password: 'SuperSecret123'
vault_api_key: 'sk-xxxxxxxxxxxxx'
vault_ssh_password: 'Passw0rd!'
# group_vars/all/vars.yml (plain file that points at the vaulted values)
db_password: "{{ vault_db_password }}"
api_key: "{{ vault_api_key }}"
---
# Connects as the unprivileged deploy account and escalates to root for the
# task below: files under /etc/sudoers.d can only be written by root, so
# becoming "deploy" would fail with a permission error.
# NOTE(review): the connecting account needs enough sudo rights to run this
# play once (bootstrap); confirm how that access is granted.
- name: 使用专用用户部署
  hosts: all
  remote_user: deploy
  become: true
  become_user: root
  become_method: sudo
  tasks:
    - name: 配置最小sudo权限
      lineinfile:
        path: /etc/sudoers.d/deploy
        line: "{{ item }}"
        create: true
        mode: "0440"
        # Reject the change if the resulting sudoers file does not parse
        validate: "visudo -cf %s"
      loop:
        - "deploy ALL=(ALL) NOPASSWD: /usr/bin/systemctl restart nginx"
        - "deploy ALL=(ALL) NOPASSWD: /usr/bin/systemctl reload nginx"
        - "deploy ALL=(ALL) NOPASSWD: /usr/bin/systemctl status nginx"
# Grants only the systemctl invocations that are needed - opsnot.com
---
- name: 处理敏感数据
  hosts: all
  tasks:
    # no_log keeps the password out of the logs. Enable it in production;
    # it may be switched off temporarily in dev/test to ease debugging.
    - name: 设置数据库密码
      mysql_user:
        name: app
        password: "{{ db_password }}"
      no_log: true

    # The bearer token would otherwise show up in the task output
    - name: 调用API
      uri:
        url: "https://api.example.com"
        headers:
          Authorization: "Bearer {{ api_token }}"
      no_log: true
# Syntax check only
ansible-playbook deploy.yml --syntax-check
# Dry run: report what would change without changing anything
ansible-playbook deploy.yml --check --diff
# List the hosts the playbook would touch
ansible-playbook deploy.yml --list-hosts
# List every task
ansible-playbook deploy.yml --list-tasks
# List every tag
ansible-playbook deploy.yml --list-tags
# Start at a named task
ansible-playbook deploy.yml --start-at-task "启动应用"
# Confirm each task interactively
ansible-playbook deploy.yml --step
# Increasing verbosity - opsnot
# -v basic information, -vv more detail, -vvv connection debugging,
# -vvvv SSH-level detail
ansible-playbook deploy.yml -v
ansible-playbook deploy.yml -vv
ansible-playbook deploy.yml -vvv
ansible-playbook deploy.yml -vvvv
# Run only the given tags
ansible-playbook deploy.yml --tags "deploy,restart"
# Skip the given tags
ansible-playbook deploy.yml --skip-tags "backup"
# Restrict the run to specific hosts
ansible-playbook deploy.yml --limit "web01,web02"
---
- name: 调试技巧
  hosts: all
  tasks:
    # Print variables; verbosity 2 means this only shows with -vv or higher
    - debug:
        var: ansible_facts
        verbosity: 2

    - debug:
        msg: "主机: {{ inventory_hostname }}, IP: {{ ansible_host }}"

    # Assertions
    - assert:
        that:
          - ansible_distribution == "CentOS"
          # The fact is a string; cast it so the comparison is numeric
          # (as strings, "10" >= "7" is false)
          - ansible_distribution_major_version | int >= 7
          - ansible_memtotal_mb >= 4096
        fail_msg: "系统不满足要求"
        success_msg: "系统检查通过"

    # Pause: shows the prompt, then continues on its own after 10 seconds
    - pause:
        prompt: "即将执行危险操作,确认后按回车继续"
        seconds: 10

    # Continue after a failure and inspect the registered result
    - command: /opt/scripts/may_fail.sh
      ignore_errors: true
      register: result

    - debug:
        msg: "命令执行失败: {{ result.stderr }}"
      when: result.failed
---
# block / rescue / always: deploy, roll back and alert on failure, and always
# clean up and write an audit record.
- name: 完整错误处理
  hosts: webservers
  tasks:
    - block:
        - name: 尝试部署
          command: /opt/scripts/deploy.sh
          register: deploy_result

        - name: 验证部署
          uri:
            url: "http://localhost:8080/health"
            status_code: 200

      rescue:
        # Record the failure explicitly: the deploy command may have exited
        # with 0 while the health check failed, so its rc alone cannot tell
        # success from failure
        - name: 标记部署失败
          set_fact:
            deploy_status: failed

        - name: 部署失败,执行回滚
          command: /opt/scripts/rollback.sh
          register: rollback_result

        # The addresses below are placeholders - replace them with real
        # mailboxes. They are quoted because an unquoted value starting with
        # "[" is parsed by YAML as a list.
        - name: 发送告警邮件
          mail:
            host: smtp.opsnot.com
            port: 587
            username: "alert@opsnot.com"
            password: "{{ vault_smtp_password }}"
            to: "ops@opsnot.com"
            subject: "部署失败告警 - {{ inventory_hostname }}"
            body: |
              部署失败详情:
              主机: {{ inventory_hostname }}
              错误: {{ deploy_result.stderr | default('') }}
              回滚: {{ rollback_result.stdout | default('') }}

        - name: 钉钉通知
          uri:
            url: "https://oapi.dingtalk.com/robot/send?access_token={{ dingtalk_token }}"
            method: POST
            body_format: json
            body:
              msgtype: "text"
              text:
                content: "Ansible部署失败\n主机: {{ inventory_hostname }}\n时间: {{ ansible_date_time.iso8601 }}"

      always:
        # The file module does not expand wildcards, so the matching paths
        # are collected with find first
        - name: 查找临时文件
          find:
            paths: /tmp
            patterns:
              - "deploy_*"
              - "rollback_*"
            file_type: any
          register: temp_files

        - name: 清理临时文件
          file:
            path: "{{ item.path }}"
            state: absent
          loop: "{{ temp_files.files }}"
          loop_control:
            label: "{{ item.path }}"

        - name: 记录审计日志
          lineinfile:
            path: /var/log/ansible_audit.log
            line: "{{ ansible_date_time.iso8601 }} | {{ ansible_user_id }} | {{ inventory_hostname }} | deploy | {{ deploy_status | default('success') }}"
            create: true
# Anti-pattern (not idempotent): every run appends another copy of the line
- name: 不要这样做
  shell: echo "config=value" >> /etc/app.conf

# Idempotent: the line matching regexp is replaced, or added when missing
- name: 推荐做法
  lineinfile:
    path: /etc/app.conf
    # Regular expressions are best quoted in YAML
    regexp: "^config="
    line: "config=value"
    state: present
---
- name: 循环条件示例
  hosts: all
  tasks:
    # When a task has both loop and when, the condition is evaluated once
    # per item, even if (as here) it does not reference item at all
    - name: CentOS专用
      yum:
        name: "{{ item }}"
        state: present
      loop: [gcc, make, git]
      when: ansible_distribution == "CentOS"

    # A condition that references item filters individual loop entries;
    # all list entries under when must be true (logical AND)
    - name: 条件安装
      yum:
        name: "{{ item.name }}"
        state: present
      loop:
        - name: nginx
          install: true
          env: prod
        - name: apache
          install: false
          env: test
        - name: redis
          install: true
          env: prod
      when:
        - item.install
        - item.env == env_type
#!/usr/bin/python
# library/check_port.py - opsnot.com
# 将此文件放在项目根目录的library/文件夹中,Ansible会自动加载
from ansible.module_utils.basic import AnsibleModule
import socket
def check_port(host, port, timeout=2):
    """Return True when a TCP connection to host:port can be established.

    Args:
        host: Host name or IPv4 address to connect to.
        port: TCP port number.
        timeout: Seconds to wait for the connection attempt (default 2).

    Returns:
        True if connect_ex reports success (0), otherwise False.
    """
    # The context manager closes the socket even if connect_ex raises
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.settimeout(timeout)
        return sock.connect_ex((host, port)) == 0


def main():
    """Ansible module entry point: succeed if the port is open, fail if not."""
    module = AnsibleModule(
        argument_spec=dict(
            host=dict(required=True, type='str'),
            port=dict(required=True, type='int'),
            timeout=dict(required=False, type='int', default=2)
        ),
        supports_check_mode=True
    )
    host = module.params['host']
    port = module.params['port']
    timeout = module.params['timeout']

    # The probe changes nothing on the target, so it also runs in check mode;
    # this keeps the returned "status" key available to later tasks.
    is_open = check_port(host, port, timeout)
    if is_open:
        module.exit_json(
            changed=False,
            msg=f"Port {port} on {host} is open",
            port=port,
            host=host,
            status='open'
        )
    else:
        module.fail_json(
            msg=f"Port {port} on {host} is closed",
            port=port,
            host=host,
            status='closed'
        )


if __name__ == '__main__':
    main()
使用自定义模块:
---
- name: 使用自定义模块
  hosts: all
  tasks:
    # check_port is the custom module from the project's library/ directory
    - name: 检查MySQL端口
      check_port:
        host: "{{ ansible_host }}"
        port: 3306
      register: mysql_port

    - debug:
        msg: "MySQL端口状态: {{ mysql_port.status }}"
# callback_plugins/dingtalk.py
# 在ansible.cfg中添加: callback_whitelist = dingtalk
from ansible.plugins.callback import CallbackBase
import requests
import json
import os
class CallbackModule(CallbackBase):
    """
    DingTalk notification callback - opsnot.com

    Sends a markdown message when a playbook starts and when it finishes.
    Usage: export DINGTALK_WEBHOOK="https://oapi.dingtalk.com/robot/send?access_token=YOUR_TOKEN"
    """
    CALLBACK_VERSION = 2.0
    CALLBACK_TYPE = 'notification'
    CALLBACK_NAME = 'dingtalk'

    def __init__(self):
        super(CallbackModule, self).__init__()
        # Read the webhook from the environment so it is never hard-coded
        self.webhook = os.getenv('DINGTALK_WEBHOOK', '')
        # Without a webhook the plugin stays loaded but sends nothing
        self.disabled = not self.webhook
        if self.disabled:
            self._display.warning("钉钉webhook未配置,通知功能已禁用")

    def send_msg(self, msg):
        """Post msg to the webhook as a markdown message; failures only warn."""
        if self.disabled:
            return
        try:
            data = {
                "msgtype": "markdown",
                "markdown": {
                    "title": "Ansible执行通知",
                    "text": f"### Ansible通知\n{msg}\n\n> opsnot.com"
                }
            }
            requests.post(self.webhook, json=data, timeout=5)
        except Exception as e:
            # A broken notification must never break the playbook run
            self._display.warning(f"钉钉通知失败: {e}")

    def v2_playbook_on_start(self, playbook):
        """Announce the playbook file when the run starts."""
        msg = f"**Playbook启动**\n\n- 文件: {playbook._file_name}"
        self.send_msg(msg)

    def v2_playbook_on_stats(self, stats):
        """Send one summary line per processed host when the run ends."""
        hosts = sorted(stats.processed.keys())
        summary = []
        for h in hosts:
            s = stats.summarize(h)
            summary.append(
                f"- {h}: ✅{s['ok']} ❌{s['failures']} ⚠️{s['changed']}"
            )
        # One list item per line; joining with '' ran all hosts together
        host_lines = '\n'.join(summary)
        msg = f"**Playbook完成**\n\n{host_lines}"
        self.send_msg(msg)
---
# Tags let a single playbook be run in parts.
- name: 完整部署流程
  hosts: webservers
  tasks:
    - name: 备份数据
      archive:
        path: /opt/app
        dest: "/backup/app-{{ ansible_date_time.date }}.tar.gz"
      tags: [backup, pre-deploy]

    - name: 停止服务
      systemd:
        name: app
        state: stopped
      tags: [deploy, stop]

    - name: 部署应用
      copy:
        src: app.tar.gz
        dest: /opt/app/
      tags: [deploy]

    - name: 启动服务
      systemd:
        name: app
        state: started
      tags: [deploy, start]

    # "always" is a special tag: the task runs whatever --tags selects
    - name: 验证部署
      uri:
        url: "http://localhost:8080/health"
      tags: [deploy, verify, always]

    # "never" is a special tag: the task is skipped unless requested by tag
    - name: 清理旧备份
      shell: find /backup -name "app-*" -mtime +7 -delete
      tags: [cleanup, never]
# Usage:
# ansible-playbook deploy.yml --tags="deploy"          # deployment only
# ansible-playbook deploy.yml --tags="backup,deploy"   # backup + deployment
# ansible-playbook deploy.yml --skip-tags="backup"     # skip the backup
# ansible-playbook deploy.yml --tags="never,cleanup"   # run the cleanup
---
# The env variable selects both the target group and the variable file.
- name: 多环境变量管理
  hosts: "{{ env }}"
  vars_files:
    - vars/common.yml
    # Environment-specific settings, chosen by the value of env
    - "vars/{{ env }}.yml"
    - vars/secrets.yml
  tasks:
    - name: 显示环境
      debug:
        msg: "当前环境: {{ env }}, 数据库: {{ db_host }}"
# Run with: ansible-playbook deploy.yml -e "env=production"
---
- name: 委托任务
  hosts: webservers
  tasks:
    # Runs on the control node, and only once for the whole play
    - name: 从控制节点下载文件
      get_url:
        url: "http://repo.opsnot.com/app.tar.gz"
        dest: /tmp/app.tar.gz
      delegate_to: localhost
      run_once: true

    # Runs once, on the first host of the dbservers group
    - name: 更新数据库
      mysql_db:
        name: myapp
        state: present
      delegate_to: "{{ groups['dbservers'][0] }}"
      run_once: true

    # local_action executes the module on the control node
    - name: 本地命令
      local_action:
        module: command
        cmd: echo "部署到 {{ inventory_hostname }}"
问题1:SSH连接超时
# Inspect the SSH connection with connection-level debugging
ansible all --module-name ping -vvv
# Raise the connection timeout for a single run
ansible all --module-name ping --timeout 60
# Or set it permanently in ansible.cfg
[defaults]
timeout = 60
问题2:Facts收集慢
# Either turn fact gathering off ...
- hosts: all
  gather_facts: false
  # ... or keep it on and rely on fact_caching in ansible.cfg.
  # A play may contain only one gather_facts key, so the alternative stays
  # commented out:
  # gather_facts: true
问题3:变量未定义
# Fall back to a default value with the default filter
- debug:
    msg: "{{ some_var | default('默认值') }}"

# Or run the task only when the variable is defined
- debug:
    msg: "{{ some_var }}"
  when: some_var is defined
问题4:Handler未触发
# Handlers only run when the notifying task reports "changed"
# meta: flush_handlers runs the handlers notified so far at this point
- meta: flush_handlers
问题5:权限问题
# Make sure privilege escalation is configured correctly
- name: 需要root权限的任务
  command: systemctl restart nginx
  become: true
  become_user: root
  become_method: sudo
Ansible入门门槛较低,但功能强大,经久不衰,实为自动化运维之利器。尤其以下几点,深受运维伙伴们喜爱:
- 简单:SSH + YAML,学习成本低
- 无侵入:无需agent,即用即走
- 幂等性:多次执行结果一致
- 可扩展:模块、插件、动态Inventory
自动化是为了解放双手,但安全、够用、稳定、可维护才是根本。掌握这篇,运维自动化之路将更加游刃有余。
更多linux运维强大工具技巧,请看往期文章:
Strace命令,Linux系统调用追踪神器!
运维拿手绝活之 - Shell命令行展开实战手册
追踪打开文件的瑞士军刀 - lsof 运维实操手册
运维火眼金睛之 - tcpdump抓包实操手册
本文由 opsnot.com 整理,转载请注明出处,喜欢就关注一下吧!