Replace /etc/hosts with Consul DNS
 - Stop defining every host in /etc/hosts
 - The domain assigned to all nodes is defined in the pillar (node.dc1.pnda.local by default)
 - Replace most uses of the pnda.ip_addresses salt function with pnda.get_hosts_for_role so that all internal wiring uses hostnames instead of IP addresses

Nodes may now be replaced or added to the cluster without editing /etc/hosts on every node or updating IP addresses in config files on existing nodes. The next phase of work will use Consul service DNS entries instead of node DNS entries, further reducing the need to edit existing configuration (e.g. Kafka broker strings) when nodes join or leave the cluster.
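
As a sketch of the addressing model this change moves to (the host name below is hypothetical; only the node.dc1.pnda.local suffix comes from the default pillar values):

    import socket

    # Before: every node carried /etc/hosts entries such as "10.0.0.21  kafka-0",
    # and config files embedded the raw IP addresses.
    # After: names resolve through the local Consul DNS agent, so they keep
    # working if a node's IP changes ('kafka-0' is a hypothetical minion ID).
    broker_ip = socket.gethostbyname('kafka-0.node.dc1.pnda.local')

    # Planned next phase: service-level entries such as
    # 'kafka.service.dc1.pnda.local', so broker strings never need editing
    # as nodes join or leave.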

PNDA-4203
PNDA-4235
PNDA-4445
jeclarke committed Mar 13, 2018
1 parent 4e2113f commit 088d51e
Showing 37 changed files with 239 additions and 180 deletions.
3 changes: 3 additions & 0 deletions pillar/services.sls
@@ -40,6 +40,9 @@ kafka:
 consul:
   service: True
   version: 1.0.3
+  domain: pnda.local
+  data_center: dc1
+  node: node
 
 kafkatool:
   release_version: v0.2.0
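
A minimal sketch of how these three pillar keys compose the node DNS suffix used by the states below (values are the defaults above; mirrors the domain_name expression in salt/consul/dns.sls):

    consul_pillar = {'domain': 'pnda.local', 'data_center': 'dc1', 'node': 'node'}
    domain_name = '{node}.{data_center}.{domain}'.format(**consul_pillar)
    assert domain_name == 'node.dc1.pnda.local'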
34 changes: 21 additions & 13 deletions salt/_modules/pnda.py
@@ -14,7 +14,6 @@ def get_name_service():
     name_service = response['items'][0]['name']
     return name_service
 
 def cluster_name():
     """Returns PNDA cluster name of the minion"""
     cname = __grains__['pnda_cluster']
@@ -71,35 +70,46 @@ def hbase_master_host():

 def hadoop_manager_ip():
     """ Returns the Cloudera Manager ip address"""
-    cm = ip_addresses('hadoop_manager')
+    cm = get_hosts_for_role('hadoop_manager')
     if cm is not None and len(cm) > 0:
         return cm[0]
     else:
         return None

+def kafka_brokers_hosts():
+    """Returns kafka brokers hosts"""
+    return get_hosts_for_role('kafka')
 
-def kafka_brokers_ips():
-    """Returns kafka brokers ip addresses"""
-    return ip_addresses('kafka')
 
+def opentsdb_hosts():
+    """Returns opentsdb node hosts"""
+    return get_hosts_for_role('opentsdb')
 
-def opentsdb_ips():
-    """Returns opentsdb nodes ip addresses"""
-    return ip_addresses('opentsdb')
 
+def kafka_zookeepers_hosts():
+    """Returns zookeeper hosts"""
+    return get_hosts_for_role('zookeeper')
 
 def kafka_zookeepers_ips():
     """Returns zookeeper ip addresses"""
-    return ip_addresses('zookeeper')
+    return get_ips_for_role('zookeeper')
 
-def ip_addresses(role):
+def get_ips_for_role(role):
     """Returns ip addresses of minions having a specific role"""
     query = "G@pnda_cluster:{} and G@roles:{}".format(cluster_name(), role)
     result = __salt__['mine.get'](query, 'network.ip_addrs', 'compound').values()
     # Only get first ip address
     result = [r[0] for r in result]
     return result if len(result) > 0 else None
 
+def get_hosts_for_role(role):
+    """Returns host names of minions having a specific role"""
+    query = "G@pnda_cluster:{} and G@roles:{}".format(cluster_name(), role)
+    result = __salt__['mine.get'](query, 'network.ip_addrs', 'compound').keys()
+    # The mine keys are minion IDs, used as host names; the domain set in the
+    # pillar is appended at resolution time via the DNS search path
+    result = [host_name for host_name in result]
+    return result if len(result) > 0 else None

 def generate_http_link(role, suffix):
-    nodes = ip_addresses(role)
+    nodes = get_hosts_for_role(role)
     if nodes is not None and len(nodes) > 0:
         return 'http://%s%s' % (nodes[0], suffix)
     else:
@@ -142,7 +152,6 @@ def cloudera_get_hosts_by_role(service, role_type):
 def ambari_get_hosts_by_role(service, role_type):
     return [socket.getfqdn(host['HostRoles']['host_name']) for host in ambari_request('/clusters/%s/services/%s/components/%s' % (cluster_name(),service,role_type))['host_components']]
 
 def get_hosts_by_role(service, role_type):
     if hadoop_distro() == 'CDH':
         return cloudera_get_hosts_by_role(service, role_type)
@@ -162,7 +171,6 @@ def cloudera_get_service_status(service):

     return service_resp['healthSummary']
 
 def ambari_get_service_status(service):
     user = hadoop_manager_username()
     password = hadoop_manager_password()
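For context, a minimal sketch of why get_hosts_for_role() reads .keys() where get_ips_for_role() reads .values(): Salt's mine.get returns a dict keyed by minion ID. The minion IDs and addresses below are illustrative only.

    mine_result = {                      # shape of __salt__['mine.get'](...)
        'kafka-0': ['10.0.0.21', '127.0.0.1'],
        'kafka-1': ['10.0.0.22', '127.0.0.1'],
    }
    hosts = list(mine_result.keys())                    # ['kafka-0', 'kafka-1']
    ips = [addrs[0] for addrs in mine_result.values()]  # first address per minion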
2 changes: 1 addition & 1 deletion salt/ambari/agent.sls
@@ -1,4 +1,4 @@
-{%- set ambari_server_host = salt['pnda.ip_addresses']('hadoop_manager')[0] -%}
+{%- set ambari_server_host = salt['pnda.get_hosts_for_role']('hadoop_manager')[0] -%}
 
 ambari-agent-user:
   user.present:
4 changes: 2 additions & 2 deletions salt/ambari/server.sls
@@ -2,11 +2,11 @@
 {% set misc_packages_path = pillar['pnda_mirror']['misc_packages_path'] %}
 {% set mirror_location = pnda_mirror + misc_packages_path %}
 {% set jdbc_package = 'je-5.0.73.jar' %}
-{%- set cm_host = salt['pnda.ip_addresses']('hadoop_manager')[0] -%}
+{%- set cm_host = salt['pnda.get_hosts_for_role']('hadoop_manager')[0] -%}
 {% set cmdb_user = pillar['hadoop_manager']['cmdb']['user'] %}
 {% set cmdb_database = pillar['hadoop_manager']['cmdb']['database'] %}
 {% set cmdb_password = pillar['hadoop_manager']['cmdb']['password'] %}
-{%- set cmdb_host = salt['pnda.ip_addresses']('oozie_database')[0] -%}
+{%- set cmdb_host = salt['pnda.get_hosts_for_role']('oozie_database')[0] -%}
 {% set mysql_root_password = salt['pillar.get']('mysql:root_pw', 'mysqldefault') %}
 
 include:
2 changes: 1 addition & 1 deletion salt/cdh/cloudera-manager-agent.sls
@@ -1,4 +1,4 @@
-{%- set cm_host = salt['pnda.ip_addresses']('hadoop_manager')[0] -%}
+{%- set cm_host = salt['pnda.get_hosts_for_role']('hadoop_manager')[0] -%}
 
 cloudera-manager-agent-install_daemons:
   pkg.installed:
4 changes: 2 additions & 2 deletions salt/cdh/cloudera-manager.sls
@@ -1,6 +1,6 @@
 {%- set mysql_root_password = salt['pillar.get']('mysql:root_pw', 'mysqldefault') -%}
-{%- set cmdb_host = salt['pnda.ip_addresses']('oozie_database')[0] -%}
-{%- set cm_host = salt['pnda.ip_addresses']('hadoop_manager')[0] -%}
+{%- set cmdb_host = salt['pnda.get_hosts_for_role']('oozie_database')[0] -%}
+{%- set cm_host = salt['pnda.get_hosts_for_role']('hadoop_manager')[0] -%}
 {% set cmdb_user = pillar['hadoop_manager']['cmdb']['user'] %}
 {% set cmdb_database = pillar['hadoop_manager']['cmdb']['database'] %}
 {% set cmdb_password = pillar['hadoop_manager']['cmdb']['password'] %}
38 changes: 38 additions & 0 deletions salt/cdh/oozie_mysql.sls
@@ -12,6 +12,10 @@
{% set hue_database = salt['pillar.get']('hadoop:hue:database', 'hue') %}
{% set hue_password = salt['pillar.get']('hadoop:hue:password', 'hue') %}

+{% set scm_user = salt['pillar.get']('hadoop_manager:cmdb:user', 'scm') %}
+{% set scm_database = salt['pillar.get']('hadoop_manager:cmdb:database', 'scm') %}
+{% set scm_password = salt['pillar.get']('hadoop_manager:cmdb:password', 'scm') %}

 include:
   - mysql

@@ -152,3 +156,37 @@ cdh-Grant privileges to root user from outside:
     - host: '%'
     - connection_user: root
     - connection_pass: {{ mysql_root_password }}
+
+cdh-Create scm MySQL user:
+  mysql_user.present:
+    - name: {{ scm_user }}
+    - host: localhost
+    - password: {{ scm_password }}
+    - connection_user: root
+    - connection_pass: {{ mysql_root_password }}
+
+cdh-Create scm MySQL user remote:
+  mysql_user.present:
+    - name: {{ scm_user }}
+    - host: '%'
+    - password: {{ scm_password }}
+    - connection_user: root
+    - connection_pass: {{ mysql_root_password }}
+
+cdh-Grant privileges to scm user from localhost:
+  mysql_grants.present:
+    - grant: all privileges
+    - database: {{ scm_database }}.*
+    - user: {{ scm_user }}
+    - host: localhost
+    - connection_user: root
+    - connection_pass: {{ mysql_root_password }}
+
+cdh-Grant privileges to scm user from outside:
+  mysql_grants.present:
+    - grant: all privileges
+    - database: {{ scm_database }}.*
+    - user: {{ scm_user }}
+    - host: '%'
+    - connection_user: root
+    - connection_pass: {{ mysql_root_password }}
4 changes: 2 additions & 2 deletions salt/cdh/setup_hadoop.sls
@@ -10,13 +10,13 @@
 {% set keystone_tenant = salt['pillar.get']('keystone.tenant', "") %}
 {% set keystone_auth_url = salt['pillar.get']('keystone.auth_url', "") + '/tokens' %}
 {% set region = salt['pillar.get']('keystone.region_name', "") %}
-{% set mysql_host = salt['pnda.ip_addresses']('oozie_database')[0] %}
+{% set mysql_host = salt['pnda.get_hosts_for_role']('oozie_database')[0] %}
 {% set aws_key = salt['pillar.get']('aws.archive_key', '') %}
 {% set aws_secret_key = salt['pillar.get']('aws.archive_secret', '') %}
 {% set pip_index_url = pillar['pip']['index_url'] %}
 {% set pnda_home = pillar['pnda']['homedir'] %}
 {% set app_packages_dir = pnda_home + "/app-packages" %}
-{% set pnda_graphite_host = salt['pnda.ip_addresses']('graphite')[0] %}
+{% set pnda_graphite_host = salt['pnda.get_hosts_for_role']('graphite')[0] %}
 
 {%- set data_volume_list = [] %}
 {%- for n in range(flavor_cfg.data_volumes_count) -%}
2 changes: 1 addition & 1 deletion salt/cdh/templates/impala-shell.tpl
@@ -1,5 +1,5 @@
#!{{ virtual_env_dir }}/bin/python
-{% set cm_ip = salt['pnda.ip_addresses']('hadoop_manager')[0] %}
+{% set cm_ip = salt['pnda.get_hosts_for_role']('hadoop_manager')[0] %}
{% set cm_username = pillar['admin_login']['user'] %}
{% set cm_password = pillar['admin_login']['password'] %}
import sys
2 changes: 1 addition & 1 deletion salt/console-backend/data-logger.sls
@@ -4,7 +4,7 @@
{% set install_dir = pillar['pnda']['homedir'] %}
{% set app_dir = install_dir + '/console-backend-data-logger' %}
{% set app_config_dir = app_dir + '/conf' %}
-{% set host_ip = salt['pnda.ip_addresses']('console_backend_data_logger')[0] %}
+{% set host_ip = salt['pnda.get_hosts_for_role']('console_backend_data_logger')[0] %}
{% set backend_app_port = salt['pillar.get']('console_backend_data_logger:bind_port', '3001') %}
{% set data_logger_log_file = '/var/log/pnda/console/data-logger.log' %}
{% set data_logger_log_level = 'debug' %}
2 changes: 1 addition & 1 deletion salt/console-backend/data-manager.sls
@@ -5,7 +5,7 @@
{% set app_dir = install_dir + '/console-backend-data-manager' %}
{% set app_config_dir = app_dir + '/conf' %}
{% set pnda_cluster = salt['pnda.cluster_name']() %}
-{% set host_ip = salt['pnda.ip_addresses']('console_backend_data_manager')[0] %}
+{% set host_ip = salt['pnda.get_hosts_for_role']('console_backend_data_manager')[0] %}
{% set console_frontend_port = salt['pillar.get']('console_frontend:bind_port', '') %}
 # get the host name of the instance where the console frontend is running in the cluster
{% set console_frontend_fqdn = salt['mine.get']('roles:console_frontend', 'grains.items', expr_form='grain').values()[0]['fqdn'] %}
4 changes: 2 additions & 2 deletions salt/console-frontend/init.sls
@@ -16,12 +16,12 @@
{% set km_port = salt['pillar.get']('kafkamanager:bind_port', 10900) %}
{% set hadoop_distro = grains['hadoop.distro'] %}

-{% set data_manager_host = salt['pnda.ip_addresses']('console_backend_data_manager')[0] %}
+{% set data_manager_host = salt['pnda.get_hosts_for_role']('console_backend_data_manager')[0] %}
{% set data_manager_port = salt['pillar.get']('console_backend_data_manager:bind_port', '3123') %}
{% set data_manager_version = salt['pillar.get']('console_backend_data_manager:release_version', 'unknown') %}

# edge node IP
-{% set edge_nodes = salt['pnda.ip_addresses']('hadoop_edge') %}
+{% set edge_nodes = salt['pnda.get_hosts_for_role']('hadoop_edge') %}
{%- if edge_nodes is not none and edge_nodes|length > 0 -%}
{%- set edge_node_ip = edge_nodes[0] -%}
{%- else -%}
4 changes: 2 additions & 2 deletions salt/consul/defaults.yaml
@@ -18,10 +18,10 @@ consul:
encrypt: ""
retry_join: []
retry_interval: 30s
datacenter: "dc1"
datacenter: "{{ pillar['consul']['data_center'] }}"
enable_script_checks: true
ports:
- dns: 53
domain: "pnda.local"
domain: "{{ pillar['consul']['domain'] }}"
register: []
scripts: []
37 changes: 35 additions & 2 deletions salt/consul/dns.sls
@@ -1,4 +1,5 @@
{%- from 'consul/map.jinja' import consul with context -%}
+{% set domain_name = pillar['consul']['node'] + '.' + pillar['consul']['data_center'] + '.' + pillar['consul']['domain'] %}
 {% if grains['os'] in ('RedHat', 'CentOS') %}
 
 consul_dns-add-nameserver:
   file.prepend:
@@ -12,4 +13,36 @@ consul_dns-add-domain:
   file.replace:
     - name: /etc/resolv.conf
     - pattern: 'search(.*)'
-    - repl: 'search\1 {{consul.config.domain}}'
+    - repl: 'search\1 {{ domain_name }}'
+
+{% for cfg_file in salt['cmd.shell']('ls -1 /etc/sysconfig/network-scripts/ifcfg-*').split('\n') %}
+consul_turn-off-peer-dns-{{ cfg_file }}:
+  file.append:
+    - name: {{ cfg_file }}
+    - text: PEERDNS=no
+{% endfor %}
+
+consul_prevent-modify-resolv-conf:
+  cmd.run:
+    - name: chattr +i /etc/resolv.conf
+
+{% else %}
+
+consul_dns-add-nameserver:
+  file.append:
+    - name: /etc/resolvconf/resolv.conf.d/head
+    - text: |
+{%- for ip in salt['pnda.kafka_zookeepers_ips']() %}
+        nameserver {{ ip }}
+{%- endfor %}
+
+consul_dns-add-domain:
+  file.append:
+    - name: /etc/resolvconf/resolv.conf.d/base
+    - text: 'search {{ domain_name }}'
+
+consul_refresh-resolv-conf:
+  cmd.run:
+    - name: resolvconf -u
+
+{% endif %}
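
Once these states have run, short minion IDs resolve through the appended search domain — a quick check, assuming the default pillar values and a hypothetical minion ID:

    import socket

    # 'kafka-0' works because resolv.conf now carries
    # 'search ... node.dc1.pnda.local' and Consul serves the node records.
    assert socket.gethostbyname('kafka-0') == socket.gethostbyname('kafka-0.node.dc1.pnda.local')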
8 changes: 4 additions & 4 deletions salt/deployment-manager/templates/dm-config.json.tpl
@@ -3,19 +3,19 @@
{%- set hadoop_distro = grains['hadoop.distro'] -%}

{%- set kafka_brokers = [] -%}
-{%- for ip in salt['pnda.kafka_brokers_ips']() -%}
+{%- for ip in salt['pnda.kafka_brokers_hosts']() -%}
{%- do kafka_brokers.append(ip+':9092') -%}
{%- endfor -%}

{%- set kafka_zookeepers = [] -%}
-{%- for ip in salt['pnda.kafka_zookeepers_ips']() -%}
+{%- for ip in salt['pnda.kafka_zookeepers_hosts']() -%}
{%- do kafka_zookeepers.append(ip+':2181') -%}
{%- endfor -%}

{% set km_port = salt['pillar.get']('kafkamanager:bind_port', 10900) %}

{%- set opentsdb_port = salt['pillar.get']('opentsdb:bind_port', 4242) -%}
-{%- set opentsdb_nodes = salt['pnda.ip_addresses']('opentsdb') -%}
+{%- set opentsdb_nodes = salt['pnda.get_hosts_for_role']('opentsdb') -%}
{%- set opentsdb_host = '' -%}
{%- if opentsdb_nodes is not none and opentsdb_nodes|length > 0 -%}
{%- set opentsdb_host = opentsdb_nodes[0]+':'+opentsdb_port|string -%}
@@ -25,7 +25,7 @@

{% set km_link = salt['pnda.generate_http_link']('kafka_manager',':'+km_port|string+'/clusters/'+pnda_cluster) %}

-{%- set jupyter_nodes = salt['pnda.ip_addresses']('jupyter') -%}
+{%- set jupyter_nodes = salt['pnda.get_hosts_for_role']('jupyter') -%}
{%- set jupyter_host = '' -%}
{%- if jupyter_nodes is not none and jupyter_nodes|length > 0 -%}
{%- set jupyter_host = jupyter_nodes[0] -%}
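A rough Python equivalent of what this template now renders — connection strings carry host names rather than IP addresses (the host names below are hypothetical):

    kafka_hosts = ['kafka-0.node.dc1.pnda.local', 'kafka-1.node.dc1.pnda.local']
    kafka_brokers = [h + ':9092' for h in kafka_hosts]
    # -> ['kafka-0.node.dc1.pnda.local:9092', 'kafka-1.node.dc1.pnda.local:9092']
    zk_hosts = ['zk-0.node.dc1.pnda.local']
    kafka_zookeepers = [h + ':2181' for h in zk_hosts]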
2 changes: 1 addition & 1 deletion salt/gobblin/init.sls
@@ -21,7 +21,7 @@
{% set namenode = salt['pnda.hadoop_namenode']() %}

{%- set kafka_brokers = [] -%}
-{%- for ip in salt['pnda.kafka_brokers_ips']() -%}
+{%- for ip in salt['pnda.kafka_brokers_hosts']() -%}
{%- do kafka_brokers.append(ip + ':9092') -%}
{%- endfor -%}

2 changes: 1 addition & 1 deletion salt/grafana/init.sls
@@ -5,7 +5,7 @@
{% set mirror_location = pnda_mirror + misc_packages_path %}

{% set pnda_graphite_port = 8013 %}
-{% set pnda_graphite_host = salt['pnda.ip_addresses']('graphite')[0] %}
+{% set pnda_graphite_host = salt['pnda.get_hosts_for_role']('graphite')[0] %}

{% set grafana_login = pillar['pnda']['user'] %}
# Because grafana is checking for password length, we need a password of at least 8 characters
14 changes: 7 additions & 7 deletions salt/hdp/files/hdp_setup.py
@@ -153,7 +153,7 @@ def set_hdf_repl_factor(blueprint, nodes):
     if 'hdfs-site' in config:
         config['hdfs-site']['properties']['dfs.replication'] = hdfs_repl_factor
 
-def create_new_cluster(nodes, cluster_name, hdp_core_stack_repo, hdp_utils_stack_repo, ambari_api, auth, headers):
+def create_new_cluster(nodes, cluster_name, domain_name, hdp_core_stack_repo, hdp_utils_stack_repo, ambari_api, auth, headers):
     '''
     Create a new cluster, will fail if a cluster with this name already exists.
     - Adds the stack repos
@@ -216,7 +216,7 @@ def create_new_cluster(nodes, cluster_name, hdp_core_stack_repo, hdp_utils_stack

     ### Create blueprint ###
     logging.info("Loading blueprint")
-    blueprint = json.loads(_CFG.BLUEPRINT % {'cluster_name': cluster_name})
+    blueprint = json.loads(_CFG.BLUEPRINT % {'cluster_name': cluster_name, 'domain_name': domain_name})
     set_hdf_repl_factor(blueprint, nodes)
 
     logging.debug("Blueprint to be used:")
@@ -383,7 +383,7 @@ def update_cluster_config(nodes, cluster_name, ambari_api, auth, headers):
logging.info("Updating cluster configuration")

### Load properties out of blueprint definition ###
blueprint = json.loads(_CFG.BLUEPRINT % {'cluster_name': cluster_name})
blueprint = json.loads(_CFG.BLUEPRINT % {'cluster_name': cluster_name, 'domain_name': domain_name})
set_hdf_repl_factor(blueprint, nodes)
blueprint_config = {}
for requested_config in blueprint['configurations']:
@@ -470,6 +470,7 @@ def setup_hadoop(
         ambari_host,
         nodes,
         cluster_name,
+        domain_name,
         ambari_username='admin',
         ambari_password='admin',
         hdp_core_stack_repo=None,
@@ -537,17 +538,16 @@
     auth = (ambari_username, ambari_password)
 
     new_nodes = get_new_nodes(nodes, cluster_name, ambari_api, auth, headers)
     if len(new_nodes) == 0:
         # no new nodes, reapply config to existing ones
-        update_cluster_config(nodes, cluster_name, ambari_api, auth, headers)
+        update_cluster_config(nodes, cluster_name, domain_name, ambari_api, auth, headers)
     elif len(new_nodes) == len([node for node in nodes if node['type'] == 'DATANODE']):
         # all new nodes, create new cluster
-        create_new_cluster(nodes, cluster_name, hdp_core_stack_repo, hdp_utils_stack_repo, ambari_api, auth, headers)
+        create_new_cluster(nodes, cluster_name, domain_name, hdp_core_stack_repo, hdp_utils_stack_repo, ambari_api, auth, headers)
     else:
         # some new nodes, expand cluster onto them
         expand_cluster(new_nodes, cluster_name, ambari_api, auth, headers)
         # config might also have been updated so make sure that is up to date too
-        update_cluster_config(nodes, cluster_name, ambari_api, auth, headers)
+        update_cluster_config(nodes, cluster_name, domain_name, ambari_api, auth, headers)
 
     logging.info("HDP setup finished")