Skip to content
Snippets Groups Projects
public
Authored by
Vamshidhar Nallamala @s211030

openhpc

Edited
Embed
Share
  • Clone with SSH
  • Clone with HTTPS
  • openhpc.txt 7.26 KiB
    Step 1: Installing the server (master VM)
    
    1 CPU
    20 GB HDD
    Adapter 1: Bridged Adapter
    Adapter 2: Host-Only Adapter
    Adapter 3: NAT Network
    
    # Compute node information
    
    1 compute node
    20 GB HDD
    2 CPUs
    Adapter 1: Bridged Adapter
    Adapter 2: NAT Network
    
    ## Preparing the server
    # Set the hostname. ("sethostname" in the original notes was a step label,
    # not a real command, so it is kept only as this comment.)
    hostnamectl set-hostname master
    # Map the master's IP to its hostname. The append is skipped when the exact
    # entry already exists, so repeating this step does not duplicate the line.
    grep -qxF "192.168.1.156 master" /etc/hosts || echo "192.168.1.156 master" >> /etc/hosts
    cat /etc/hosts
    
    
    # Disable SELinux before performing any other step.
    # The sed program is single-quoted so the shell cannot glob-expand or
    # otherwise reinterpret the `.*` and `$` it contains.
    sed -i 's/^SELINUX=.*$/SELINUX=disabled/' /etc/selinux/config
    # Show the file that was actually edited to confirm the change.
    cat /etc/selinux/config

    # Reboot the VM.
    reboot

    # After the reboot, check whether SELinux is disabled.
    getenforce
    
    # Bring the installed packages on the VM up to date.
    yum update -y

    # Temporarily turn off firewalld: stop it, disable it, then show its status.
    for fw_action in stop disable status; do
      systemctl "$fw_action" firewalld
    done
    
    # Environment used by the later steps on the master:
    #   CHROOT    - directory of the compute-node image
    #   WW_CONF   - Warewulf bootstrap configuration file
    #   master_ip - IP address of the master node
    export CHROOT=/opt/ohpc/admin/images/centos7.7 \
           WW_CONF=/etc/warewulf/bootstrap.conf \
           master_ip=192.168.1.156
    
    # Install wget.
    yum install -y wget

    # Install the OpenHPC 1.3 release package, then the base and Warewulf packages,
    # one yum transaction per package.
    yum install -y http://build.openhpc.community/OpenHPC:/1.3/CentOS_7/x86_64/ohpc-release-1.3-1.el7.x86_64.rpm
    for ohpc_pkg in ohpc-base ohpc-warewulf; do
      yum install -y "$ohpc_pkg"
    done
    # Additional tools.
    yum -y install wget htop unzip vim
    
    # Enable NTPD on the master host.
    # The original notes appended the literal placeholder "master Enter IP  " to
    # /etc/ntp.conf, which is not a valid ntp.conf directive. A proper "server"
    # line is appended instead. Set ntp_server beforehand to choose the upstream
    # time source; pool.ntp.org is only a fallback default.
    ntp_server="${ntp_server:-pool.ntp.org}"
    systemctl enable ntpd.service
    echo "server ${ntp_server}" >> /etc/ntp.conf
    systemctl restart ntpd
    # Verify: peer list, current date, and service state.
    ntpq -p
    date
    systemctl status ntpd
    
    # Install the Slurm server package and set ControlMachine to "master" in slurm.conf.
    yum install -y ohpc-slurm-server
    perl -pi -e 's/ControlMachine=\S+/ControlMachine=master/' /etc/slurm/slurm.conf
    
    # Configure the internal interface: replace eth1 with enp0s3 in the Warewulf
    # provisioning config, and switch "disable = yes" to "disable = no" for tftp in xinetd.
    perl -pi -e 's/device = eth1/device = enp0s3/' /etc/warewulf/provision.conf
    perl -pi -e 's/^\s+disable\s+= yes/ disable = no/' /etc/xinetd.d/tftp
    
    # Bring up enp0s3 with the master's IP address.
    # ${master_ip:?...} stops the command with a message when master_ip is unset
    # or empty (for example in a fresh shell), instead of running ifconfig
    # without an address. This replaces the manual "check master_ip first" step.
    echo "${master_ip:?master_ip is not set - export it first}"
    ifconfig enp0s3 "${master_ip:?master_ip is not set - export it first}" netmask 255.255.255.0 up
    
    # Restart xinetd, enable and restart mariadb and httpd, and enable dhcpd.
    systemctl restart xinetd
    for svc in mariadb httpd; do
      systemctl enable "${svc}.service"
      systemctl restart "$svc"
    done
    systemctl enable dhcpd.service
    
    # Create the compute-node image under $CHROOT, then install the OpenHPC
    # compute base package into it.
    wwmkchroot centos-7 "$CHROOT"
    yum -y --installroot="$CHROOT" install ohpc-base-compute
    
    # Copy the master's resolv.conf into the compute image, then install the
    # Slurm client, NTP, a kernel and Lmod into the image, one package at a time.
    cp -p /etc/resolv.conf "$CHROOT/etc/resolv.conf"
    for image_pkg in ohpc-slurm-client ntp kernel lmod-ohpc; do
      yum -y --installroot="$CHROOT" install "$image_pkg"
    done
    
    # Initialize the Warewulf database first, then the SSH keys.
    for ww_module in database ssh_keys; do
      wwinit "$ww_module"
    done
    
    # NFS client side: add mounts of the master's /home and /opt/ohpc/pub to the
    # compute image's fstab.
    printf '%s\n' \
      "${master_ip}:/home /home nfs nfsvers=3,nodev,nosuid,noatime 0 0" \
      "${master_ip}:/opt/ohpc/pub /opt/ohpc/pub nfs nfsvers=3,nodev,noatime 0 0" \
      >> "$CHROOT/etc/fstab"
    
    # NFS server side on the master: export /home (read-write) and /opt/ohpc/pub
    # (read-only), apply the exports, then restart and enable the NFS server.
    printf '%s\n' \
      "/home *(rw,no_subtree_check,fsid=10,no_root_squash)" \
      "/opt/ohpc/pub *(ro,no_subtree_check,fsid=11)" \
      >> /etc/exports
    exportfs -a
    for nfs_action in restart enable; do
      systemctl "$nfs_action" nfs-server
    done
    
    # Enable ntpd inside the compute image and add the master's IP as its NTP server.
    # The original wrote the literal text "master_ip" (the "$" was missing), so
    # the image's ntp.conf never received the actual address. The :? guard stops
    # the command with a message if master_ip is unset or empty.
    chroot "$CHROOT" systemctl enable ntpd
    echo "server ${master_ip:?master_ip is not set - export it first}" >> "$CHROOT/etc/ntp.conf"
    
    # Update the basic Slurm configuration for nodes c1 and c2, on the master and
    # in the compute image: node list c[1-2], 1 socket, 2 cores per socket,
    # 1 thread per core.
    # The original patterns began with "^*", a quantifier applied to an anchor.
    # That is a malformed regex; a word boundary (\b) is used instead so each
    # key is matched wherever it appears on the line.
    for slurm_conf in /etc/slurm/slurm.conf "$CHROOT/etc/slurm/slurm.conf"; do
      perl -pi -e 's/^NodeName=(\S+)/NodeName=c[1-2]/' "$slurm_conf"
      perl -pi -e 's/^PartitionName=normal Nodes=(\S+)/PartitionName=normal Nodes=c[1-2]/' "$slurm_conf"
      perl -pi -e 's/\bSockets=(\S+)/Sockets=1/' "$slurm_conf"
      perl -pi -e 's/\bCoresPerSocket=(\S+)/CoresPerSocket=2/' "$slurm_conf"
      perl -pi -e 's/\bThreadsPerCore=(\S+)/ThreadsPerCore=1/' "$slurm_conf"
    done
    
    # Enable munge and slurmctld on the master, then start them in the same order.
    # Finally enable slurmd inside the compute image.
    for verb in enable start; do
      for unit in munge slurmctld; do
        systemctl "$verb" "$unit"
      done
    done
    chroot "$CHROOT" systemctl enable slurmd
    
    # Raise the locked-memory limits: insert "* soft memlock unlimited" and then
    # "* hard memlock unlimited" before the "# End of file" marker, first in the
    # master's limits.conf and then in the compute image's.
    for limits_conf in /etc/security/limits.conf "$CHROOT/etc/security/limits.conf"; do
      for limit_kind in soft hard; do
        perl -pi -e 's/# End of file/\* '"$limit_kind"' memlock unlimited\n$&/s' "$limits_conf"
      done
    done
    
    # Enable the Slurm PAM module for sshd in the compute image.
    printf '%s\n' 'account required pam_slurm.so' >> "$CHROOT/etc/pam.d/sshd"
    
    # rsyslog on the master: uncomment the imudp module and the UDP listener on
    # port 514, then restart the service.
    perl -pi -e 's/\#\$ModLoad imudp/\$ModLoad imudp/' /etc/rsyslog.conf
    perl -pi -e 's/\#\$UDPServerRun 514/\$UDPServerRun 514/' /etc/rsyslog.conf
    systemctl restart rsyslog
    # rsyslog in the compute image: send everything to the master on port 514 and
    # comment out the rules that start with the selectors listed below.
    printf '%s\n' "*.* @${master_ip}:514" >> "$CHROOT/etc/rsyslog.conf"
    for selector in '\*\.info' authpriv mail cron uucp; do
      perl -pi -e "s/^${selector}/\\#${selector}/" "$CHROOT/etc/rsyslog.conf"
    done
    
    # Install Ganglia on the master and gmond in the compute image.
    yum install -y ohpc-ganglia
    yum -y --installroot="$CHROOT" install ganglia-gmond-ohpc
    # Start from the example gmond.conf, replace the <sms> placeholder with
    # "master", and copy the result into the compute image.
    gmond_conf=/etc/ganglia/gmond.conf
    cp /opt/ohpc/pub/examples/ganglia/gmond.conf "$gmond_conf"
    perl -pi -e 's/<sms>/master/' "$gmond_conf"
    cp "$gmond_conf" "${CHROOT}${gmond_conf}"
    # Set the grid name for gmetad.
    printf '%s\n' 'gridname MySite..' >> /etc/ganglia/gmetad.conf
    
    # Enable gmond and gmetad on the master, then start them in the same order.
    # Enable gmond in the compute image and restart httpd if it is running.
    for verb in enable start; do
      for unit in gmond gmetad; do
        systemctl "$verb" "$unit"
      done
    done
    chroot "$CHROOT" systemctl enable gmond
    systemctl try-restart httpd
    
    # Install ClusterShell and define its node groups:
    #   adm = master, compute = c[1-2] (prefix "c", 2 compute nodes), all = both groups.
    yum -y install clustershell-ohpc
    # local.cfg is only touched after the cd succeeds. The original ran mv and the
    # redirects unconditionally, so a failed cd would have moved or written
    # local.cfg in whatever directory the shell happened to be in.
    if cd /etc/clustershell/groups.d; then
      # Keep the shipped file as a backup, then write the new group definitions.
      mv local.cfg local.cfg.orig
      printf '%s\n' 'adm: master' 'compute: c[1-2]' 'all: @adm,@compute' > local.cfg
      cd
    else
      echo "cannot enter /etc/clustershell/groups.d; ClusterShell groups not configured" >&2
    fi
    
    # Import the account, Slurm and munge files into Warewulf, listing the
    # registered files before and after.
    wwsh file list
    for ww_file in /etc/passwd /etc/group /etc/shadow /etc/slurm/slurm.conf /etc/munge/munge.key; do
      wwsh file import "$ww_file"
    done
    wwsh file list
    
    # Add the extra driver entries to the Warewulf bootstrap configuration.
    printf '%s\n' 'drivers += updates/kernel/' 'drivers += overlay' >> "$WW_CONF"

    # Build the bootstrap image for the running kernel.
    wwbootstrap "$(uname -r)"

    # Create the Virtual Node File System (VNFS) image from the compute image.
    wwvnfs --chroot "$CHROOT"
    
    # Define the compute node manually from its MAC address.
    # First write a small file that sets GATEWAYDEV, import it as "network",
    # and map it to /etc/sysconfig/network.
    network_file=/tmp/network.$$
    printf '%s\n' 'GATEWAYDEV=enp0s8' > "$network_file"
    wwsh -y file import "$network_file" --name network
    wwsh -y file set network --path /etc/sysconfig/network --mode=0644 --uid=0
    wwsh file list
    # Register node c1 with its IP and MAC address on enp0s3.
    wwsh -y node new c1 --ipaddr=192.168.1.65 --hwaddr=08:00:27:99:B3:5F -D enp0s3
    wwsh node list
    
    # Assign the VNFS image, the bootstrap for the running kernel, and the list of
    # provisioned files to node c1, then list the provisioning settings.
    wwsh -y provision set "c1" --vnfs=centos7.7 --bootstrap="$(uname -r)" \
      --files=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network

    wwsh provision list
    
    # To delete a node after a mistake, run the command below by hand.
    # It is left commented out: run in sequence with the rest of these notes it
    # would delete the c1 node that was just defined and provisioned.
    # wwsh node delete c1   # example
    
    # Restart the Ganglia daemons and dhcpd, then update the PXE configuration.
    for daemon in gmond gmetad dhcpd; do
      systemctl restart "$daemon"
    done
    wwsh pxe update
    
    # Create a user named test1 for testing, set its password (passwd prompts
    # interactively), and resync the Warewulf files.
    useradd --create-home test1
    passwd test1
    wwsh file resync
    
    # Next step: power on the compute nodes.
    
    # Known issue: the compute nodes could not be powered on.
    
    0% or .
    You are about to add 0 people to the discussion. Proceed with caution.
    Finish editing this message first!
    Please register or to comment