public
Authored by
Vamshidhar Nallamala @s211030
openhpc
Embed
Share
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
Step 1 Installing the server
1 CPU
20 GB HDD
Adapter 1: Bridged adapter
Adapter 2: Host-only adapter
Adapter 3: NAT network
# Computing nodes Information
1 COMPUTE NODE
20 GB HDD
2 CPU
Adapter 1: Bridged adapter
Adapter 2: NAT network
## Preparing the server
Set the hostname
# Name this machine "master" and add a matching entry to /etc/hosts.
hostnamectl set-hostname master
printf '%s\n' "192.168.1.156 master" >> /etc/hosts
# Print the hosts file so the new entry can be checked by eye.
cat /etc/hosts
# Disable SELinux before performing any other step.
# The sed program is single-quoted so the shell passes it through untouched.
sed -i 's/^SELINUX=.*$/SELINUX=disabled/' /etc/selinux/config
# NOTE(review): this prints /etc/sysconfig/selinux while the edit above was
# made to /etc/selinux/config; presumably the former links to the latter.
# Confirm on the target system.
cat /etc/sysconfig/selinux
# Reboot the VM.
reboot
# After the reboot, check whether SELinux is disabled.
getenforce
# Update the VM's installed packages.
yum -y update
# Stop firewalld, disable it, then show its status.
# Note that "disable" also keeps it from starting at the next boot.
for fw_action in stop disable status; do
  systemctl "${fw_action}" firewalld
done
# Settings used by the rest of the master installation:
#   CHROOT    - directory holding the compute-node image
#   WW_CONF   - Warewulf bootstrap configuration file
#   master_ip - IP address of the master node
export CHROOT="/opt/ohpc/admin/images/centos7.7" \
  WW_CONF="/etc/warewulf/bootstrap.conf" \
  master_ip="192.168.1.156"
# Install wget.
yum -y install wget
# Install the OpenHPC release RPM, then the OpenHPC base and Warewulf
# packages, one yum invocation per item.
for ohpc_pkg in \
  http://build.openhpc.community/OpenHPC:/1.3/CentOS_7/x86_64/ohpc-release-1.3-1.el7.x86_64.rpm \
  ohpc-base \
  ohpc-warewulf; do
  yum -y install "${ohpc_pkg}"
done
# Install a few convenience tools.
yum install -y wget htop unzip vim
# Enable NTPD on the master host.
# Requires: ntp_server set to the upstream time source (hostname or IP).
# The ${ntp_server:?...} guard refuses to append anything when the variable is
# unset or empty, so /etc/ntp.conf never receives placeholder text.
systemctl enable ntpd.service
echo "server ${ntp_server:?set ntp_server to the upstream NTP host or IP}" >> /etc/ntp.conf
systemctl restart ntpd
# Show the NTP peers, the current date and the service status for checking.
ntpq -p
date
systemctl status ntpd
# Install the Slurm server packages and set "master" as the ControlMachine.
yum -y install ohpc-slurm-server
perl -pi -e 's/ControlMachine=\S+/ControlMachine=master/' /etc/slurm/slurm.conf
# Configure the internal interface: replace eth1 with enp0s3 in the Warewulf
# provisioning configuration.
perl -pi -e 's/device = eth1/device = enp0s3/' /etc/warewulf/provision.conf
# Flip "disable = yes" to "disable = no" in the tftp xinetd service file.
perl -pi -e 's/^\s+disable\s+= yes/ disable = no/' /etc/xinetd.d/tftp
# Bring up enp0s3 with the master IP.
# Print master_ip first so an unset value is noticed before it is used.
printf '%s\n' "${master_ip}"
ifconfig enp0s3 "${master_ip}" netmask 255.255.255.0 up
# Restart xinetd, enable and restart mariadb and httpd, and enable dhcpd.
systemctl restart xinetd
for svc in mariadb httpd; do
  systemctl enable "${svc}.service"
  systemctl restart "${svc}"
done
systemctl enable dhcpd.service
# Create the compute-node image under $CHROOT and install the OpenHPC compute
# base into it.
wwmkchroot centos-7 "${CHROOT}"
yum -y --installroot="${CHROOT}" install ohpc-base-compute
# Copy the master's resolv.conf into the image, then install the Slurm client,
# NTP, a kernel and Lmod there.
cp -p /etc/resolv.conf "${CHROOT}/etc/resolv.conf"
for node_pkg in ohpc-slurm-client ntp kernel lmod-ohpc; do
  yum -y --installroot="${CHROOT}" install "${node_pkg}"
done
# Initialize the Warewulf database, then the ssh keys.
for ww_target in database ssh_keys; do
  wwinit "${ww_target}"
done
# NFS client side: have the compute image mount /home and /opt/ohpc/pub from
# the master.
cat >> "${CHROOT}/etc/fstab" <<EOF
${master_ip}:/home /home nfs nfsvers=3,nodev,nosuid,noatime 0 0
${master_ip}:/opt/ohpc/pub /opt/ohpc/pub nfs nfsvers=3,nodev,noatime 0 0
EOF
# NFS server side: export /home (read-write) and /opt/ohpc/pub (read-only)
# from the master. The delimiter is quoted so the text is written literally.
cat >> /etc/exports <<'EOF'
/home *(rw,no_subtree_check,fsid=10,no_root_squash)
/opt/ohpc/pub *(ro,no_subtree_check,fsid=11)
EOF
exportfs -a
systemctl restart nfs-server
systemctl enable nfs-server
# Enable ntpd in the compute image and point it at the master as its NTP
# server. master_ip must be expanded here: without the "$", the literal text
# "master_ip" would be written into ntp.conf.
chroot "${CHROOT}" systemctl enable ntpd
echo "server ${master_ip}" >> "${CHROOT}/etc/ntp.conf"
# Update the basic Slurm configuration for nodes c1 and c2: node and partition
# names c[1-2], with 1 socket, 2 cores per socket and 1 thread per core.
# The same five edits go to the master's slurm.conf first, then to the copy
# inside the compute image.
# NOTE(review): the "^*" at the start of three patterns puts a quantifier on
# the line anchor. It looks unintended; confirm these still match as expected.
for slurm_conf in /etc/slurm/slurm.conf "${CHROOT}/etc/slurm/slurm.conf"; do
  perl -pi -e 's/^NodeName=(\S+)/NodeName=c[1-2]/' "${slurm_conf}"
  perl -pi -e 's/^PartitionName=normal Nodes=(\S+)/PartitionName=normal Nodes=c[1-2]/' "${slurm_conf}"
  perl -pi -e 's/^*Sockets=(\S+)/Sockets=1/' "${slurm_conf}"
  perl -pi -e 's/^*CoresPerSocket=(\S+)/CoresPerSocket=2/' "${slurm_conf}"
  perl -pi -e 's/^*ThreadsPerCore=(\S+)/ThreadsPerCore=1/' "${slurm_conf}"
done
# Enable munge and slurmctld on the master, then start both.
for svc_action in enable start; do
  for svc in munge slurmctld; do
    systemctl "${svc_action}" "${svc}"
  done
done
# Enable slurmd inside the compute image.
chroot "${CHROOT}" systemctl enable slurmd
# Increase the locked-memory limits on the master and in the compute image.
# Each substitution inserts a "memlock unlimited" line just ahead of the
# "# End of file" marker; $& puts the matched marker back after the new line.
for limits_file in /etc/security/limits.conf "${CHROOT}/etc/security/limits.conf"; do
  perl -pi -e 's/# End of file/\* soft memlock unlimited\n$&/s' "${limits_file}"
  perl -pi -e 's/# End of file/\* hard memlock unlimited\n$&/s' "${limits_file}"
done
# Enable the Slurm PAM module for sshd inside the compute image.
printf '%s\n' "account required pam_slurm.so" >> "${CHROOT}/etc/pam.d/sshd"
# Set up rsyslog on the master: uncomment the imudp module and the UDP
# listener on port 514, then restart the service.
perl -pi -e 's/\#\$ModLoad imudp/\$ModLoad imudp/' /etc/rsyslog.conf
perl -pi -e 's/\#\$UDPServerRun 514/\$UDPServerRun 514/' /etc/rsyslog.conf
systemctl restart rsyslog
# In the compute image: forward everything to the master on port 514 and
# comment out the rules that start with *.info, authpriv, mail, cron or uucp.
printf '%s\n' "*.* @${master_ip}:514" >> "${CHROOT}/etc/rsyslog.conf"
perl -pi -e 's/^\*\.info/\#\*\.info/' "${CHROOT}/etc/rsyslog.conf"
for facility in authpriv mail cron uucp; do
  perl -pi -e "s/^${facility}/\\#${facility}/" "${CHROOT}/etc/rsyslog.conf"
done
# Install Ganglia on the master and gmond in the compute image.
yum -y install ohpc-ganglia
yum -y --installroot="${CHROOT}" install ganglia-gmond-ohpc
# Start from the example gmond.conf, replace the <sms> placeholder with
# "master", and give the compute image the same file.
cp /opt/ohpc/pub/examples/ganglia/gmond.conf /etc/ganglia/gmond.conf
perl -pi -e 's/<sms>/master/' /etc/ganglia/gmond.conf
cp /etc/ganglia/gmond.conf "${CHROOT}/etc/ganglia/gmond.conf"
printf '%s\n' "gridname MySite.." >> /etc/ganglia/gmetad.conf
# Enable gmond and gmetad on the master, then start both.
for svc_action in enable start; do
  for svc in gmond gmetad; do
    systemctl "${svc_action}" "${svc}"
  done
done
# Enable gmond inside the compute image.
chroot "${CHROOT}" systemctl enable gmond
# Restart httpd only if it is already running.
systemctl try-restart httpd
# Install ClusterShell and define its local groups:
#   adm = master, compute = c[1-2], all = both groups.
# Absolute paths are used instead of changing into the groups directory, so a
# missing directory makes these commands fail rather than moving and
# overwriting a local.cfg in whatever directory the shell happens to be in.
yum -y install clustershell-ohpc
groups_dir=/etc/clustershell/groups.d
# Keep the packaged file as local.cfg.orig.
mv "${groups_dir}/local.cfg" "${groups_dir}/local.cfg.orig"
{
  echo "adm: master"
  echo "compute: c[1-2]"
  echo "all: @adm,@compute"
} > "${groups_dir}/local.cfg"
# End this step in the home directory.
cd
# Import the account, Slurm and munge files into Warewulf, listing the known
# files before and after so the change is visible.
wwsh file list
for shared_file in \
  /etc/passwd \
  /etc/group \
  /etc/shadow \
  /etc/slurm/slurm.conf \
  /etc/munge/munge.key; do
  wwsh file import "${shared_file}"
done
wwsh file list
# Add two driver entries to the Warewulf bootstrap configuration.
# The delimiter is quoted so the lines are written literally.
cat >> "${WW_CONF}" <<'EOF'
drivers += updates/kernel/
drivers += overlay
EOF
# Build the bootstrap image for the kernel release this machine is running.
wwbootstrap "$(uname -r)"
# Create the Virtual Node File System (VNFS) image from the compute chroot.
wwvnfs --chroot "${CHROOT}"
# Define compute node c1 manually from its MAC address.
# First register a "network" file that will be delivered to
# /etc/sysconfig/network on the node.
echo "GATEWAYDEV=enp0s8" > "/tmp/network.$$"
wwsh -y file import "/tmp/network.$$" --name network
wwsh -y file set network --path /etc/sysconfig/network --mode=0644 --uid=0
wwsh file list
wwsh -y node new c1 --ipaddr=192.168.1.65 --hwaddr=08:00:27:99:B3:5F -D enp0s3
wwsh node list
# Assign the VNFS image, the bootstrap for the running kernel, and the
# provisioned files to c1.
wwsh -y provision set "c1" --vnfs=centos7.7 --bootstrap="$(uname -r)" --files=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network
wwsh provision list
# To remove a node that was defined by mistake, run for example:
#   wwsh node delete c1
# This is left commented out on purpose: run as part of the sequence, it would
# delete the node that was just created and provisioned above.
# Restart the Ganglia daemons and dhcpd, then update the Warewulf PXE
# configuration.
for svc in gmond gmetad dhcpd; do
  systemctl restart "${svc}"
done
wwsh pxe update
# Create a user named test1 for testing purposes; passwd prompts for the new
# password interactively.
test_user=test1
useradd -m "${test_user}"
passwd "${test_user}"
# Resync the files tracked by Warewulf.
# NOTE(review): presumably this picks up the new passwd/group/shadow entries;
# confirm against the wwsh documentation.
wwsh file resync
# Next step: turn on the compute nodes.
Error: not able to turn the compute nodes on.
Please register or sign in to comment