[GCP] Install a big data cluster

I signed up for the Google Cloud free trial, which gives $300 of credit, so I spun up 4 servers to run the tests.

Servers:

Host                             OS        Memory  CPU                  Disk  Region
master.c.ambari-195807.internal  CentOS 7  13 GB   Intel Ivy Bridge: 2  200G  asia-east1-a
slave1.c.ambari-195807.internal  CentOS 7  13 GB   Intel Ivy Bridge: 2  200G  asia-east1-a
slave2.c.ambari-195807.internal  CentOS 7  13 GB   Intel Ivy Bridge: 2  200G  asia-east1-a
slave3.c.ambari-195807.internal  CentOS 7  13 GB   Intel Ivy Bridge: 2  200G  asia-east1-a

1.prepare

1.1.configure an ssh key on each slave so the master can log in without a password
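
For example (the commands assume the gizmo user that the scp commands in 2.7 use; adjust if your login user differs):

# on master: generate a key pair once, then push the public key to every slave
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
for host in slave1.c.ambari-195807.internal slave2.c.ambari-195807.internal slave3.c.ambari-195807.internal; do
    ssh-copy-id gizmo@${host}
done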

1.2.install jdk 1.8 on each server: download it and set JAVA_HOME in the shell profile
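
A minimal sketch; the JDK path below is only an example, point JAVA_HOME at wherever the JDK was actually unpacked:

# assumed install path, replace with the real one
echo 'export JAVA_HOME=/opt/apps/jdk1.8.0_161' >>~/.bashrc
echo 'export PATH=$PATH:$JAVA_HOME/bin' >>~/.bashrc
source ~/.bashrc
java -version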

1.3.configure the hostnames in /etc/hosts on each server
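
For example, /etc/hosts on every server (the IPs below are placeholders; use the internal IPs GCP assigned):

# placeholder internal IPs, replace with the real ones
10.140.0.2 master.c.ambari-195807.internal master
10.140.0.3 slave1.c.ambari-195807.internal slave1
10.140.0.4 slave2.c.ambari-195807.internal slave2
10.140.0.5 slave3.c.ambari-195807.internal slave3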


2.install hadoop

2.1.download hadoop 2.8.3

wget http://ftp.jaist.ac.jp/pub/apache/hadoop/common/hadoop-2.8.3/hadoop-2.8.3.tar.gz
tar -vzxf hadoop-2.8.3.tar.gz && cd hadoop-2.8.3

2.2.configure core-site.xml

<property>
    <name>fs.default.name</name>
    <value>hdfs://master.c.ambari-195807.internal:9000</value> 
</property>
<property>
    <name>hadoop.tmp.dir</name>  
    <value>/data/hadoop/hdfs/tmp</value>
</property>
<property>
    <name>hadoop.http.filter.initializers</name>
    <value>org.apache.hadoop.security.HttpCrossOriginFilterInitializer</value>
</property>

2.3.configure hdfs-site.xml

<property>
    <name>dfs.name.dir</name>
    <value>/data/hadoop/dfs/name</value>
</property>
<property>
    <name>dfs.data.dir</name>
    <value>/opt/hadoop/dfs/data</value>
</property>
<property>
    <name>dfs.replication</name>
    <value>3</value>
</property>
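
The directories referenced above do not exist yet; a quick sketch to create them on every node before starting HDFS (assumes the gizmo user from 2.7 runs hadoop):

sudo mkdir -p /data/hadoop/hdfs/tmp /data/hadoop/dfs/name /data/hadoop/mapred /opt/hadoop/dfs/data
sudo chown -R gizmo:gizmo /data/hadoop /opt/hadoop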

2.4.configure mapred-site.xml

<property>  
    <name>mapred.job.tracker</name>  
    <value>master.c.ambari-195807.internal:49001</value>  
</property>
<property>
    <name>mapreduce.framework.name</name>  
    <value>yarn</value>  
</property>
<property>
    <name>mapred.local.dir</name>  
    <value>/data/hadoop/mapred</value>  
</property>
<!-- note: the yarn.scheduler.* and yarn.nodemanager.* settings below are normally placed in yarn-site.xml -->
<property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>2048</value>
</property>
<property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>4096</value>
</property>
<property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>4096</value>
</property>
<property>
    <name>mapreduce.map.memory.mb</name>
    <value>4096</value>
</property>
<property>
    <name>mapreduce.reduce.memory.mb</name>
    <value>4096</value>
</property>
<property>
    <name>mapreduce.map.java.opts</name>
    <!-- keep the heap below mapreduce.map.memory.mb, or YARN kills the container -->
    <value>-Xmx3276m</value>
</property>
<property>
    <name>mapreduce.reduce.java.opts</name>
    <value>-Xmx3276m</value>
</property>

2.5.configure yarn-site.xml

<property>  
    <name>yarn.resourcemanager.hostname</name>  
    <value>master.c.ambari-195807.internal</value>  
</property>  
<property>  
    <name>yarn.resourcemanager.address</name>  
    <value>${yarn.resourcemanager.hostname}:8032</value>  
</property>  
<property>  
    <name>yarn.resourcemanager.scheduler.address</name>  
    <value>${yarn.resourcemanager.hostname}:8030</value>  
</property>  
<property>  
    <name>yarn.resourcemanager.webapp.address</name>  
    <value>${yarn.resourcemanager.hostname}:8088</value>  
</property>  
<property>  
    <name>yarn.resourcemanager.webapp.https.address</name>  
    <value>${yarn.resourcemanager.hostname}:8090</value>  
</property>  
<property>  
    <name>yarn.resourcemanager.resource-tracker.address</name>  
    <value>${yarn.resourcemanager.hostname}:8031</value>  
</property>  
<property>  
    <name>yarn.resourcemanager.admin.address</name>  
    <value>${yarn.resourcemanager.hostname}:8033</value>  
</property>  
<property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
</property>
<property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
    <name>yarn.timeline-service.enabled</name>
    <value>true</value>
</property>
<property>
    <name>yarn.resourcemanager.system-metrics-publisher.enabled</name>
    <value>true</value>
</property>
<property>
    <name>yarn.timeline-service.generic-application-history.enabled</name>
    <value>true</value>
</property>
<property>
    <name>yarn.timeline-service.http-cross-origin.enabled</name>
    <value>true</value>
</property>
<property>
    <name>yarn.timeline-service.hostname</name>
    <value>master.c.ambari-195807.internal</value>
</property>
<property>
    <name>yarn.resourcemanager.webapp.cross-origin.enabled</name>
    <value>true</value>
</property>

2.6.set slaves in etc/hadoop/slaves

echo slave1.c.ambari-195807.internal >>etc/hadoop/slaves
echo slave2.c.ambari-195807.internal >>etc/hadoop/slaves
echo slave3.c.ambari-195807.internal >>etc/hadoop/slaves

2.7.copy hadoop from master to each slave

scp -r hadoop-2.8.3/ gizmo@slave1.c.ambari-195807.internal:/opt/apps/
scp -r hadoop-2.8.3/ gizmo@slave2.c.ambari-195807.internal:/opt/apps/
scp -r hadoop-2.8.3/ gizmo@slave3.c.ambari-195807.internal:/opt/apps/

2.8.configure hadoop env profile

echo 'export HADOOP_HOME=/opt/apps/hadoop-2.8.3' >>~/.bashrc
echo 'export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop' >>~/.bashrc
echo 'export PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin:$JAVA_HOME/bin' >>~/.bashrc
source ~/.bashrc

2.9.format the namenode, then start hdfs/yarn

hdfs namenode -format
start-dfs.sh
start-yarn.sh

2.10.check

hdfs, http://master.c.ambari-195807.internal:50070

yarn, http://master.c.ambari-195807.internal:8088
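
Besides the web UIs, a quick command-line check that all 3 datanodes and nodemanagers registered:

hdfs dfsadmin -report | grep 'Live datanodes'
yarn node -list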


3.install hive

3.1.download hive 2.3.2

wget http://ftp.jaist.ac.jp/pub/apache/hive/hive-2.3.2/apache-hive-2.3.2-bin.tar.gz
tar -zvxf apache-hive-2.3.2-bin.tar.gz && cd apache-hive-2.3.2-bin

3.2.configure hive env profile

echo 'export HIVE_HOME=/opt/apps/apache-hive-2.3.2-bin' >>~/.bashrc
echo 'export PATH=$PATH:$HIVE_HOME/bin' >>~/.bashrc

3.3.install mysql to store metadata

rpm -ivh http://repo.mysql.com/mysql57-community-release-el7.rpm
yum install -y mysql-server
systemctl start mysqld
mysql_password="pa12ss34wo!@d#"
mysql_default_password=`grep 'temporary password' /var/log/mysqld.log | awk -F ': ' '{print $2}'`
mysql -u root -p${mysql_default_password} -e "set global validate_password_policy=0; set global validate_password_length=4;" --connect-expired-password
mysqladmin -u root -p${mysql_default_password} password ${mysql_password}
mysql -u root -p${mysql_password} -e "create database hive default charset 'utf8'; flush privileges;"
mysql -u root -p${mysql_password} -e "grant all privileges on hive.* to hive@'' identified by 'hive'; flush privileges;"

3.4.download mysql driver

wget http://central.maven.org/maven2/mysql/mysql-connector-java/5.1.45/mysql-connector-java-5.1.45.jar -P $HIVE_HOME/lib/

3.5.configure hive-site.xml

<configuration>
    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <!-- assumes MySQL runs on the master node; adjust host/port if it lives elsewhere -->
        <value>jdbc:mysql://master.c.ambari-195807.internal:3306/hive?useSSL=false</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>hive</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>hive</value>
    </property>
</configuration>

3.6.initialize hive meta tables

schematool -dbType mysql -initSchema

3.7.test hive
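
A minimal smoke test (the table name is just an example):

hive -e "show databases;"
hive -e "create table smoke_test (id int); show tables; drop table smoke_test;"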


4.install tez

4.1.follow the “install tez on single server” instructions on each server


5.install hbase

5.1.download hbase 1.2.6

wget http://ftp.jaist.ac.jp/pub/apache/hbase/1.2.6/hbase-1.2.6-bin.tar.gz
tar -vzxf hbase-1.2.6-bin.tar.gz && cd hbase-1.2.6

5.2.configure hbase-site.xml

<property>
    <name>hbase.rootdir</name>
    <value>hdfs://master.c.ambari-195807.internal:9000/hbase</value>
</property>
<property>
    <name>hbase.master</name>
    <value>master</value>
</property>
<property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
</property>
<property>
    <name>hbase.zookeeper.property.clientPort</name>
    <value>2181</value>
</property>
<property>
    <name>hbase.zookeeper.quorum</name>
    <value>slave1.c.ambari-195807.internal,slave2.c.ambari-195807.internal,slave3.c.ambari-195807.internal</value>
</property>
<property>
    <name>dfs.support.append</name>
    <value>true</value>
</property>
<property>  
    <name>hbase.master.info.port</name>  
    <value>60010</value>  
</property>

5.3.configure conf/regionservers

echo slave1.c.ambari-195807.internal >>conf/regionservers
echo slave2.c.ambari-195807.internal >>conf/regionservers
echo slave3.c.ambari-195807.internal >>conf/regionservers

5.4.copy hbase from master to each slave
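
Same pattern as 2.7, assuming the same gizmo user and /opt/apps layout:

scp -r hbase-1.2.6/ gizmo@slave1.c.ambari-195807.internal:/opt/apps/
scp -r hbase-1.2.6/ gizmo@slave2.c.ambari-195807.internal:/opt/apps/
scp -r hbase-1.2.6/ gizmo@slave3.c.ambari-195807.internal:/opt/apps/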

5.5.configure hbase env profile

echo 'export HBASE_HOME=/opt/apps/hbase-1.2.6' >>~/.bashrc 
echo 'export PATH=$PATH:$HBASE_HOME/bin' >>~/.bashrc

5.6.start hbase

start-hbase.sh

5.7.check, http://35.194.253.162:60010
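
The web UI needs a firewall rule that opens port 60010; from the master itself, the shell gives the same information:

echo "status" | hbase shell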


All done!