Tagged: timelineserver Toggle Comment Threads | Keyboard Shortcuts

  • Wang 21:43 on 2018-03-02 Permalink | Reply
    Tags: , , , , , , , , , timelineserver   

    [GCP ] Install bigdata cluster 

    I signed up for the Google Cloud free trial, which gives me $300 of credit, so I provisioned 4 servers for testing.

    Servers:

    Host

    OS

    Memory

    CPU

    Disk

    Region

    master.c.ambari-195807.internal

    CentOS 7

    13 GB

    Intel Ivy Bridge: 2

    200G

    asia-east1-a

    slave1.c.ambari-195807.internal

    CentOS 7

    13 GB

    Intel Ivy Bridge: 2

    200G

    asia-east1-a

    slave2.c.ambari-195807.internal

    CentOS 7

    13 GB

    Intel Ivy Bridge: 2

    200G

    asia-east1-a

    slave3.c.ambari-195807.internal

    CentOS 7

    13 GB

    Intel Ivy Bridge: 2

    200G

    asia-east1-a

    1.prepare

    1.1.configure an ssh key on each slave so that the master can log in without a password

    1.2.download and install jdk1.8 on each server, then set JAVA_HOME in the profile

    1.3.configure hostnames in /etc/hosts on each server


    2.install hadoop

    2.1.download hadoop 2.8.3

    wget http://ftp.jaist.ac.jp/pub/apache/hadoop/common/hadoop-2.8.3/hadoop-2.8.3.tar.gz
    tar -vzxf hadoop-2.8.3.tar.gz && cd hadoop-2.8.3
    

    2.2.configure core-site.xml

    <property>
        <name>fs.default.name</name>
        <value>hdfs://master.c.ambari-195807.internal:9000</value> 
    </property>
    <property>
        <name>hadoop.tmp.dir</name>  
        <value>/data/hadoop/hdfs/tmp</value>
    </property>
    <property>
        <name>hadoop.http.filter.initializers</name>
        <value>org.apache.hadoop.security.HttpCrossOriginFilterInitializer</value>
    </property>
    

    2.3.configure hdfs-site.xml

    <property>
        <name>dfs.name.dir</name>
        <value>/data/hadoop/dfs/name</value>
    </property>
    <property>
        <name>dfs.data.dir</name>
        <value>/opt/hadoop/dfs/data</value>
    </property>
    <property>
        <name>dfs.replication</name>
        <value>3</value>
    </property>
    

    2.4.configure mapred-site.xml

    <property>  
        <name>mapred.job.tracker</name>  
        <value>master.c.ambari-195807.internal:49001</value>  
    </property>
    <property>
        <name>mapreduce.framework.name</name>  
        <value>yarn</value>  
    </property>
    <property>
        <name>mapred.local.dir</name>  
        <value>/data/hadoop/mapred</value>  
    </property>
    <property>
        <name>yarn.scheduler.minimum-allocation-mb</name>
        <value>2048</value>
    </property>
    <property>
        <name>yarn.scheduler.maximum-allocation-mb</name>
        <value>4096</value>
    </property>
      <property>
        <name>yarn.nodemanager.resource.memory-mb</name>
        <value>4096</value>
    </property>
    <property>
        <name>mapreduce.map.memory.mb</name>
        <value>4096</value>
    </property>
    <property>
        <name>mapreduce.reduce.memory.mb</name>
        <value>4096</value>
    </property>
    <property>
        <name>mapreduce.map.java.opts</name>
        <value>-Xmx6144m</value>
    </property>
    <property>
        <name>mapreduce.reduce.java.opts</name>
        <value>-Xmx6144m</value>
    </property>
    

    2.5.configure yarn-site.xml

    <property>  
        <name>yarn.resourcemanager.hostname</name>  
        <value>master.c.ambari-195807.internal</value>  
    </property>  
    <property>  
        <name>yarn.resourcemanager.address</name>  
        <value>${yarn.resourcemanager.hostname}:8032</value>  
    </property>  
    <property>  
        <name>yarn.resourcemanager.scheduler.address</name>  
        <value>${yarn.resourcemanager.hostname}:8030</value>  
    </property>  
    <property>  
        <name>yarn.resourcemanager.webapp.address</name>  
        <value>${yarn.resourcemanager.hostname}:8088</value>  
    </property>  
    <property>  
        <name>yarn.resourcemanager.webapp.https.address</name>  
        <value>${yarn.resourcemanager.hostname}:8090</value>  
    </property>  
    <property>  
        <name>yarn.resourcemanager.resource-tracker.address</name>  
        <value>${yarn.resourcemanager.hostname}:8031</value>  
    </property>  
    <property>  
        <name>yarn.resourcemanager.admin.address</name>  
        <value>${yarn.resourcemanager.hostname}:8033</value>  
    </property>  
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <property>
        <name>yarn.timeline-service.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.system-metrics-publisher.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.timeline-service.generic-application-history.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.timeline-service.http-cross-origin.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.timeline-service.hostname</name>
        <value>master.c.ambari-195807.internal</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.cross-origin.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address</name>
        <value>master.c.ambari-195807.internal:8032</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address</name>
        <value>master.c.ambari-195807.internal:8030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address</name>
        <value>master.c.ambari-195807.internal:8031</value>
    </property>
    

    2.6.set slaves

    echo slave1.c.ambari-195807.internal >>slaves
    echo slave2.c.ambari-195807.internal >>slaves
    echo slave3.c.ambari-195807.internal >>slaves
    

    2.7.copy hadoop from master to each slave

    scp -r hadoop-2.8.3/ gizmo@slave1.c.ambari-195807.internal:/opt/apps/
    scp -r hadoop-2.8.3/ gizmo@slave2.c.ambari-195807.internal:/opt/apps/
    scp -r hadoop-2.8.3/ gizmo@slave3.c.ambari-195807.internal:/opt/apps/
    

    2.8.configure hadoop env profile

    echo 'export HADOOP_HOME=/opt/apps/hadoop-2.8.3' >>~/.bashrc
    echo 'export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop' >>~/.bashrc
    echo 'export PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin:$JAVA_HOME/bin' >>~/.bashrc
    

    2.9.start hdfs/yarn

    start-dfs.sh
    start-yarn.sh
    

    2.10.check

    hdfs, http://master.c.ambari-195807.internal:50070

    yarn, http://master.c.ambari-195807.internal:8088


    3.install hive

    3.1.download hive 2.3.2

    wget http://ftp.jaist.ac.jp/pub/apache/hive/hive-2.3.2/apache-hive-2.3.2-bin.tar.gz
    tar -zvxf apache-hive-2.3.2-bin.tar.gz && cd apache-hive-2.3.2-bin
    

    3.2.configure hive env profile

    echo 'export HIVE_HOME=/opt/apps/apache-hive-2.3.2-bin' >>~/.bashrc
    echo 'export PATH=$PATH:$HIVE_HOME/bin' >>~/.bashrc
    

    3.3.install mysql to store metadata

    rpm -ivh http://repo.mysql.com/mysql57-community-release-el7.rpm
    yum install -y mysql-server
    systemctl start mysqld
    mysql_password='pa12ss34wo!@d#'
    mysql_default_password=`grep 'temporary password' /var/log/mysqld.log | awk -F ': ' '{print $2}'`
    mysql -u root -p${mysql_default_password} -e "set global validate_password_policy=0; set global validate_password_length=4;" --connect-expired-password
    mysqladmin -u root -p${mysql_default_password} password ${mysql_password}
    mysql -u root -p${mysql_password} -e "create database hive default charset 'utf8'; flush privileges;"
    mysql -u root -p${mysql_password} -e "grant all privileges on hive.* to hive@'' identified by 'hive'; flush privileges;"
    

    3.4.download mysql driver

    wget http://central.maven.org/maven2/mysql/mysql-connector-java/5.1.45/mysql-connector-java-5.1.45.jar -P $HIVE_HOME/lib
    

    3.5.configure hive-site.xml

    <configuration>
        <property>
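            <name>javax.jdo.option.ConnectionURL</name>
            <!-- JDBC URL of the "hive" database created in step 3.3; adjust the host if MySQL runs on another server -->
            <value>jdbc:mysql://localhost:3306/hive</value>
        </property>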
        <property>
            <name>javax.jdo.option.ConnectionDriverName</name>
            <value>com.mysql.jdbc.Driver</value>
        </property>
        <property>
            <name>javax.jdo.option.ConnectionUserName</name>
            <value>hive</value>
        </property>
        <property>
            <name>javax.jdo.option.ConnectionPassword</name>
            <value>hive</value>
        </property>
    </configuration>
    

    3.6.initialize hive meta tables

    schematool -dbType mysql -initSchema
    

    3.7.test hive


    4.install tez

    4.1.please follow the instruction “install tez on single server” on each server


    5.install hbase

    5.1.download hbase 1.2.6

    wget http://ftp.jaist.ac.jp/pub/apache/hbase/1.2.6/hbase-1.2.6-bin.tar.gz
    tar -vzxf hbase-1.2.6-bin.tar.gz && cd hbase-1.2.6
    

    5.2.configure hbase-site.xml

    <property>
        <name>hbase.rootdir</name>
        <value>hdfs://master.c.ambari-195807.internal:9000/hbase</value>
    </property>
    <property>
        <name>hbase.master</name>
        <value>master</value>
    </property>
    <property>
        <name>hbase.cluster.distributed</name>
        <value>true</value>
    </property>
    <property>
        <name>hbase.zookeeper.property.clientPort</name>
        <value>2181</value>
    </property>
    <property>
        <name>hbase.zookeeper.quorum</name>
        <value>slave1.c.ambari-195807.internal,slave2.c.ambari-195807.internal,slave3.c.ambari-195807.internal</value>
    </property>
    <property>
        <name>dfs.support.append</name>
        <value>true</value>
    </property>
    <property>  
        <name>hbase.master.info.port</name>  
        <value>60010</value>  
    </property>
    

    5.3.configure regionservers

    echo slave1.c.ambari-195807.internal >>regionservers
    echo slave2.c.ambari-195807.internal >>regionservers
    echo slave3.c.ambari-195807.internal >>regionservers
    

    5.4.copy hbase from master to each slave

    5.5.configure hbase env profile

    echo 'export HBASE_HOME=/opt/apps/hbase-1.2.6' >>~/.bashrc 
    echo 'export PATH=$PATH:$HBASE_HOME/bin' >>~/.bashrc
    

    5.6.start hbase

    start-hbase.sh
    

    5.7.check, http://35.194.253.162:60010


    Things done!

     
  • Wang 19:51 on 2018-02-24 Permalink | Reply
    Tags: , , , , timelineserver, tomcat   

    Replace MR with Tez on hive2 

    Starting with hive2, Hive-on-MR is deprecated; you can see the following warning when running the hive cli

    Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases.
    

    So I installed Tez to replace MR to run jobs, below are installation steps.

    1.install Tez

    1.1.download Tez and unpack it

    wget http://ftp.jaist.ac.jp/pub/apache/tez/0.9.0/apache-tez-0.9.0-src.tar.gz
    tar -zvxf apache-tez-0.9.0-src.tar.gz && cd apache-tez-0.9.0-src
    

    1.2.compile and build the Tez jar; you need to install protobuf and maven before compiling

    mvn clean package -DskipTests=true -Dmaven.javadoc.skip=true
    

    1.3.upload Tez to hdfs

    hadoop fs -mkdir /apps
    hadoop fs -copyFromLocal tez-dist/target/tez-0.9.0.tar.gz /apps/
    

    1.4.create tez-site.xml under hadoop conf directory

    cat <<'EOF' > $HADOOP_CONF_DIR/tez-site.xml
    <?xml version="1.0" encoding="UTF-8"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
        <property>
            <name>tez.lib.uris</name>
            <value>${fs.defaultFS}/apps/tez-0.9.0.tar.gz</value>
        </property>
        <property>
            <name>tez.history.logging.service.class</name>
            <value>org.apache.tez.dag.history.logging.ats.ATSHistoryLoggingService</value>
        </property>
        <property>
            <name>tez.tez-ui.history-url.base</name>
            <value>http://localhost:8080/tez-ui/</value>
        </property>
    </configuration>
    EOF
    

    1.5.append configurations to yarn-site.xml

    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <property>
        <name>yarn.timeline-service.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.system-metrics-publisher.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.timeline-service.generic-application-history.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.timeline-service.http-cross-origin.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.timeline-service.hostname</name>
        <value>localhost</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.cross-origin.enabled</name>
        <value>true</value>
    </property>
    <property>  
        <name>yarn.resourcemanager.address</name>  
        <value>localhost:8032</value>  
    </property>  
    <property>  
        <name>yarn.resourcemanager.scheduler.address</name>  
        <value>localhost:8030</value>  
    </property>  
    <property>  
        <name>yarn.resourcemanager.resource-tracker.address</name>  
        <value>localhost:8031</value>  
    </property>
    

    1.6.append configuration to core-site.xml

    <property>
        <name>fs.default.name</name>
        <value>hdfs://master:9000</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>  
        <value>/data/hadoop/hdfs/tmp</value>
    </property>
    <property>
        <name>hadoop.http.filter.initializers</name>
        <value>org.apache.hadoop.security.HttpCrossOriginFilterInitializer</value>
    </property>
    

    1.7.unpack tez-dist/target/tez-0.9.0-minimal.tar.gz

    1.8.append env to /etc/profile

    export TEZ_CONF_DIR="location of tez-site.xml"
    export TEZ_JARS="location of unpackaged tez-0.9.0-minimal.tar.gz"
    export HADOOP_CLASSPATH=${TEZ_CONF_DIR}:${TEZ_JARS}/*:${TEZ_JARS}/lib/*
    

    1.9.start timelineserver

    yarn-daemon.sh start timelineserver
    

    1.10.configure the tez ui: install tomcat, unpack tez-ui/target/tez-ui-0.9.0.war into webapps, and rename the unpacked directory to tez-ui

    1.11.start tomcat, visit http://localhost:8080/tez-ui to test

    2.test Tez

    2.1.change job engine to Tez

    hive> set hive.execution.engine=tez;
    

    2.2.run job to test

    hive> select count(*) from gbif_0004998;
    Query ID = wanghongmeng_20180224180801_e5ddcf23-1e1a-4724-8156-1393807c2ac0
    Total jobs = 1
    Launching Job 1 out of 1
    Status: Running (Executing on YARN cluster with App id application_1519462946874_0003)
    
    ----------------------------------------------------------------------------------------------
    VERTICES MODE STATUS TOTAL COMPLETED RUNNING PENDING FAILED KILLED 
    ----------------------------------------------------------------------------------------------
    Map 1 .......... container SUCCEEDED 1 1 0 0 0 0 
    Reducer 2 ...... container SUCCEEDED 1 1 0 0 0 0 
    ----------------------------------------------------------------------------------------------
    VERTICES: 02/02 [==========================>>] 100% ELAPSED TIME: 9.87 s 
    ----------------------------------------------------------------------------------------------
    OK
    327316
    Time taken: 23.876 seconds, Fetched: 1 row(s)
    

    2.3.check result on tez ui

     
c
Compose new post
j
Next post/Next comment
k
Previous post/Previous comment
r
Reply
e
Edit
o
Show/Hide comments
t
Go to top
l
Go to login
h
Show/Hide help
shift + esc
Cancel
%d bloggers like this: