hive

install hive

  1. 安装jdk1.8
  2. 安装hadoop2.x,配置hdfs和yarn
  3. 安装hive2.x

安装jdk 1.8.0_231


mkdir /as4k; cd /as4k;
wget http://192.168.1.5/chfs/shared/share/jdk8/jdk-8u231-linux-x64.tar.gz
tar xf jdk-8u231-linux-x64.tar.gz

cat << 'EOF' >> /etc/profile
export JAVA_HOME=/as4k/jdk1.8.0_231
export JRE_HOME=$JAVA_HOME/jre
export CLASSPATH=$JAVA_HOME/lib:$JRE_HOME/lib:$CLASSPATH
export PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$PATH
EOF

source /etc/profile; jps; java -version

安装hadoop-2.7.7

https://hadoop.apache.org/docs/r2.7.7/hadoop-project-dist/hadoop-common/SingleCluster.html

-------------- 1 免密认证 -------------------------------
ssh-keygen -t rsa -P "" -f ~/.ssh/id_rsa; cat ~/.ssh/id_rsa.pub
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 0600 ~/.ssh/authorized_keys


------------------------- 2 安装hadoop-2.7.7  ---------------------
cd /as4k
wget http://192.168.1.5/chfs/shared/share/hadoop-2.7.7.tar.gz
tar xf hadoop-2.7.7.tar.gz
cd hadoop-2.7.7
bin/hadoop version

cp   etc/hadoop/hadoop-env.sh etc/hadoop/hadoop-env.sh.bak 
egrep -v "^$|^#" etc/hadoop/hadoop-env.sh.bak  > etc/hadoop/hadoop-env.sh
sed -i 's#\${JAVA_HOME}#/as4k/jdk1.8.0_231#' etc/hadoop/hadoop-env.sh


cat << 'EOF' > etc/hadoop/core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://localhost:9000</value>
    </property>
</configuration>
EOF

cat << 'EOF' > etc/hadoop/hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
</configuration>
EOF

cd /as4k/hadoop-2.7.7
bin/hdfs namenode -format
sbin/start-dfs.sh

[root@node5 hadoop-2.7.7]# jps
21460 Jps
21351 SecondaryNameNode
21080 NameNode
21199 DataNode
[root@node5 hadoop-2.7.7]# netstat -lntup
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address           Foreign Address         State       PID/Program name    
tcp        0      0 127.0.0.1:9000          0.0.0.0:*               LISTEN      21080/java          
tcp        0      0 0.0.0.0:50090           0.0.0.0:*               LISTEN      21351/java          
tcp        0      0 127.0.0.1:40812         0.0.0.0:*               LISTEN      21199/java          
tcp        0      0 0.0.0.0:50070           0.0.0.0:*               LISTEN      21080/java          
tcp        0      0 0.0.0.0:22              0.0.0.0:*               LISTEN      3081/sshd           
tcp        0      0 0.0.0.0:50010           0.0.0.0:*               LISTEN      21199/java          
tcp        0      0 0.0.0.0:50075           0.0.0.0:*               LISTEN      21199/java          
tcp        0      0 0.0.0.0:50020           0.0.0.0:*               LISTEN      21199/java          
tcp6       0      0 :::22                   :::*                    LISTEN      3081/sshd           
udp        0      0 127.0.0.1:323           0.0.0.0:*                           19398/chronyd       
udp6       0      0 ::1:323                 :::*                                19398/chronyd       


NameNode - http://localhost:50070/
192.168.1.119:50070


bin/hdfs dfs -mkdir /user
bin/hdfs dfs -mkdir /user/root

bin/hdfs dfs -cat /user/root/output/*


-------------------- 3 YARN on a Single Node -------------------

cp etc/hadoop/mapred-site.xml.template etc/hadoop/mapred-site.xml
cat << 'EOF' > etc/hadoop/mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>
EOF


cat << 'EOF' > etc/hadoop/yarn-site.xml
<?xml version="1.0"?>
<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
</configuration>
EOF

cd /as4k/hadoop-2.7.7
sbin/start-yarn.sh
ResourceManager - http://localhost:8088/
192.168.1.119:8088

[root@node5 hadoop-2.7.7]# jps
22598 ResourceManager
22694 NodeManager
22918 Jps
21351 SecondaryNameNode
21080 NameNode
21199 DataNode
[root@node5 hadoop-2.7.7]# 
[root@node5 hadoop-2.7.7]# netstat -lntup
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address           Foreign Address         State       PID/Program name    
tcp        0      0 127.0.0.1:9000          0.0.0.0:*               LISTEN      21080/java          
tcp        0      0 0.0.0.0:50090           0.0.0.0:*               LISTEN      21351/java          
tcp        0      0 127.0.0.1:40812         0.0.0.0:*               LISTEN      21199/java          
tcp        0      0 0.0.0.0:50070           0.0.0.0:*               LISTEN      21080/java          
tcp        0      0 0.0.0.0:22              0.0.0.0:*               LISTEN      3081/sshd           
tcp        0      0 0.0.0.0:50010           0.0.0.0:*               LISTEN      21199/java          
tcp        0      0 0.0.0.0:50075           0.0.0.0:*               LISTEN      21199/java          
tcp        0      0 0.0.0.0:50020           0.0.0.0:*               LISTEN      21199/java          
tcp6       0      0 :::8040                 :::*                    LISTEN      22694/java          
tcp6       0      0 :::8042                 :::*                    LISTEN      22694/java          
tcp6       0      0 :::22                   :::*                    LISTEN      3081/sshd           
tcp6       0      0 :::8088                 :::*                    LISTEN      22598/java          
tcp6       0      0 :::13562                :::*                    LISTEN      22694/java          
tcp6       0      0 :::46878                :::*                    LISTEN      22694/java          
tcp6       0      0 :::8030                 :::*                    LISTEN      22598/java          
tcp6       0      0 :::8031                 :::*                    LISTEN      22598/java          
tcp6       0      0 :::8032                 :::*                    LISTEN      22598/java          
tcp6       0      0 :::8033                 :::*                    LISTEN      22598/java          
udp        0      0 127.0.0.1:323           0.0.0.0:*                           19398/chronyd       
udp6       0      0 ::1:323                 :::*                                19398/chronyd       

bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.7.jar grep input output2 'dfs[a-z.]+'

![](https://gitee.com/as4k/xdocs/raw/master/image/2019-12-20-22-20-15.png)

安装hive2.3.6

https://cwiki.apache.org/confluence/display/Hive/GettingStarted

cd /as4k
wget http://192.168.1.5/chfs/shared/share/apache-hive-2.3.6-bin.tar.gz
tar xvf  apache-hive-2.3.6-bin.tar.gz

cat << 'EOF' >> /etc/profile
export HADOOP_HOME=/as4k/hadoop-2.7.7
export HIVE_HOME=/as4k/apache-hive-2.3.6-bin
export PATH=$HADOOP_HOME/bin:$HIVE_HOME/bin:$PATH
EOF
source /etc/profile
echo $PATH

[root@node5 ~]# echo $PATH
/as4k/hadoop-2.7.7/bin:/as4k/apache-hive-2.3.6-bin/bin:/as4k/jdk1.8.0_231/bin:/as4k/jdk1.8.0_231/jre/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/root/bin

hadoop fs -mkdir -p      /tmp
hadoop fs -mkdir -p      /user/hive/warehouse
hadoop fs -chmod g+w   /tmp
hadoop fs -chmod g+w   /user/hive/warehouse


cd /as4k/apache-hive-2.3.6-bin; schematool -dbType derby -initSchema; hive -e "show databases;"
上面的命令会在当前目录生成 derby.log 和 metastore_db 两个文件/目录。如果初始化失败(例如先执行了 hive 命令、之后才初始化,就会报此错误),可以删除这两个文件/目录后重新初始化。

![](https://gitee.com/as4k/xdocs/raw/master/image/2019-12-22-22-41-47.png)

![](https://gitee.com/as4k/xdocs/raw/master/image/2019-12-20-22-51-05.png)

安装Hive-使用mysql


dir=/data/mysql-data; mkdir -p $dir; docker run --restart always --name mysql-hive -d -v $dir:/var/lib/mysql -p 3306:3306 -e MYSQL_ROOT_PASSWORD=123456 daocloud.io/library/mysql:5.7.26

[root@node5 apache-hive-2.3.6-bin]# cat conf/hive-site.xml 
<configuration>
    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://localhost:3306/hivedb?createDatabaseIfNotExist=true</value>
        <description>JDBC connect string for a JDBC metastore</description>
    </property>

    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
        <description>Driver class name for a JDBC metastore</description>
    </property>

    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>root</value>
        <description>username to use against metastore database</description>
    </property>

    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>123456</value>
        <description>password to use against metastore database</description>
    </property>

    <property>
        <name>hive.metastore.warehouse.dir</name>
        <value>/hive/warehouse</value>
        <description>hive default warehouse, if necessary, change it</description>
    </property>
</configuration>

hadoop fs -mkdir -p /hive/warehouse



cd /as4k/apache-hive-2.3.6-bin/lib;
wget http://aws.as4k.top/chfs/shared/mysql-connector-java-5.1.40.jar


schematool -dbType mysql -initSchema

docker exec mysql-hive mysql -uroot -p123456 hivedb -e "show tables";



hive  开始使用


TODO hive学习之路  https://www.cnblogs.com/qingyunzong/category/1191578.html


参考资料
https://www.cnblogs.com/qingyunzong/p/8708057.html
https://mvnrepository.com/artifact/mysql/mysql-connector-java/5.1.40
https://repo1.maven.org/maven2/mysql/mysql-connector-java/5.1.40/mysql-connector-java-5.1.40.jar
http://aws.as4k.top/chfs/shared/mysql-connector-java-5.1.40.jar
load data local inpath "/root/student.txt" into table student;

关机重启hive

cd /as4k/hadoop-2.7.7
bash   sbin/stop-dfs.sh 
bash   sbin/stop-yarn.sh 
bash   sbin/start-dfs.sh 
bash   sbin/start-yarn.sh 
sleep 30; hive -e "show databases"

tmp

http://192.168.1.5/chfs/shared/share/apache-hive-1.2.2-bin.tar.gz

https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.18/mysql-connector-java-8.0.18.jar

export HADOOP_HOME=/as4k/hadoop-3.2.1
export HIVE_CONF_DIR=/as4k/apache-hive-3.1.2-bin/conf


cp hive-default.xml.template hive-site.xml
  vim hive-site.xml (配置路径与mysql)
     <property>
        <name>system:java.io.tmpdir</name>
        <value>/user/hive/warehouse</value>
      </property>
      <property>
        <name>system:user.name</name>
        <value>${user.name}</value>
      </property>
     <property>
        <name>hive.metastore.db.type</name>
        <value>mysql</value>
     </property>
     <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://node3:3306/metastore?createDatabaseIfNotExist=true</value>
     </property>
     <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
     </property>
    <property>
      <name>javax.jdo.option.ConnectionUserName</name>
      <value>hive</value>
      <description>user name for connecting to mysql server</description>
    </property>
    <property>
      <name>javax.jdo.option.ConnectionPassword</name>
      <value>123456</value>
      <description>password for connecting to mysql server</description>
    </property>


hadoop fs -mkdir -p /tmp
hadoop fs -mkdir -p /user/hive/warehouse
hadoop fs -chmod g+w   /tmp
hadoop fs -chmod g+w   /user/hive/warehouse
        



/as4k/apache-hive-3.1.2-bin/lib


http://192.168.1.5/chfs/shared/share/apache-hive-3.1.2-bin.tar.gz


export HIVE_HOME=/as4k/apache-hive-3.1.2-bin


schematool -dbType derby -initSchema


https://cwiki.apache.org/confluence/display/Hive/GettingStarted

https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/ClusterSetup.html

bin/schematool -dbType derby -initSchema