Exercise 1
Write HQL statements (i.e., Hive shell commands) to import the data in the given text file sample1.txt into a comma-delimited (CSV-style) table stock_data on HDFS,
then copy it into a SequenceFile table stock_data2.
Exercise 2
Query the data in the stock_data table with different SELECT statements.
1. Retrieve data from the table
Use SELECT to retrieve the single row whose date is '2017-10-06'.
2. Retrieve data with a limit on the number of rows
Retrieve three rows whose volume is less than 1,500,000.
Exercise 3
Using Hive's Java API (JDBC), complete the following:
Connect to the Hive server
Create a database
Create a table and load data into it.
Answers:
Exercise 1
Create a database
bin/hive
hive> create database demo;
hive> use demo;
Create the tables
hive> create table stock_data(Date1 string, Open float, High float, Low float, Close float, Volume int, AdjClose float) row format delimited fields terminated by ',' stored as textfile;
hive> create table stock_data2(Date1 string, Open float, High float, Low float, Close float, Volume int, AdjClose float) stored as sequencefile;
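To confirm how each table is stored, Hive's describe formatted command shows the storage details; for stock_data2 the InputFormat line should report a SequenceFile input format (a quick check, not required by the exercise):
hive> describe formatted stock_data2;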
Check the table definition
hive> describe stock_data;
Load data into the table
hive> load data local inpath '/usr/local/hive/demo/sample1.txt' into table stock_data;
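load data local inpath copies the local file into the table's directory under the Hive warehouse on HDFS; assuming the default warehouse location, the result can be inspected from the Hive shell:
hive> dfs -ls /user/hive/warehouse/demo.db/stock_data;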
Copy the data from stock_data into stock_data2.
hive>insert overwrite table stock_data2 select * from stock_data;
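To verify the copy, query a few rows back from stock_data2 (a quick sanity check, not part of the exercise):
hive> select * from stock_data2 limit 5;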
Exercise 2
hive> select * from stock_data where date1='2017-10-06';
hive> select * from stock_data where volume<1500000 limit 3;
Exercise 3
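The answer below uses the legacy HiveServer1 Thrift client (org.apache.hadoop.hive.service.ThriftHive), which ships with older Hive releases, rather than JDBC; a JDBC version for HiveServer2 is sketched after the code. Before running it, the Hive server must be listening on port 10000, e.g.:
bin/hive --service hiveserver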
package com.hive.demo;

import java.util.List;

import org.apache.hadoop.hive.service.HiveServerException;
import org.apache.hadoop.hive.service.ThriftHive;
import org.apache.hadoop.hive.service.ThriftHive.Client;
import org.apache.thrift.TException;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.transport.TSocket;

public class Hive_demo_01 {

    static TSocket transport;

    // Open a Thrift connection to the Hive server and return a client handle.
    private static Client getClient(String hiveServer, Integer hivePort) {
        final int TIMEOUT_MS = 99999999;
        try {
            // Connect to the server
            transport = new TSocket(hiveServer, hivePort);
            transport.setTimeout(TIMEOUT_MS);
            transport.open();
            TBinaryProtocol protocol = new TBinaryProtocol(transport);
            Client client = new ThriftHive.Client(protocol);
            // Connection succeeded; report it on the console
            System.out.println("Connection is established");
            return client;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    // Execute "show tables" and print the result set.
    private Client show_tables(Client c1) {
        try {
            // Execute the HQL statement
            c1.execute("show tables");
            // Fetch the query result
            List<String> li = c1.fetchAll();
            System.out.println(" *** The Tables List *** ");
            // Print the query result
            for (String string : li) {
                System.out.println(string);
            }
        } catch (TException e) {
            e.printStackTrace();
        }
        System.out.println(" -------------------------------- ");
        return c1;
    }

    // Create the table sample_data.
    private Client create_tables(Client c1) {
        try {
            c1.execute("create table sample_data(name string) stored as textfile");
        } catch (HiveServerException e) {
            e.printStackTrace();
        } catch (TException e) {
            e.printStackTrace();
        }
        System.out.println(" *** table sample_data is created *** ");
        System.out.println(" -------------------------------- ");
        return c1;
    }

    // Load a local file into the given table.
    private Client load_data(Client c1, String tbl_name) {
        try {
            c1.execute("load data local inpath '/usr/local/hive/sample.txt' into table " + tbl_name);
        } catch (HiveServerException e) {
            e.printStackTrace();
        } catch (TException e) {
            e.printStackTrace();
        }
        System.out.println(" *** loaded data into " + tbl_name + " *** ");
        System.out.println(" -------------------------------- ");
        return c1;
    }

    public static void main(String[] args) {
        // Hive server connection parameters
        String HIVE_SERVER = "localhost";
        Integer HIVE_PORT = Integer.valueOf(10000);
        Client client = getClient(HIVE_SERVER, HIVE_PORT);
        if (client == null) {
            System.err.println("Could not connect to the Hive server");
            return;
        }
        try {
            // Create a database and switch to it (the name follows Exercise 1)
            client.execute("create database if not exists demo");
            client.execute("use demo");
        } catch (TException e) {
            e.printStackTrace();
        }
        Hive_demo_01 obj = new Hive_demo_01();
        // Execute the HQL statements
        client = obj.show_tables(client);
        System.out.println(" Before creating the table sample_data ");
        client = obj.create_tables(client);
        System.out.println(" After creating the table sample_data ");
        client = obj.show_tables(client);
        System.out.println(" Loading data into sample_data ");
        client = obj.load_data(client, "sample_data");
        transport.close();
    }
}
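In newer Hive releases, HiveServer1 and its Thrift client were replaced by HiveServer2, whose standard Java API is JDBC (driver class org.apache.hive.jdbc.HiveDriver). Below is a minimal sketch of the same workflow over JDBC, assuming an unsecured HiveServer2 listening on localhost:10000 and the hive-jdbc jar with its dependencies on the classpath; the database, table, and file names mirror the Thrift answer:

package com.hive.demo;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

public class HiveJdbcDemo {
    public static void main(String[] args) throws SQLException {
        // HiveServer2 JDBC URL; assumes the server runs locally on the default port
        String url = "jdbc:hive2://localhost:10000/default";
        try (Connection conn = DriverManager.getConnection(url, "", "");
             Statement stmt = conn.createStatement()) {
            // Create a database and a table, then load data into the table
            stmt.execute("create database if not exists demo");
            stmt.execute("create table if not exists demo.sample_data(name string) stored as textfile");
            stmt.execute("load data local inpath '/usr/local/hive/sample.txt' into table demo.sample_data");
            // List the tables in the demo database
            try (ResultSet rs = stmt.executeQuery("show tables in demo")) {
                while (rs.next()) {
                    System.out.println(rs.getString(1));
                }
            }
        }
    }
}

With a JDBC 4 driver, DriverManager locates HiveDriver automatically; on older setups, load it first with Class.forName("org.apache.hive.jdbc.HiveDriver").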