Create the MySQL partition data
```sql
DROP TABLE ord_order;

-- Create the order partition table
CREATE TABLE ord_order(
    order_id BIGINT NOT NULL AUTO_INCREMENT COMMENT 'Order ID',
    user_id INT NOT NULL COMMENT 'User ID',
    goods_id INT NOT NULL COMMENT 'Goods ID',
    order_price INT NOT NULL DEFAULT 0 COMMENT 'Actual order price (in cents)',
    create_time DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT 'Creation time',
    PRIMARY KEY(order_id, create_time)
)
PARTITION BY LIST (YEAR(create_time)*100 + MONTH(create_time))
(
    PARTITION p201601 VALUES IN (201601),
    PARTITION p201602 VALUES IN (201602),
    PARTITION p201603 VALUES IN (201603),
    PARTITION p201604 VALUES IN (201604),
    PARTITION p201605 VALUES IN (201605),
    PARTITION p201606 VALUES IN (201606),
    PARTITION p201607 VALUES IN (201607),
    PARTITION p201608 VALUES IN (201608),
    PARTITION p201609 VALUES IN (201609),
    PARTITION p201610 VALUES IN (201610),
    PARTITION p201611 VALUES IN (201611),
    PARTITION p201612 VALUES IN (201612)
);

-- Insert sample data
INSERT INTO ord_order VALUES
(NULL, 10000001, 11111111, 1000, '2016-01-13 01:00:10'),
(NULL, 10000001, 11111112, 2000, '2016-01-13 02:00:20'),
(NULL, 10000001, 11111113, 3000, '2016-01-13 03:00:30'),
(NULL, 10000001, 11111114, 4000, '2016-01-13 04:00:40'),
(NULL, 10000001, 11111115, 5000, '2016-01-13 05:00:50'),

(NULL, 10000001, 11111111, 1000, '2016-02-13 01:00:10'),
(NULL, 10000001, 11111112, 2000, '2016-02-13 02:00:20'),
(NULL, 10000001, 11111113, 3000, '2016-02-13 03:00:30'),
(NULL, 10000001, 11111114, 4000, '2016-02-13 04:00:40'),
(NULL, 10000001, 11111115, 5000, '2016-02-13 05:00:50'),

(NULL, 10000001, 11111111, 1000, '2016-03-13 01:00:10'),
(NULL, 10000001, 11111112, 2000, '2016-03-13 02:00:20'),
(NULL, 10000001, 11111113, 3000, '2016-03-13 03:00:30'),
(NULL, 10000001, 11111114, 4000, '2016-03-13 04:00:40'),
(NULL, 10000001, 11111115, 5000, '2016-03-13 05:00:50'),

(NULL, 10000001, 11111111, 1000, '2016-04-13 01:00:10'),
(NULL, 10000001, 11111112, 2000, '2016-04-13 02:00:20'),
(NULL, 10000001, 11111113, 3000, '2016-04-13 03:00:30'),
(NULL, 10000001, 11111114, 4000, '2016-04-13 04:00:40'),
(NULL, 10000001, 11111115, 5000, '2016-04-13 05:00:50'),

(NULL, 10000001, 11111111, 1000, '2016-05-13 01:00:10'),
(NULL, 10000001, 11111112, 2000, '2016-05-13 02:00:20'),
(NULL, 10000001, 11111113, 3000, '2016-05-13 03:00:30'),
(NULL, 10000001, 11111114, 4000, '2016-05-13 04:00:40'),
(NULL, 10000001, 11111115, 5000, '2016-05-13 05:00:50'),

(NULL, 10000001, 11111111, 1000, '2016-06-13 01:00:10'),
(NULL, 10000001, 11111112, 2000, '2016-06-13 02:00:20'),
(NULL, 10000001, 11111113, 3000, '2016-06-13 03:00:30'),
(NULL, 10000001, 11111114, 4000, '2016-06-13 04:00:40'),
(NULL, 10000001, 11111115, 5000, '2016-06-13 05:00:50'),

(NULL, 10000001, 11111111, 1000, '2016-07-13 01:00:10'),
(NULL, 10000001, 11111112, 2000, '2016-07-13 02:00:20'),
(NULL, 10000001, 11111113, 3000, '2016-07-13 03:00:30'),
(NULL, 10000001, 11111114, 4000, '2016-07-13 04:00:40'),
(NULL, 10000001, 11111115, 5000, '2016-07-13 05:00:50'),

(NULL, 10000001, 11111111, 1000, '2016-08-13 01:00:10'),
(NULL, 10000001, 11111112, 2000, '2016-08-13 02:00:20'),
(NULL, 10000001, 11111113, 3000, '2016-08-13 03:00:30'),
(NULL, 10000001, 11111114, 4000, '2016-08-13 04:00:40'),
(NULL, 10000001, 11111115, 5000, '2016-08-13 05:00:50'),

(NULL, 10000001, 11111111, 1000, '2016-09-13 01:00:10'),
(NULL, 10000001, 11111112, 2000, '2016-09-13 02:00:20'),
(NULL, 10000001, 11111113, 3000, '2016-09-13 03:00:30'),
(NULL, 10000001, 11111114, 4000, '2016-09-13 04:00:40'),
(NULL, 10000001, 11111115, 5000, '2016-09-13 05:00:50'),

(NULL, 10000001, 11111111, 1000, '2016-10-13 01:00:10'),
(NULL, 10000001, 11111112, 2000, '2016-10-13 02:00:20'),
(NULL, 10000001, 11111113, 3000, '2016-10-13 03:00:30'),
(NULL, 10000001, 11111114, 4000, '2016-10-13 04:00:40'),
(NULL, 10000001, 11111115, 5000, '2016-10-13 05:00:50'),

(NULL, 10000001, 11111111, 1000, '2016-11-13 01:00:10'),
(NULL, 10000001, 11111112, 2000, '2016-11-13 02:00:20'),
(NULL, 10000001, 11111113, 3000, '2016-11-13 03:00:30'),
(NULL, 10000001, 11111114, 4000, '2016-11-13 04:00:40'),
(NULL, 10000001, 11111115, 5000, '2016-11-13 05:00:50'),

(NULL, 10000001, 11111111, 1000, '2016-12-13 01:00:10'),
(NULL, 10000001, 11111112, 2000, '2016-12-13 02:00:20'),
(NULL, 10000001, 11111113, 3000, '2016-12-13 03:00:30'),
(NULL, 10000001, 11111114, 4000, '2016-12-13 04:00:40'),
(NULL, 10000001, 11111115, 5000, '2016-12-13 05:00:50');

-- View the data in partition p201601
SELECT * FROM ord_order PARTITION(p201601);

-- The composed row key
SELECT CONCAT(user_id, 10000000000-UNIX_TIMESTAMP(create_time), goods_id)
FROM ord_order PARTITION(p201601);
```
Create the HBase table ord_order
Because of a version-compatibility issue, I need to create the corresponding HBase table ahead of time; otherwise the import fails with an error saying the column family cannot be created automatically.
Create the ord_order table in the hbase shell:
```
hbase(main):033:0> create 'ord_order', {NAME => 'cf1'}
```
Use Sqoop to import the data of the p201601 partition of the MySQL ord_order table into the HBase table. Note that the $CONDITIONS placeholder is required by Sqoop for free-form --query imports, and -m 1 runs the import with a single mapper.
```bash
/usr/local/sqoop/bin/sqoop import \
--connect jdbc:mysql://192.168.137.11:3306/test \
--username HH \
--password oracle \
--query 'SELECT CONCAT(user_id, 10000000000-UNIX_TIMESTAMP(create_time), goods_id) AS order_id, order_price, create_time FROM ord_order PARTITION(p201601) WHERE $CONDITIONS' \
--hbase-table ord_order \
--hbase-create-table \
--hbase-row-key order_id \
--split-by order_id \
--column-family cf1 \
-m 1
```
Once the import succeeds, the corresponding partition can be dropped on the MySQL side and the partitions needed going forward can be added. Keep in mind that DROP PARTITION discards the partition's data immediately, so only run it after confirming the data has landed in HBase.
```sql
ALTER TABLE ord_order ADD PARTITION (PARTITION p201701 VALUES IN (201701));

ALTER TABLE ord_order DROP PARTITION p201601;
```
View the imported data in HBase
```
hbase(main):001:0> scan 'ord_order'
ROW                              COLUMN+CELL
 10000001854736755011111115     column=cf1:create_time, timestamp=1479224942888, value=2016-01-13 05:00:50.0
 10000001854736755011111115     column=cf1:order_price, timestamp=1479224942888, value=5000
 10000001854737116011111114     column=cf1:create_time, timestamp=1479224942888, value=2016-01-13 04:00:40.0
 10000001854737116011111114     column=cf1:order_price, timestamp=1479224942888, value=4000
 10000001854737477011111113     column=cf1:create_time, timestamp=1479224942888, value=2016-01-13 03:00:30.0
 10000001854737477011111113     column=cf1:order_price, timestamp=1479224942888, value=3000
 10000001854737838011111112     column=cf1:create_time, timestamp=1479224942888, value=2016-01-13 02:00:20.0
 10000001854737838011111112     column=cf1:order_price, timestamp=1479224942888, value=2000
 10000001854738199011111111     column=cf1:create_time, timestamp=1479224942888, value=2016-01-13 01:00:10.0
 10000001854738199011111111     column=cf1:order_price, timestamp=1479224942888, value=1000
5 row(s) in 0.5390 seconds
```
The HBase row key is composed of three fields: user_id, 10000000000-UNIX_TIMESTAMP(create_time), and goods_id.
The part worth noting is 10000000000-UNIX_TIMESTAMP(create_time). It is designed this way so that orders sort in reverse chronological order, which matches the requirement that the newest data is shown first.
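As a concrete check against the data above: for the first inserted row, UNIX_TIMESTAMP('2016-01-13 01:00:10') evaluates to 1452618010 on a UTC+8 server, so 10000000000 - 1452618010 = 8547381990, and the composed key is 10000001 + 8547381990 + 11111111 = 10000001854738199011111111, which is exactly the last row key in the scan output above. A later create_time yields a smaller middle segment, hence the reverse ordering. A client that needs to read or write a specific order has to build the same key; here is a minimal sketch in Java (the class and method names are my own, and it assumes the client uses the same epoch seconds that UNIX_TIMESTAMP produced on the MySQL server):

```java
public class OrderRowKey {

    // Builds the same row key the Sqoop query composed:
    // CONCAT(user_id, 10000000000 - UNIX_TIMESTAMP(create_time), goods_id).
    // Assumes createTimeEpochSeconds matches what UNIX_TIMESTAMP returned on
    // the MySQL server (same clock and time zone).
    public static String build(long userId, long createTimeEpochSeconds, long goodsId) {
        long reversedTs = 10_000_000_000L - createTimeEpochSeconds;
        return String.valueOf(userId) + reversedTs + goodsId;
    }

    public static void main(String[] args) {
        // 1452618010 is UNIX_TIMESTAMP('2016-01-13 01:00:10') in UTC+8, so this
        // prints 10000001854738199011111111 -- the last key in the scan above.
        System.out.println(build(10000001L, 1452618010L, 11111111L));
    }
}
```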
For example: suppose we need to paginate the orders of user 10000001, two rows per page, sorted by time in descending order (newest orders shown first).
```
hbase(main):003:0> scan 'ord_order', {COLUMNS => ['cf1:order_price'], ROWPREFIXFILTER => '10000001', LIMIT => 2}
ROW                              COLUMN+CELL
 10000001854736755011111115     column=cf1:order_price, timestamp=1479224942888, value=5000
 10000001854737116011111114     column=cf1:order_price, timestamp=1479224942888, value=4000
```
Clicking through to the next page:
```
hbase(main):004:0> scan 'ord_order', {COLUMNS => ['cf1:order_price'], LIMIT => 3, STARTROW => '10000001854737116011111114'}
ROW                              COLUMN+CELL
 10000001854737116011111114     column=cf1:order_price, timestamp=1479224942888, value=4000
 10000001854737477011111113     column=cf1:order_price, timestamp=1479224942888, value=3000
 10000001854737838011111112     column=cf1:order_price, timestamp=1479224942888, value=2000
3 row(s) in 0.0260 seconds
```

Three rows come back; when rendering the page, simply drop the first row. What is actually displayed is:

```
 10000001854737477011111113     column=cf1:order_price, timestamp=1479224942888, value=3000
 10000001854737838011111112     column=cf1:order_price, timestamp=1479224942888, value=2000
```
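In application code the same "fetch one extra row, drop the overlapping first row" trick applies. Below is a rough sketch of what the next-page query could look like with the Java HBase 1.x client; the OrderPager class, the method signature, the connection setup, and the choice to return only row keys are assumptions for illustration, not part of the original setup:

```java
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class OrderPager {

    // Fetch the next page of orders, starting from the last row key of the
    // previous page (inclusive). Reads pageSize + 1 rows and drops the
    // overlapping first row, mirroring the shell example above.
    public static List<String> nextPage(Connection conn, String lastRowKey, int pageSize)
            throws IOException {
        List<String> rowKeys = new ArrayList<>();
        try (Table table = conn.getTable(TableName.valueOf("ord_order"))) {
            Scan scan = new Scan();
            scan.setStartRow(Bytes.toBytes(lastRowKey));   // same role as STARTROW in the shell
            scan.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("order_price"));
            try (ResultScanner scanner = table.getScanner(scan)) {
                int fetched = 0;
                for (Result r : scanner) {
                    if (fetched > 0) {                     // skip the overlapping first row
                        rowKeys.add(Bytes.toString(r.getRow()));
                    }
                    fetched++;
                    if (fetched > pageSize) {              // stop after pageSize + 1 rows
                        break;
                    }
                }
            }
        }
        return rowKeys;
    }

    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();  // reads hbase-site.xml from the classpath
        try (Connection conn = ConnectionFactory.createConnection(conf)) {
            // Row key taken from the scan output above; page size 2 matches the example.
            for (String rowKey : nextPage(conn, "10000001854737116011111114", 2)) {
                System.out.println(rowKey);
            }
        }
    }
}
```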
Clicking back to the previous page:
```
hbase(main):008:0> scan 'ord_order', {COLUMNS => ['cf1:order_price'], LIMIT => 3, STARTROW => '10000001854737477011111113', REVERSED => true}
ROW                              COLUMN+CELL
 10000001854737477011111113     column=cf1:order_price, timestamp=1479224942888, value=3000
 10000001854737116011111114     column=cf1:order_price, timestamp=1479224942888, value=4000
 10000001854736755011111115     column=cf1:order_price, timestamp=1479224942888, value=5000
3 row(s) in 0.0640 seconds
```

Again three rows come back. We drop the first row, which leaves this collected set:

```
 10000001854737116011111114     column=cf1:order_price, timestamp=1479224942888, value=4000
 10000001854736755011111115     column=cf1:order_price, timestamp=1479224942888, value=5000
```

The set is then traversed in reverse, which gives the rows finally displayed:

```
 10000001854736755011111115     column=cf1:order_price, timestamp=1479224942888, value=5000
 10000001854737116011111114     column=cf1:order_price, timestamp=1479224942888, value=4000
```
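The previous-page case can be sketched the same way by adding a reversed scan to the hypothetical OrderPager class above (with java.util.Collections also imported); Scan.setReversed(true) plays the role of REVERSED => true in the shell, and the collected rows are reversed before display. Again this is an illustrative sketch, not code from the original article:

```java
    // Fetch the previous page, starting from the first row key of the current
    // page (inclusive) and scanning backwards. The overlapping first row is
    // dropped and the remainder is reversed so the newest order comes first.
    public static List<String> prevPage(Connection conn, String firstRowKey, int pageSize)
            throws IOException {
        List<String> rowKeys = new ArrayList<>();
        try (Table table = conn.getTable(TableName.valueOf("ord_order"))) {
            Scan scan = new Scan();
            scan.setStartRow(Bytes.toBytes(firstRowKey));  // STARTROW in the shell example
            scan.setReversed(true);                        // REVERSED => true in the shell example
            scan.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("order_price"));
            try (ResultScanner scanner = table.getScanner(scan)) {
                int fetched = 0;
                for (Result r : scanner) {
                    if (fetched > 0) {                     // skip the overlapping first row
                        rowKeys.add(Bytes.toString(r.getRow()));
                    }
                    fetched++;
                    if (fetched > pageSize) {              // stop after pageSize + 1 rows
                        break;
                    }
                }
            }
        }
        java.util.Collections.reverse(rowKeys);            // reverse the set before display
        return rowKeys;
    }
```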
Designing and using HBase well is a discipline in its own right. In practice it usually pays to sit down with the developers and the product team to discuss how best to design the row key. Of course, reading widely about other people's experience often solves a lot of problems too, because the problems you run into have probably been hit by someone else already.
Article reposted from: trustauth.cn