Import data from Hive to HBase

Recently I needed to restore data from Hive to HBase. I found no direct way to do this with tools like Sqoop, so I did the conversion myself.

1. Create an HBase namespace and a table containing one column family named basic

# Create the 'gbif' namespace, then a table with a single column family, 'basic'.
# NOTE(review): HBase addresses a table inside a namespace as 'namespace:table'.
# The dotted name below most likely creates a table literally called
# 'gbif.gbif_0004998' in the default namespace - confirm that this is intended.
create_namespace 'gbif'
create 'gbif.gbif_0004998', 'basic'

2. Create an intermediate Hive table that follows the structure of the Hive/HBase tables

-- Hive table backed by the HBase table "gbif.gbif_0004998" through the
-- HBase storage handler.
-- The entries of hbase.columns.mapping are positional: the first Hive column
-- (gbifid) maps to the HBase row key (":key"), and every other column maps to
-- a qualifier of the same name in the "basic" column family.
-- The mapping string is kept on a single line without whitespace on purpose.
CREATE EXTERNAL TABLE intermediate.hbase_gbif_0004998 (
    gbifid                        STRING,
    datasetkey                    STRING,
    occurrenceid                  STRING,
    kingdom                       STRING,
    phylum                        STRING,
    class                         STRING,
    orders                        STRING,
    family                        STRING,
    genus                         STRING,
    species                       STRING,
    infraspecificepithet          STRING,
    taxonrank                     STRING,
    scientificname                STRING,
    countrycode                   STRING,
    locality                      STRING,
    publishingorgkey              STRING,
    decimallatitude               STRING,
    decimallongitude              STRING,
    coordinateuncertaintyinmeters STRING,
    coordinateprecision           STRING,
    elevation                     STRING,
    elevationaccuracy             STRING,
    depth                         STRING,
    depthaccuracy                 STRING,
    eventdate                     STRING,
    day                           STRING,
    month                         STRING,
    year                          STRING,
    taxonkey                      STRING,
    specieskey                    STRING,
    basisofrecord                 STRING,
    institutioncode               STRING,
    collectioncode                STRING,
    catalognumber                 STRING,
    recordnumber                  STRING,
    identifiedby                  STRING,
    license                       STRING,
    rightsholder                  STRING,
    recordedby                    STRING,
    typestatus                    STRING,
    establishmentmeans            STRING,
    lastinterpreted               STRING,
    mediatype                     STRING,
    issue                         STRING
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
    "hbase.columns.mapping" = ":key,basic:datasetkey,basic:occurrenceid,basic:kingdom,basic:phylum,basic:class,basic:orders,basic:family,basic:genus,basic:species,basic:infraspecificepithet,basic:taxonrank,basic:scientificname,basic:countrycode,basic:locality,basic:publishingorgkey,basic:decimallatitude,basic:decimallongitude,basic:coordinateuncertaintyinmeters,basic:coordinateprecision,basic:elevation,basic:elevationaccuracy,basic:depth,basic:depthaccuracy,basic:eventdate,basic:day,basic:month,basic:year,basic:taxonkey,basic:specieskey,basic:basisofrecord,basic:institutioncode,basic:collectioncode,basic:catalognumber,basic:recordnumber,basic:identifiedby,basic:license,basic:rightsholder,basic:recordedby,basic:typestatus,basic:establishmentmeans,basic:lastinterpreted,basic:mediatype,basic:issue"
)
TBLPROPERTIES (
    "hbase.table.name" = "gbif.gbif_0004998"
);

3. Insert data into the intermediate Hive table

-- Copy every row of the source Hive table into the HBase-backed table.
-- SELECT * is matched to the target by position, so the source columns must
-- be in the same order as the target table's columns.
INSERT OVERWRITE TABLE intermediate.hbase_gbif_0004998
SELECT *
FROM gbif.gbif_0004998;

4. Get the intermediate Hive table's HDFS path

-- Print the table's detailed metadata, which includes its storage location.
DESCRIBE FORMATTED intermediate.hbase_gbif_0004998;

#5. Import into HBase from HDFS

#hbase --config config_dir org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles 
#hdfs://localhost:9000/user/hive/warehouse/intermediate.db/hbase_gbif_0004998 
#gbif.gbif_0004998

6. Check the data in HBase

# Count the rows in the HBase table; the shell prints a running count while it works.
count 'gbif.gbif_0004998'

...
 ...
 Current count: 326000, row: 986217061
 Current count: 327000, row: 991771339
 327316 row(s) in 13.6890 seconds

=> 327316

7. Get data from the HBase table

hbase(main):008:0> get 'gbif.gbif_0004998', '1019778874'
COLUMN CELL 
basic:basisofrecord timestamp=1519452831179, value=LIVING_SPECIMEN 
basic:catalognumber timestamp=1519452831179, value=A0011 
basic:class timestamp=1519452831179, value=Liliopsida 
basic:collectioncode timestamp=1519452831179, value=Ar\xC3\xA1ceas 
basic:coordinateprecision timestamp=1519452831179, value= 
basic:coordinateuncertaintyinmeters timestamp=1519452831179, value= 
basic:countrycode timestamp=1519452831179, value=CO 
basic:datasetkey timestamp=1519452831179, value=fd5ae2bb-6ee6-4e5c-8428-6284fa385f9a 
basic:day timestamp=1519452831179, value=23 
basic:decimallatitude timestamp=1519452831179, value= 
basic:decimallongitude timestamp=1519452831179, value= 
basic:depth timestamp=1519452831179, value= 
basic:depthaccuracy timestamp=1519452831179, value= 
basic:elevation timestamp=1519452831179, value= 
basic:elevationaccuracy timestamp=1519452831179, value= 
basic:establishmentmeans timestamp=1519452831179, value= 
basic:eventdate timestamp=1519452831179, value=2007-08-23T02:00Z 
basic:family timestamp=1519452831179, value=Araceae 
basic:genus timestamp=1519452831179, value=Anthurium 
basic:identifiedby timestamp=1519452831179, value= 
basic:infraspecificepithet timestamp=1519452831179, value= 
basic:institutioncode timestamp=1519452831179, value=Corporaci\xC3\xB3n San Jorge 
basic:issue timestamp=1519452831179, value= 
basic:kingdom timestamp=1519452831179, value=Plantae 
basic:lastinterpreted timestamp=1519452831179, value=2018-02-03T23:09Z 
basic:license timestamp=1519452831179, value=CC0_1_0 
basic:locality timestamp=1519452831179, value= 
basic:mediatype timestamp=1519452831179, value= 
basic:month timestamp=1519452831179, value=8 
basic:occurrenceid timestamp=1519452831179, value=JBSJ:Araceas:A0011 
basic:orders timestamp=1519452831179, value=Alismatales 
basic:phylum timestamp=1519452831179, value=Tracheophyta 
basic:publishingorgkey timestamp=1519452831179, value=1904954c-81e7-4254-9778-ae3deed93de6 
basic:recordedby timestamp=1519452831179, value=Oyuela G. 
basic:recordnumber timestamp=1519452831179, value= 
basic:rightsholder timestamp=1519452831179, value=Corporaci\xC3\xB3n San Jorge 
basic:scientificname timestamp=1519452831179, value=Anthurium cabrerense Engl. 
basic:species timestamp=1519452831179, value=Anthurium cabrerense 
basic:specieskey timestamp=1519452831179, value=2872557 
basic:taxonkey timestamp=1519452831179, value=2872557 
basic:taxonrank timestamp=1519452831179, value=SPECIES 
basic:typestatus timestamp=1519452831179, value= 
basic:year timestamp=1519452831179, value=2007