Import data from hive to hbase
Recently I needed to restore data from Hive to HBase. I found no direct way to do this with tools like Sqoop, so I converted it myself.
1. Create an HBase namespace and a table containing one column family named basic
# Create the 'gbif' namespace.
create_namespace 'gbif'
# Create the table with a single column family, 'basic'.
# NOTE(review): HBase addresses a table inside a namespace as 'namespace:table';
# with a dot, 'gbif.gbif_0004998' appears to be created in the default namespace
# rather than in 'gbif' -- confirm this is intended.
create 'gbif.gbif_0004998', 'basic'
2. Create an intermediate Hive table that follows the structure of the Hive/HBase tables
-- Hive table backed by the HBase table "gbif.gbif_0004998" through HBaseStorageHandler.
-- gbifid is mapped to the HBase row key (:key); every other column is mapped to the
-- qualifier of the same name in the 'basic' column family.
CREATE EXTERNAL TABLE intermediate.hbase_gbif_0004998 (
    gbifid                        STRING,
    datasetkey                    STRING,
    occurrenceid                  STRING,
    kingdom                       STRING,
    phylum                        STRING,
    class                         STRING,
    orders                        STRING,
    family                        STRING,
    genus                         STRING,
    species                       STRING,
    infraspecificepithet          STRING,
    taxonrank                     STRING,
    scientificname                STRING,
    countrycode                   STRING,
    locality                      STRING,
    publishingorgkey              STRING,
    decimallatitude               STRING,
    decimallongitude              STRING,
    coordinateuncertaintyinmeters STRING,
    coordinateprecision           STRING,
    elevation                     STRING,
    elevationaccuracy             STRING,
    depth                         STRING,
    depthaccuracy                 STRING,
    eventdate                     STRING,
    day                           STRING,
    month                         STRING,
    year                          STRING,
    taxonkey                      STRING,
    specieskey                    STRING,
    basisofrecord                 STRING,
    institutioncode               STRING,
    collectioncode                STRING,
    catalognumber                 STRING,
    recordnumber                  STRING,
    identifiedby                  STRING,
    license                       STRING,
    rightsholder                  STRING,
    recordedby                    STRING,
    typestatus                    STRING,
    establishmentmeans            STRING,
    lastinterpreted               STRING,
    mediatype                     STRING,
    issue                         STRING
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
    "hbase.columns.mapping" = ":key,basic:datasetkey,basic:occurrenceid,basic:kingdom,basic:phylum,basic:class,basic:orders,basic:family,basic:genus,basic:species,basic:infraspecificepithet,basic:taxonrank,basic:scientificname,basic:countrycode,basic:locality,basic:publishingorgkey,basic:decimallatitude,basic:decimallongitude,basic:coordinateuncertaintyinmeters,basic:coordinateprecision,basic:elevation,basic:elevationaccuracy,basic:depth,basic:depthaccuracy,basic:eventdate,basic:day,basic:month,basic:year,basic:taxonkey,basic:specieskey,basic:basisofrecord,basic:institutioncode,basic:collectioncode,basic:catalognumber,basic:recordnumber,basic:identifiedby,basic:license,basic:rightsholder,basic:recordedby,basic:typestatus,basic:establishmentmeans,basic:lastinterpreted,basic:mediatype,basic:issue"
)
TBLPROPERTIES (
    "hbase.table.name" = "gbif.gbif_0004998"
);
3.insert data into intermediate hive table
-- Copy every row of the source Hive table into intermediate.hbase_gbif_0004998.
-- SELECT * is matched to the target columns by position, so the source table's
-- column order must line up with the target table's column order.
INSERT OVERWRITE TABLE intermediate.hbase_gbif_0004998
SELECT *
FROM gbif.gbif_0004998;
4.get intermediate hive table’s hdfs path
-- Print the table's detailed metadata, which includes its storage location.
DESCRIBE FORMATTED intermediate.hbase_gbif_0004998;
#5. Import into HBase from HDFS
#hbase --config config_dir org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles
#hdfs://localhost:9000/user/hive/warehouse/intermediate.db/hbase_gbif_0004998
#gbif.gbif_0004998
6.check hbase’s data
count 'gbif.gbif_0004998'
...
...
Current count: 326000, row: 986217061
Current count: 327000, row: 991771339
327316 row(s) in 13.6890 seconds
=> 327316
7.get data from hbase table
hbase(main):008:0> get 'gbif.gbif_0004998', '1019778874'
COLUMN CELL
basic:basisofrecord timestamp=1519452831179, value=LIVING_SPECIMEN
basic:catalognumber timestamp=1519452831179, value=A0011
basic:class timestamp=1519452831179, value=Liliopsida
basic:collectioncode timestamp=1519452831179, value=Ar\xC3\xA1ceas
basic:coordinateprecision timestamp=1519452831179, value=
basic:coordinateuncertaintyinmeters timestamp=1519452831179, value=
basic:countrycode timestamp=1519452831179, value=CO
basic:datasetkey timestamp=1519452831179, value=fd5ae2bb-6ee6-4e5c-8428-6284fa385f9a
basic:day timestamp=1519452831179, value=23
basic:decimallatitude timestamp=1519452831179, value=
basic:decimallongitude timestamp=1519452831179, value=
basic:depth timestamp=1519452831179, value=
basic:depthaccuracy timestamp=1519452831179, value=
basic:elevation timestamp=1519452831179, value=
basic:elevationaccuracy timestamp=1519452831179, value=
basic:establishmentmeans timestamp=1519452831179, value=
basic:eventdate timestamp=1519452831179, value=2007-08-23T02:00Z
basic:family timestamp=1519452831179, value=Araceae
basic:genus timestamp=1519452831179, value=Anthurium
basic:identifiedby timestamp=1519452831179, value=
basic:infraspecificepithet timestamp=1519452831179, value=
basic:institutioncode timestamp=1519452831179, value=Corporaci\xC3\xB3n San Jorge
basic:issue timestamp=1519452831179, value=
basic:kingdom timestamp=1519452831179, value=Plantae
basic:lastinterpreted timestamp=1519452831179, value=2018-02-03T23:09Z
basic:license timestamp=1519452831179, value=CC0_1_0
basic:locality timestamp=1519452831179, value=
basic:mediatype timestamp=1519452831179, value=
basic:month timestamp=1519452831179, value=8
basic:occurrenceid timestamp=1519452831179, value=JBSJ:Araceas:A0011
basic:orders timestamp=1519452831179, value=Alismatales
basic:phylum timestamp=1519452831179, value=Tracheophyta
basic:publishingorgkey timestamp=1519452831179, value=1904954c-81e7-4254-9778-ae3deed93de6
basic:recordedby timestamp=1519452831179, value=Oyuela G.
basic:recordnumber timestamp=1519452831179, value=
basic:rightsholder timestamp=1519452831179, value=Corporaci\xC3\xB3n San Jorge
basic:scientificname timestamp=1519452831179, value=Anthurium cabrerense Engl.
basic:species timestamp=1519452831179, value=Anthurium cabrerense
basic:specieskey timestamp=1519452831179, value=2872557
basic:taxonkey timestamp=1519452831179, value=2872557
basic:taxonrank timestamp=1519452831179, value=SPECIES
basic:typestatus timestamp=1519452831179, value=
basic:year timestamp=1519452831179, value=2007
Reply