CREATE TABLE table_name (col_name data_type, col_name data_type, ...)
CLUSTERED BY (col_name, col_name,...)
[SORTED BY (col_name, col_name,...)[ASC|DESC]]
INTO n BUCKETS
说明
[$host]transwarp> CREATE TABLE bucket_user_info(
> name STRING,
> acc_num STRING,
> password STRING,
> citizen_id STRING,
> bank_acc STRING,
> reg_date STRING,
> acc_level STRING)
> CLUSTERED BY (acc_num)
> INTO 3 BUCKETS;
举例:多层分桶,桶内排序
[$host]transwarp> CREATE TABLE bucket_user_info(
> name STRING,
> acc_num STRING,
> password STRING,
> citizen_id STRING,
> bank_acc STRING,
> reg_date STRING,
> acc_level STRING)
> CLUSTERED BY (acc_num, citizen_id)
> INTO 6 BUCKETS;
桶内排序
[$host]transwarp> CREATE TABLE user_info_bucketed (
> name STRING,
> acc_num STRING,
> password STRING,
> citizen_id STRING,
> bank_acc STRING,
> reg_date STRING)
> PARTITIONED BY (acc_level STRING)
> CLUSTERED BY (acc_num) INTO 25 BUCKETS;
我们在这里根据user_id分桶。现在我们写入数据:
[$host]transwarp> set hive.enforce.bucketing = true;
[$host]transwarp> FROM user_id
> INSERT OVERWRITE TABLE user_info_bucketed
> PARTITION (acc_level='A')
> SELECT name, acc_num, password, citizen_id, bank_acc, reg_date
>WHERE acc_level = 'A';
set hive.enforce.bucketing = true指令让Inceptor自动根据表的分桶填入数据,写入时,Inceptor会尽量均匀地将数据哈希进各个桶中。