X-Notes > Hive > 内置函数 > UDTF 表生成函数

UDTF 表生成函数

UDTF（Built-in Table-Generating Functions）

UDF：User-Defined Functions
T：Table-Generating

table-generating functions transform a single input row to multiple output rows

表生成函数将单个输入行转换为多个输出行

`explode` 与 `lateral view`

explode(ARRAY<T> a) 或 explode(MAP<Tkey,Tvalue> m)
- 将一个 Array 炸裂（explode，直译是爆炸的意思）成单列多行
- 将一个 Map 炸裂成双列多行，一列是 key ，一列是 value
- ```
-- 就是将一行中的 array 或 map 的复杂结构拆分成多行
select explode(array('A','B','C'));
select explode(map('a', 8, 'b', 88, 'c', 888));
```

lateral view

炸裂函数很方便，但在使用的时候存在局限性：

select '111', explode(array('A','B','C'));
select explode(explode(map('a', array('a','b','c'), 'A' , array('A','B','C'))));
-- FAILED: SemanticException [Error 10081]: UDTF's are not supported outside the SELECT clause, nor nested in expressions
select explode(array('A','B','C')), 'col1';
select explode(array('A','B','C')), explode(array('a','b','c'));
-- FAILED: SemanticException 1:36 Only a single expression in the SELECT clause is supported with UDTF's. Error encountered near token ''col1''

直接使用的时候，炸裂函数只能单独使用：既不能有其他列，也不能嵌套

而配合 lateral view （直译是侧视图）使用，可以规避这些局限
- Lateral view is used in conjunction with user-defined table generating functions such as explode()
- 侧视图的设计意图就是为了解决 UDTF 的这些问题

with t1 as (
    select '111' col1, array('A','B','C') col2
)
select col1, col3
from t1
lateral view explode(col2) t2 as col3;

with t1 as (
    select map('a', 8, 'b', 88, 'c', 888) col1, '222' col2
)
select col3, col4, col2
from t1
lateral view explode(col1) t2 as col3, col4;

-- （避免使用）两个炸裂函数并列使用，会出现笛卡尔积
with t1 as (
    select array('a','b','c') col1, array('A','B','C') col2
)
select col3, col4
from t1
lateral view explode(col1) t2 as col3
lateral view explode(col2) t3 as col4;

-- （避免使用）嵌套，也会产生笛卡尔积
with tmp1 as (
    select '嵌套' col1, map('a', array('a','b','c'), 'A' , array('A','B','C')) col2
)
select col1, col3, col5
from(
    select col1, col3, col4
    from tmp1
    lateral view explode(col2) t2 as col3, col4
) tmp2
lateral view explode(col4) t3 as col5;

posexplode(ARRAY<T> a)
- 类似于 explode(ARRAY<T> a)
- 区别在于炸成两列，多一列对应数组中元素的 index
- ```
select posexplode(array('A','B','C'));
```

`replicate_rows(n, cols...)`

将一行 “重复” 成 n 行
n 要求是 long 类型

select * from test.test limit 5;
select replicate_rows(2L, name) from test.test limit 5;
select replicate_rows(3L, name, art_score) from test.test limit 5;

其他

inline(ARRAY<STRUCT<f1:T1,...,fn:Tn>> a)

将 STRUCT 结构数组分解为多行
STRUCT 有几列，就返回几列

select
    array(
        named_struct('name', '刘一', 'course', '语文', 'score', 70),
        named_struct('name', '刘一', 'course', '数学', 'score', 80),
        named_struct('name', '刘一', 'course', '英语', 'score', 75),
        named_struct('name', '陈二', 'course', '语文', 'score', 65),
        named_struct('name', '陈二', 'course', '数学', 'score', 60),
        named_struct('name', '陈二', 'course', '英语', 'score', 93),
        named_struct('name', '张三', 'course', '语文', 'score', 75),
        named_struct('name', '张三', 'course', '数学', 'score', 73),
        named_struct('name', '张三', 'course', '英语', 'score', 79)
    );

with tmp as(
    select
        array(
            named_struct('name', '刘一', 'course', '语文', 'score', 70),
            named_struct('name', '刘一', 'course', '数学', 'score', 80),
            named_struct('name', '刘一', 'course', '英语', 'score', 75),
            named_struct('name', '陈二', 'course', '语文', 'score', 65),
            named_struct('name', '陈二', 'course', '数学', 'score', 60),
            named_struct('name', '陈二', 'course', '英语', 'score', 93),
            named_struct('name', '张三', 'course', '语文', 'score', 75),
            named_struct('name', '张三', 'course', '数学', 'score', 73),
            named_struct('name', '张三', 'course', '英语', 'score', 79)
        ) as scores
)
select inline(scores) from tmp;

with tmp as(
    select
        '2020-02-02' as `date`,
        named_struct('name', '刘一', 'score', array(70,80,75)) as scores
    union all
    select
        '2020-02-02' as `date`,
        named_struct('name', '陈二', 'score', array(65,60,93)) as scores
    union all
    select
        '2020-02-02' as `date`,
        named_struct('name', '张三', 'score', array(75,73,79)) as scores
)
select `date`, t1.name, t1.course, t1.score
from tmp
lateral view inline(
    array(
        struct(scores.name, '语文', scores.score[0]),
        struct(scores.name, '数学', scores.score[1]),
        struct(scores.name, '英语', scores.score[2])
    )
)t1 as name, course, score;

stack(int n,T1 V1,...,Tn/r Vn)

turns k columns into n rows of size k/n each
将 k 列（原列数）的数据分解为 n 行，每行 k/n 列（k 必须是 n 的整数倍）
用于将原本拼成一行的，相同列数的，相同列顺序，相同列类型的数据重新拆分成多行

select stack(3,'1','2','3');
select stack(3,'1','2','3','4','5','6');
select stack(3,'1','2','3','4','5','6','7','8','9');

select stack(2,'A',10,date '2015-01-01','B',20,date '2016-01-01');
select stack(2,'A',10,date '2015-01-01','B',20,date '2016-01-01') as (cola,colb,colc);

with tmp as(
    select 
        'test' as cola,
        struct('A',10,date '2015-01-01','B',20,date '2016-01-01') as colb
)
select cola,`col-0`,`col-1`,`col-2`
from tmp
lateral view stack(
    2,
    -- colb.*
    colb.col1, colb.col2, colb.col3,
    colb.col4, colb.col5, colb.col6
) t1 as `col-0`,`col-1`,`col-2`;

UDTF 表生成函数

explode 与 lateral view

replicate_rows(n, cols...)

其他

`explode` 与 `lateral view`

`replicate_rows(n, cols...)`