[[151644, 8948, 198, 2610, 525, 264, 10950, 17847, 13, 151645,
198, 151644, 872, 198, 151652, 151655, 151655, 151655, 151655, 151655,
151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,
151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,
151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,
151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,
151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,
151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,
151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,
151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,
151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,
151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,
151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,
151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,
151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,
151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,
151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,
151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,
151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,
151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,
151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,
151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,
151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,
151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,
151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,
151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,
151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,
151655, 151653, 14880, 101042, 45930, 62926, 107439, 55338, 101479, 108704,
43815, 3837, 59879, 45181, 77559, 26939, 64817, 5373, 45181, 17447,
26939, 101373, 102006, 3837, 31526, 100073, 108704, 151645, 198, 151644,
77091, 198]]
Tensor[[1, 302], u32, cuda:0]

grid_thw: [[1, 16, 64]] -> 空间合并(高、宽各除以2,对应 spatial_merge_size=2)后为 [[1, 8, 32]],一共 1×8×32 = 256 个视觉token
前15个文本token(到 <|vision_start|> 为止)生成的position_ids,三个维度(t/h/w)取值相同:

[[0, 1, 2, ... 12, 13, 14],
[0, 1, 2, ... 12, 13, 14],
[0, 1, 2, ... 12, 13, 14]]
shape: (3, 15)
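中间256个视觉token生成的position_ids(三行依次为 t、h、w,起始偏移为前面文本的长度 15;t 恒为 15,h 取 15~22 共 8 个值、每个值重复 32 次,w 取 15~46 共 32 个值、整体重复 8 次):

[[15, 15, 15, ..., 15, 15, 15],
[15, 15, 15, ..., 16, 16, 16, ..., 17, 17, 17, ..., 22, 22, 22],
[15, 16, ..., 45, 46, 15, 16, ..., 45, 46, ..., 15, 16, ..., 45, 46]]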
shape: (3, 256)
后面31个text token生成的position_ids(从视觉部分position_ids的最大值 46 加 1,即 47 开始,三个维度取值相同):

[[47, 48, 49, ... 75, 76, 77],
[47, 48, 49, ... 75, 76, 77],
[47, 48, 49, ... 75, 76, 77]]
shape: (3, 31)
生成过程中,每个新token的position_ids由之前的最大值加1得到,且三个维度取同一个值(例如本例中第一个生成的token的position_ids为 77 + 1 = 78)
常见因果注意力模型