hva-h264.c 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056
  1. /*
  2. * Copyright (C) STMicroelectronics SA 2015
  3. * Authors: Yannick Fertre <yannick.fertre@st.com>
  4. * Hugues Fruchet <hugues.fruchet@st.com>
  5. * License terms: GNU General Public License (GPL), version 2
  6. */
  7. #include "hva.h"
  8. #include "hva-hw.h"
  9. #define MAX_SPS_PPS_SIZE 128
  10. #define BITSTREAM_OFFSET_MASK 0x7F
  11. /* video max size*/
  12. #define H264_MAX_SIZE_W 1920
  13. #define H264_MAX_SIZE_H 1920
  14. /* macroBlocs number (width & height) */
  15. #define MB_W(w) ((w + 0xF) / 0x10)
  16. #define MB_H(h) ((h + 0xF) / 0x10)
  17. /* formula to get temporal or spatial data size */
  18. #define DATA_SIZE(w, h) (MB_W(w) * MB_H(h) * 16)
  19. #define SEARCH_WINDOW_BUFFER_MAX_SIZE(w) ((4 * MB_W(w) + 42) * 256 * 3 / 2)
  20. #define CABAC_CONTEXT_BUFFER_MAX_SIZE(w) (MB_W(w) * 16)
  21. #define CTX_MB_BUFFER_MAX_SIZE(w) (MB_W(w) * 16 * 8)
  22. #define SLICE_HEADER_SIZE (4 * 16)
  23. #define BRC_DATA_SIZE (5 * 16)
  24. /* source buffer copy in YUV 420 MB-tiled format with size=16*256*3/2 */
  25. #define CURRENT_WINDOW_BUFFER_MAX_SIZE (16 * 256 * 3 / 2)
  26. /*
  27. * 4 lines of pixels (in Luma, Chroma blue and Chroma red) of top MB
  28. * for deblocking with size=4*16*MBx*2
  29. */
  30. #define LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(w) (4 * 16 * MB_W(w) * 2)
  31. /* factor for bitrate and cpb buffer size max values if profile >= high */
  32. #define H264_FACTOR_HIGH 1200
  33. /* factor for bitrate and cpb buffer size max values if profile < high */
  34. #define H264_FACTOR_BASELINE 1000
  35. /* number of bytes for NALU_TYPE_FILLER_DATA header and footer */
  36. #define H264_FILLER_DATA_SIZE 6
  37. struct h264_profile {
  38. enum v4l2_mpeg_video_h264_level level;
  39. u32 max_mb_per_seconds;
  40. u32 max_frame_size;
  41. u32 max_bitrate;
  42. u32 max_cpb_size;
  43. u32 min_comp_ratio;
  44. };
  45. static const struct h264_profile h264_infos_list[] = {
  46. {V4L2_MPEG_VIDEO_H264_LEVEL_1_0, 1485, 99, 64, 175, 2},
  47. {V4L2_MPEG_VIDEO_H264_LEVEL_1B, 1485, 99, 128, 350, 2},
  48. {V4L2_MPEG_VIDEO_H264_LEVEL_1_1, 3000, 396, 192, 500, 2},
  49. {V4L2_MPEG_VIDEO_H264_LEVEL_1_2, 6000, 396, 384, 1000, 2},
  50. {V4L2_MPEG_VIDEO_H264_LEVEL_1_3, 11880, 396, 768, 2000, 2},
  51. {V4L2_MPEG_VIDEO_H264_LEVEL_2_0, 11880, 396, 2000, 2000, 2},
  52. {V4L2_MPEG_VIDEO_H264_LEVEL_2_1, 19800, 792, 4000, 4000, 2},
  53. {V4L2_MPEG_VIDEO_H264_LEVEL_2_2, 20250, 1620, 4000, 4000, 2},
  54. {V4L2_MPEG_VIDEO_H264_LEVEL_3_0, 40500, 1620, 10000, 10000, 2},
  55. {V4L2_MPEG_VIDEO_H264_LEVEL_3_1, 108000, 3600, 14000, 14000, 4},
  56. {V4L2_MPEG_VIDEO_H264_LEVEL_3_2, 216000, 5120, 20000, 20000, 4},
  57. {V4L2_MPEG_VIDEO_H264_LEVEL_4_0, 245760, 8192, 20000, 25000, 4},
  58. {V4L2_MPEG_VIDEO_H264_LEVEL_4_1, 245760, 8192, 50000, 62500, 2},
  59. {V4L2_MPEG_VIDEO_H264_LEVEL_4_2, 522240, 8704, 50000, 62500, 2},
  60. {V4L2_MPEG_VIDEO_H264_LEVEL_5_0, 589824, 22080, 135000, 135000, 2},
  61. {V4L2_MPEG_VIDEO_H264_LEVEL_5_1, 983040, 36864, 240000, 240000, 2}
  62. };
  63. enum hva_brc_type {
  64. BRC_TYPE_NONE = 0,
  65. BRC_TYPE_CBR = 1,
  66. BRC_TYPE_VBR = 2,
  67. BRC_TYPE_VBR_LOW_DELAY = 3
  68. };
  69. enum hva_entropy_coding_mode {
  70. CAVLC = 0,
  71. CABAC = 1
  72. };
  73. enum hva_picture_coding_type {
  74. PICTURE_CODING_TYPE_I = 0,
  75. PICTURE_CODING_TYPE_P = 1,
  76. PICTURE_CODING_TYPE_B = 2
  77. };
  78. enum hva_h264_sampling_mode {
  79. SAMPLING_MODE_NV12 = 0,
  80. SAMPLING_MODE_UYVY = 1,
  81. SAMPLING_MODE_RGB3 = 3,
  82. SAMPLING_MODE_XRGB4 = 4,
  83. SAMPLING_MODE_NV21 = 8,
  84. SAMPLING_MODE_VYUY = 9,
  85. SAMPLING_MODE_BGR3 = 11,
  86. SAMPLING_MODE_XBGR4 = 12,
  87. SAMPLING_MODE_RGBX4 = 20,
  88. SAMPLING_MODE_BGRX4 = 28
  89. };
  90. enum hva_h264_nalu_type {
  91. NALU_TYPE_UNKNOWN = 0,
  92. NALU_TYPE_SLICE = 1,
  93. NALU_TYPE_SLICE_DPA = 2,
  94. NALU_TYPE_SLICE_DPB = 3,
  95. NALU_TYPE_SLICE_DPC = 4,
  96. NALU_TYPE_SLICE_IDR = 5,
  97. NALU_TYPE_SEI = 6,
  98. NALU_TYPE_SPS = 7,
  99. NALU_TYPE_PPS = 8,
  100. NALU_TYPE_AU_DELIMITER = 9,
  101. NALU_TYPE_SEQ_END = 10,
  102. NALU_TYPE_STREAM_END = 11,
  103. NALU_TYPE_FILLER_DATA = 12,
  104. NALU_TYPE_SPS_EXT = 13,
  105. NALU_TYPE_PREFIX_UNIT = 14,
  106. NALU_TYPE_SUBSET_SPS = 15,
  107. NALU_TYPE_SLICE_AUX = 19,
  108. NALU_TYPE_SLICE_EXT = 20
  109. };
  110. enum hva_h264_sei_payload_type {
  111. SEI_BUFFERING_PERIOD = 0,
  112. SEI_PICTURE_TIMING = 1,
  113. SEI_STEREO_VIDEO_INFO = 21,
  114. SEI_FRAME_PACKING_ARRANGEMENT = 45
  115. };
  116. /**
  117. * stereo Video Info struct
  118. */
  119. struct hva_h264_stereo_video_sei {
  120. u8 field_views_flag;
  121. u8 top_field_is_left_view_flag;
  122. u8 current_frame_is_left_view_flag;
  123. u8 next_frame_is_second_view_flag;
  124. u8 left_view_self_contained_flag;
  125. u8 right_view_self_contained_flag;
  126. };
  127. /**
  128. * @frame_width: width in pixels of the buffer containing the input frame
  129. * @frame_height: height in pixels of the buffer containing the input frame
  130. * @frame_num: the parameter to be written in the slice header
  131. * @picture_coding_type: type I, P or B
  132. * @pic_order_cnt_type: POC mode, as defined in H264 std : can be 0,1,2
  133. * @first_picture_in_sequence: flag telling to encoder that this is the
  134. * first picture in a video sequence.
  135. * Used for VBR
  136. * @slice_size_type: 0 = no constraint to close the slice
  137. * 1= a slice is closed as soon as the slice_mb_size limit
  138. * is reached
  139. * 2= a slice is closed as soon as the slice_byte_size limit
  140. * is reached
  141. * 3= a slice is closed as soon as either the slice_byte_size
  142. * limit or the slice_mb_size limit is reached
  143. * @slice_mb_size: defines the slice size in number of macroblocks
  144. * (used when slice_size_type=1 or slice_size_type=3)
  145. * @ir_param_option: defines the number of macroblocks per frame to be
  146. * refreshed by AIR algorithm OR the refresh period
  147. * by CIR algorithm
  148. * @intra_refresh_type: enables the adaptive intra refresh algorithm.
  149. * Disable=0 / Adaptive=1 and Cycle=2 as intra refresh
  150. * @use_constrained_intra_flag: constrained_intra_pred_flag from PPS
  151. * @transform_mode: controls the use of 4x4/8x8 transform mode
  152. * @disable_deblocking_filter_idc:
  153. * 0: specifies that all luma and chroma block edges of
  154. * the slice are filtered.
  155. * 1: specifies that deblocking is disabled for all block
  156. * edges of the slice.
  157. * 2: specifies that all luma and chroma block edges of
  158. * the slice are filtered with exception of the block edges
  159. * that coincide with slice boundaries
  160. * @slice_alpha_c0_offset_div2: to be written in slice header,
  161. * controls deblocking
  162. * @slice_beta_offset_div2: to be written in slice header,
  163. * controls deblocking
  164. * @encoder_complexity: encoder complexity control (IME).
  165. * 0 = I_16x16, P_16x16, Full ME Complexity
  166. * 1 = I_16x16, I_NxN, P_16x16, Full ME Complexity
  167. * 2 = I_16x16, I_NXN, P_16x16, P_WxH, Full ME Complexity
  168. * 4 = I_16x16, P_16x16, Reduced ME Complexity
  169. * 5 = I_16x16, I_NxN, P_16x16, Reduced ME Complexity
  170. * 6 = I_16x16, I_NXN, P_16x16, P_WxH, Reduced ME Complexity
  171. * @chroma_qp_index_offset: coming from picture parameter set
  172. * (PPS see [H.264 STD] 7.4.2.2)
  173. * @entropy_coding_mode: entropy coding mode.
  174. * 0 = CAVLC
  175. * 1 = CABAC
  176. * @brc_type: selects the bit-rate control algorithm
  177. * 0 = constant Qp, (no BRC)
  178. * 1 = CBR
  179. * 2 = VBR
  180. * @quant: Quantization param used in case of fix QP encoding (no BRC)
  181. * @non_VCL_NALU_Size: size of non-VCL NALUs (SPS, PPS, filler),
  182. * used by BRC
  183. * @cpb_buffer_size: size of Coded Picture Buffer, used by BRC
  184. * @bit_rate: target bitrate, for BRC
  185. * @qp_min: min QP threshold
  186. * @qp_max: max QP threshold
  187. * @framerate_num: target framerate numerator , used by BRC
  188. * @framerate_den: target framerate denominator, used by BRC
  189. * @delay: End-to-End Initial Delay
  190. * @strict_HRD_compliancy: flag for HRD compliancy (1)
  191. * May impact quality encoding
  192. * @addr_source_buffer: address of input frame buffer for current frame
  193. * @addr_fwd_Ref_Buffer: address of reference frame buffer
  194. * @addr_rec_buffer: address of reconstructed frame buffer
  195. * @addr_output_bitstream_start: output bitstream start address
  196. * @addr_output_bitstream_end: output bitstream end address
  197. * @addr_external_sw : address of external search window
  198. * @addr_lctx : address of context picture buffer
  199. * @addr_local_rec_buffer: address of local reconstructed buffer
  200. * @addr_spatial_context: address of spatial context buffer
  201. * @bitstream_offset: offset in bits between aligned bitstream start
  202. * address and first bit to be written by HVA.
  203. * Range value is [0..63]
  204. * @sampling_mode: Input picture format .
  205. * 0: YUV420 semi_planar Interleaved
  206. * 1: YUV422 raster Interleaved
  207. * @addr_param_out: address of output parameters structure
  208. * @addr_scaling_matrix: address to the coefficient of
  209. * the inverse scaling matrix
  210. * @addr_scaling_matrix_dir: address to the coefficient of
  211. * the direct scaling matrix
  212. * @addr_cabac_context_buffer: address of cabac context buffer
  213. * @GmvX: Input information about the horizontal global displacement of
  214. * the encoded frame versus the previous one
  215. * @GmvY: Input information about the vertical global displacement of
  216. * the encoded frame versus the previous one
  217. * @window_width: width in pixels of the window to be encoded inside
  218. * the input frame
  219. * @window_height: height in pixels of the window to be encoded inside
  220. * the input frame
  221. * @window_horizontal_offset: horizontal offset in pels for input window
  222. * within input frame
  223. * @window_vertical_offset: vertical offset in pels for input window
  224. * within input frame
  225. * @addr_roi: Map of QP offset for the Region of Interest algorithm and
  226. * also used for Error map.
  227. * Bit 0-6 used for qp offset (value -64 to 63).
  228. * Bit 7 used to force intra
  229. * @addr_slice_header: address to slice header
  230. * @slice_header_size_in_bits: size in bits of the Slice header
  231. * @slice_header_offset0: Slice header offset where to insert
  232. * first_Mb_in_slice
  233. * @slice_header_offset1: Slice header offset where to insert
  234. * slice_qp_delta
  235. * @slice_header_offset2: Slice header offset where to insert
  236. * num_MBs_in_slice
  237. * @slice_synchro_enable: enable "slice ready" interrupt after each slice
  238. * @max_slice_number: Maximum number of slice in a frame
  239. * (0 is strictly forbidden)
  240. * @rgb2_yuv_y_coeff: Four coefficients (C0C1C2C3) to convert from RGB to
  241. * YUV for the Y component.
  242. * Y = C0*R + C1*G + C2*B + C3 (C0 is on byte 0)
  243. * @rgb2_yuv_u_coeff: Four coefficients (C0C1C2C3) to convert from RGB to
  244. * YUV for the U (Cb) component.
  245. * U = C0*R + C1*G + C2*B + C3 (C0 is on byte 0)
  246. * @rgb2_yuv_v_coeff: Four coefficients (C0C1C2C3) to convert from RGB to
  247. * YUV for the V (Cr) component.
  248. * V = C0*R + C1*G + C2*B + C3 (C0 is on byte 0)
  249. * @slice_byte_size: maximum slice size in bytes
  250. * (used when slice_size_type=2 or slice_size_type=3)
  251. * @max_air_intra_mb_nb: Maximum number of intra macroblock in a frame
  252. * for the AIR algorithm
  253. * @brc_no_skip: Disable skipping in the Bitrate Controller
  254. * @addr_brc_in_out_parameter: address of static buffer for BRC parameters
  255. */
  256. struct hva_h264_td {
  257. u16 frame_width;
  258. u16 frame_height;
  259. u32 frame_num;
  260. u16 picture_coding_type;
  261. u16 reserved1;
  262. u16 pic_order_cnt_type;
  263. u16 first_picture_in_sequence;
  264. u16 slice_size_type;
  265. u16 reserved2;
  266. u32 slice_mb_size;
  267. u16 ir_param_option;
  268. u16 intra_refresh_type;
  269. u16 use_constrained_intra_flag;
  270. u16 transform_mode;
  271. u16 disable_deblocking_filter_idc;
  272. s16 slice_alpha_c0_offset_div2;
  273. s16 slice_beta_offset_div2;
  274. u16 encoder_complexity;
  275. s16 chroma_qp_index_offset;
  276. u16 entropy_coding_mode;
  277. u16 brc_type;
  278. u16 quant;
  279. u32 non_vcl_nalu_size;
  280. u32 cpb_buffer_size;
  281. u32 bit_rate;
  282. u16 qp_min;
  283. u16 qp_max;
  284. u16 framerate_num;
  285. u16 framerate_den;
  286. u16 delay;
  287. u16 strict_hrd_compliancy;
  288. u32 addr_source_buffer;
  289. u32 addr_fwd_ref_buffer;
  290. u32 addr_rec_buffer;
  291. u32 addr_output_bitstream_start;
  292. u32 addr_output_bitstream_end;
  293. u32 addr_external_sw;
  294. u32 addr_lctx;
  295. u32 addr_local_rec_buffer;
  296. u32 addr_spatial_context;
  297. u16 bitstream_offset;
  298. u16 sampling_mode;
  299. u32 addr_param_out;
  300. u32 addr_scaling_matrix;
  301. u32 addr_scaling_matrix_dir;
  302. u32 addr_cabac_context_buffer;
  303. u32 reserved3;
  304. u32 reserved4;
  305. s16 gmv_x;
  306. s16 gmv_y;
  307. u16 window_width;
  308. u16 window_height;
  309. u16 window_horizontal_offset;
  310. u16 window_vertical_offset;
  311. u32 addr_roi;
  312. u32 addr_slice_header;
  313. u16 slice_header_size_in_bits;
  314. u16 slice_header_offset0;
  315. u16 slice_header_offset1;
  316. u16 slice_header_offset2;
  317. u32 reserved5;
  318. u32 reserved6;
  319. u16 reserved7;
  320. u16 reserved8;
  321. u16 slice_synchro_enable;
  322. u16 max_slice_number;
  323. u32 rgb2_yuv_y_coeff;
  324. u32 rgb2_yuv_u_coeff;
  325. u32 rgb2_yuv_v_coeff;
  326. u32 slice_byte_size;
  327. u16 max_air_intra_mb_nb;
  328. u16 brc_no_skip;
  329. u32 addr_temporal_context;
  330. u32 addr_brc_in_out_parameter;
  331. };
  332. /**
  333. * @ slice_size: slice size
  334. * @ slice_start_time: start time
  335. * @ slice_stop_time: stop time
  336. * @ slice_num: slice number
  337. */
  338. struct hva_h264_slice_po {
  339. u32 slice_size;
  340. u32 slice_start_time;
  341. u32 slice_end_time;
  342. u32 slice_num;
  343. };
  344. /**
  345. * @ bitstream_size: bitstream size
  346. * @ dct_bitstream_size: dtc bitstream size
  347. * @ stuffing_bits: number of stuffing bits inserted by the encoder
  348. * @ removal_time: removal time of current frame (nb of ticks 1/framerate)
  349. * @ hvc_start_time: hvc start time
  350. * @ hvc_stop_time: hvc stop time
  351. * @ slice_count: slice count
  352. */
  353. struct hva_h264_po {
  354. u32 bitstream_size;
  355. u32 dct_bitstream_size;
  356. u32 stuffing_bits;
  357. u32 removal_time;
  358. u32 hvc_start_time;
  359. u32 hvc_stop_time;
  360. u32 slice_count;
  361. u32 reserved0;
  362. struct hva_h264_slice_po slice_params[16];
  363. };
  364. struct hva_h264_task {
  365. struct hva_h264_td td;
  366. struct hva_h264_po po;
  367. };
  368. /**
  369. * @seq_info: sequence information buffer
  370. * @ref_frame: reference frame buffer
  371. * @rec_frame: reconstructed frame buffer
  372. * @task: task descriptor
  373. */
  374. struct hva_h264_ctx {
  375. struct hva_buffer *seq_info;
  376. struct hva_buffer *ref_frame;
  377. struct hva_buffer *rec_frame;
  378. struct hva_buffer *task;
  379. };
  380. static int hva_h264_fill_slice_header(struct hva_ctx *pctx,
  381. u8 *slice_header_addr,
  382. struct hva_controls *ctrls,
  383. int frame_num,
  384. u16 *header_size,
  385. u16 *header_offset0,
  386. u16 *header_offset1,
  387. u16 *header_offset2)
  388. {
  389. /*
  390. * with this HVA hardware version, part of the slice header is computed
  391. * on host and part by hardware.
  392. * The part of host is precomputed and available through this array.
  393. */
  394. struct device *dev = ctx_to_dev(pctx);
  395. int cabac = V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CABAC;
  396. const unsigned char slice_header[] = { 0x00, 0x00, 0x00, 0x01,
  397. 0x41, 0x34, 0x07, 0x00};
  398. int idr_pic_id = frame_num % 2;
  399. enum hva_picture_coding_type type;
  400. u32 frame_order = frame_num % ctrls->gop_size;
  401. if (!(frame_num % ctrls->gop_size))
  402. type = PICTURE_CODING_TYPE_I;
  403. else
  404. type = PICTURE_CODING_TYPE_P;
  405. memcpy(slice_header_addr, slice_header, sizeof(slice_header));
  406. *header_size = 56;
  407. *header_offset0 = 40;
  408. *header_offset1 = 13;
  409. *header_offset2 = 0;
  410. if (type == PICTURE_CODING_TYPE_I) {
  411. slice_header_addr[4] = 0x65;
  412. slice_header_addr[5] = 0x11;
  413. /* toggle the I frame */
  414. if ((frame_num / ctrls->gop_size) % 2) {
  415. *header_size += 4;
  416. *header_offset1 += 4;
  417. slice_header_addr[6] = 0x04;
  418. slice_header_addr[7] = 0x70;
  419. } else {
  420. *header_size += 2;
  421. *header_offset1 += 2;
  422. slice_header_addr[6] = 0x09;
  423. slice_header_addr[7] = 0xC0;
  424. }
  425. } else {
  426. if (ctrls->entropy_mode == cabac) {
  427. *header_size += 1;
  428. *header_offset1 += 1;
  429. slice_header_addr[7] = 0x80;
  430. }
  431. /*
  432. * update slice header with P frame order
  433. * frame order is limited to 16 (coded on 4bits only)
  434. */
  435. slice_header_addr[5] += ((frame_order & 0x0C) >> 2);
  436. slice_header_addr[6] += ((frame_order & 0x03) << 6);
  437. }
  438. dev_dbg(dev,
  439. "%s %s slice header order %d idrPicId %d header size %d\n",
  440. pctx->name, __func__, frame_order, idr_pic_id, *header_size);
  441. return 0;
  442. }
  443. static int hva_h264_fill_data_nal(struct hva_ctx *pctx,
  444. unsigned int stuffing_bytes, u8 *addr,
  445. unsigned int stream_size, unsigned int *size)
  446. {
  447. struct device *dev = ctx_to_dev(pctx);
  448. const u8 start[] = { 0x00, 0x00, 0x00, 0x01 };
  449. dev_dbg(dev, "%s %s stuffing bytes %d\n", pctx->name, __func__,
  450. stuffing_bytes);
  451. if ((*size + stuffing_bytes + H264_FILLER_DATA_SIZE) > stream_size) {
  452. dev_dbg(dev, "%s %s too many stuffing bytes %d\n",
  453. pctx->name, __func__, stuffing_bytes);
  454. return 0;
  455. }
  456. /* start code */
  457. memcpy(addr + *size, start, sizeof(start));
  458. *size += sizeof(start);
  459. /* nal_unit_type */
  460. addr[*size] = NALU_TYPE_FILLER_DATA;
  461. *size += 1;
  462. memset(addr + *size, 0xff, stuffing_bytes);
  463. *size += stuffing_bytes;
  464. addr[*size] = 0x80;
  465. *size += 1;
  466. return 0;
  467. }
  468. static int hva_h264_fill_sei_nal(struct hva_ctx *pctx,
  469. enum hva_h264_sei_payload_type type,
  470. u8 *addr, u32 *size)
  471. {
  472. struct device *dev = ctx_to_dev(pctx);
  473. const u8 start[] = { 0x00, 0x00, 0x00, 0x01 };
  474. struct hva_h264_stereo_video_sei info;
  475. u8 offset = 7;
  476. u8 msg = 0;
  477. /* start code */
  478. memcpy(addr + *size, start, sizeof(start));
  479. *size += sizeof(start);
  480. /* nal_unit_type */
  481. addr[*size] = NALU_TYPE_SEI;
  482. *size += 1;
  483. /* payload type */
  484. addr[*size] = type;
  485. *size += 1;
  486. switch (type) {
  487. case SEI_STEREO_VIDEO_INFO:
  488. memset(&info, 0, sizeof(info));
  489. /* set to top/bottom frame packing arrangement */
  490. info.field_views_flag = 1;
  491. info.top_field_is_left_view_flag = 1;
  492. /* payload size */
  493. addr[*size] = 1;
  494. *size += 1;
  495. /* payload */
  496. msg = info.field_views_flag << offset--;
  497. if (info.field_views_flag) {
  498. msg |= info.top_field_is_left_view_flag <<
  499. offset--;
  500. } else {
  501. msg |= info.current_frame_is_left_view_flag <<
  502. offset--;
  503. msg |= info.next_frame_is_second_view_flag <<
  504. offset--;
  505. }
  506. msg |= info.left_view_self_contained_flag << offset--;
  507. msg |= info.right_view_self_contained_flag << offset--;
  508. addr[*size] = msg;
  509. *size += 1;
  510. addr[*size] = 0x80;
  511. *size += 1;
  512. return 0;
  513. case SEI_BUFFERING_PERIOD:
  514. case SEI_PICTURE_TIMING:
  515. case SEI_FRAME_PACKING_ARRANGEMENT:
  516. default:
  517. dev_err(dev, "%s sei nal type not supported %d\n",
  518. pctx->name, type);
  519. return -EINVAL;
  520. }
  521. }
  522. static int hva_h264_prepare_task(struct hva_ctx *pctx,
  523. struct hva_h264_task *task,
  524. struct hva_frame *frame,
  525. struct hva_stream *stream)
  526. {
  527. struct hva_dev *hva = ctx_to_hdev(pctx);
  528. struct device *dev = ctx_to_dev(pctx);
  529. struct hva_h264_ctx *ctx = (struct hva_h264_ctx *)pctx->priv;
  530. struct hva_buffer *seq_info = ctx->seq_info;
  531. struct hva_buffer *fwd_ref_frame = ctx->ref_frame;
  532. struct hva_buffer *loc_rec_frame = ctx->rec_frame;
  533. struct hva_h264_td *td = &task->td;
  534. struct hva_controls *ctrls = &pctx->ctrls;
  535. struct v4l2_fract *time_per_frame = &pctx->ctrls.time_per_frame;
  536. int cavlc = V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CAVLC;
  537. u32 frame_num = pctx->stream_num;
  538. u32 addr_esram = hva->esram_addr;
  539. enum v4l2_mpeg_video_h264_level level;
  540. dma_addr_t paddr = 0;
  541. u8 *slice_header_vaddr;
  542. u32 frame_width = frame->info.aligned_width;
  543. u32 frame_height = frame->info.aligned_height;
  544. u32 max_cpb_buffer_size;
  545. unsigned int payload = stream->bytesused;
  546. u32 max_bitrate;
  547. /* check width and height parameters */
  548. if ((frame_width > max(H264_MAX_SIZE_W, H264_MAX_SIZE_H)) ||
  549. (frame_height > max(H264_MAX_SIZE_W, H264_MAX_SIZE_H))) {
  550. dev_err(dev,
  551. "%s width(%d) or height(%d) exceeds limits (%dx%d)\n",
  552. pctx->name, frame_width, frame_height,
  553. H264_MAX_SIZE_W, H264_MAX_SIZE_H);
  554. pctx->frame_errors++;
  555. return -EINVAL;
  556. }
  557. level = ctrls->level;
  558. memset(td, 0, sizeof(struct hva_h264_td));
  559. td->frame_width = frame_width;
  560. td->frame_height = frame_height;
  561. /* set frame alignement */
  562. td->window_width = frame_width;
  563. td->window_height = frame_height;
  564. td->window_horizontal_offset = 0;
  565. td->window_vertical_offset = 0;
  566. td->first_picture_in_sequence = (!frame_num) ? 1 : 0;
  567. /* pic_order_cnt_type hard coded to '2' as only I & P frames */
  568. td->pic_order_cnt_type = 2;
  569. /* useConstrainedIntraFlag set to false for better coding efficiency */
  570. td->use_constrained_intra_flag = false;
  571. td->brc_type = (ctrls->bitrate_mode == V4L2_MPEG_VIDEO_BITRATE_MODE_CBR)
  572. ? BRC_TYPE_CBR : BRC_TYPE_VBR;
  573. td->entropy_coding_mode = (ctrls->entropy_mode == cavlc) ? CAVLC :
  574. CABAC;
  575. td->bit_rate = ctrls->bitrate;
  576. /* set framerate, framerate = 1 n/ time per frame */
  577. if (time_per_frame->numerator >= 536) {
  578. /*
  579. * due to a hardware bug, framerate denominator can't exceed
  580. * 536 (BRC overflow). Compute nearest framerate
  581. */
  582. td->framerate_den = 1;
  583. td->framerate_num = (time_per_frame->denominator +
  584. (time_per_frame->numerator >> 1) - 1) /
  585. time_per_frame->numerator;
  586. /*
  587. * update bitrate to introduce a correction due to
  588. * the new framerate
  589. * new bitrate = (old bitrate * new framerate) / old framerate
  590. */
  591. td->bit_rate /= time_per_frame->numerator;
  592. td->bit_rate *= time_per_frame->denominator;
  593. td->bit_rate /= td->framerate_num;
  594. } else {
  595. td->framerate_den = time_per_frame->numerator;
  596. td->framerate_num = time_per_frame->denominator;
  597. }
  598. /* compute maximum bitrate depending on profile */
  599. if (ctrls->profile >= V4L2_MPEG_VIDEO_H264_PROFILE_HIGH)
  600. max_bitrate = h264_infos_list[level].max_bitrate *
  601. H264_FACTOR_HIGH;
  602. else
  603. max_bitrate = h264_infos_list[level].max_bitrate *
  604. H264_FACTOR_BASELINE;
  605. /* check if bitrate doesn't exceed max size */
  606. if (td->bit_rate > max_bitrate) {
  607. dev_dbg(dev,
  608. "%s bitrate (%d) larger than level and profile allow, clip to %d\n",
  609. pctx->name, td->bit_rate, max_bitrate);
  610. td->bit_rate = max_bitrate;
  611. }
  612. /* convert cpb_buffer_size in bits */
  613. td->cpb_buffer_size = ctrls->cpb_size * 8000;
  614. /* compute maximum cpb buffer size depending on profile */
  615. if (ctrls->profile >= V4L2_MPEG_VIDEO_H264_PROFILE_HIGH)
  616. max_cpb_buffer_size =
  617. h264_infos_list[level].max_cpb_size * H264_FACTOR_HIGH;
  618. else
  619. max_cpb_buffer_size =
  620. h264_infos_list[level].max_cpb_size * H264_FACTOR_BASELINE;
  621. /* check if cpb buffer size doesn't exceed max size */
  622. if (td->cpb_buffer_size > max_cpb_buffer_size) {
  623. dev_dbg(dev,
  624. "%s cpb size larger than level %d allows, clip to %d\n",
  625. pctx->name, td->cpb_buffer_size, max_cpb_buffer_size);
  626. td->cpb_buffer_size = max_cpb_buffer_size;
  627. }
  628. /* enable skipping in the Bitrate Controller */
  629. td->brc_no_skip = 0;
  630. /* initial delay */
  631. if ((ctrls->bitrate_mode == V4L2_MPEG_VIDEO_BITRATE_MODE_CBR) &&
  632. td->bit_rate)
  633. td->delay = 1000 * (td->cpb_buffer_size / td->bit_rate);
  634. else
  635. td->delay = 0;
  636. switch (frame->info.pixelformat) {
  637. case V4L2_PIX_FMT_NV12:
  638. td->sampling_mode = SAMPLING_MODE_NV12;
  639. break;
  640. case V4L2_PIX_FMT_NV21:
  641. td->sampling_mode = SAMPLING_MODE_NV21;
  642. break;
  643. default:
  644. dev_err(dev, "%s invalid source pixel format\n",
  645. pctx->name);
  646. pctx->frame_errors++;
  647. return -EINVAL;
  648. }
  649. /*
  650. * fill matrix color converter (RGB to YUV)
  651. * Y = 0,299 R + 0,587 G + 0,114 B
  652. * Cb = -0,1687 R -0,3313 G + 0,5 B + 128
  653. * Cr = 0,5 R - 0,4187 G - 0,0813 B + 128
  654. */
  655. td->rgb2_yuv_y_coeff = 0x12031008;
  656. td->rgb2_yuv_u_coeff = 0x800EF7FB;
  657. td->rgb2_yuv_v_coeff = 0x80FEF40E;
  658. /* enable/disable transform mode */
  659. td->transform_mode = ctrls->dct8x8;
  660. /* encoder complexity fix to 2, ENCODE_I_16x16_I_NxN_P_16x16_P_WxH */
  661. td->encoder_complexity = 2;
  662. /* quant fix to 28, default VBR value */
  663. td->quant = 28;
  664. if (td->framerate_den == 0) {
  665. dev_err(dev, "%s invalid framerate\n", pctx->name);
  666. pctx->frame_errors++;
  667. return -EINVAL;
  668. }
  669. /* if automatic framerate, deactivate bitrate controller */
  670. if (td->framerate_num == 0)
  671. td->brc_type = 0;
  672. /* compliancy fix to true */
  673. td->strict_hrd_compliancy = 1;
  674. /* set minimum & maximum quantizers */
  675. td->qp_min = clamp_val(ctrls->qpmin, 0, 51);
  676. td->qp_max = clamp_val(ctrls->qpmax, 0, 51);
  677. td->addr_source_buffer = frame->paddr;
  678. td->addr_fwd_ref_buffer = fwd_ref_frame->paddr;
  679. td->addr_rec_buffer = loc_rec_frame->paddr;
  680. td->addr_output_bitstream_end = (u32)stream->paddr + stream->size;
  681. td->addr_output_bitstream_start = (u32)stream->paddr;
  682. td->bitstream_offset = (((u32)stream->paddr & 0xF) << 3) &
  683. BITSTREAM_OFFSET_MASK;
  684. td->addr_param_out = (u32)ctx->task->paddr +
  685. offsetof(struct hva_h264_task, po);
  686. /* swap spatial and temporal context */
  687. if (frame_num % 2) {
  688. paddr = seq_info->paddr;
  689. td->addr_spatial_context = ALIGN(paddr, 0x100);
  690. paddr = seq_info->paddr + DATA_SIZE(frame_width,
  691. frame_height);
  692. td->addr_temporal_context = ALIGN(paddr, 0x100);
  693. } else {
  694. paddr = seq_info->paddr;
  695. td->addr_temporal_context = ALIGN(paddr, 0x100);
  696. paddr = seq_info->paddr + DATA_SIZE(frame_width,
  697. frame_height);
  698. td->addr_spatial_context = ALIGN(paddr, 0x100);
  699. }
  700. paddr = seq_info->paddr + 2 * DATA_SIZE(frame_width, frame_height);
  701. td->addr_brc_in_out_parameter = ALIGN(paddr, 0x100);
  702. paddr = td->addr_brc_in_out_parameter + BRC_DATA_SIZE;
  703. td->addr_slice_header = ALIGN(paddr, 0x100);
  704. td->addr_external_sw = ALIGN(addr_esram, 0x100);
  705. addr_esram += SEARCH_WINDOW_BUFFER_MAX_SIZE(frame_width);
  706. td->addr_local_rec_buffer = ALIGN(addr_esram, 0x100);
  707. addr_esram += LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(frame_width);
  708. td->addr_lctx = ALIGN(addr_esram, 0x100);
  709. addr_esram += CTX_MB_BUFFER_MAX_SIZE(max(frame_width, frame_height));
  710. td->addr_cabac_context_buffer = ALIGN(addr_esram, 0x100);
  711. if (!(frame_num % ctrls->gop_size)) {
  712. td->picture_coding_type = PICTURE_CODING_TYPE_I;
  713. stream->vbuf.flags |= V4L2_BUF_FLAG_KEYFRAME;
  714. } else {
  715. td->picture_coding_type = PICTURE_CODING_TYPE_P;
  716. stream->vbuf.flags &= ~V4L2_BUF_FLAG_KEYFRAME;
  717. }
  718. /* fill the slice header part */
  719. slice_header_vaddr = seq_info->vaddr + (td->addr_slice_header -
  720. seq_info->paddr);
  721. hva_h264_fill_slice_header(pctx, slice_header_vaddr, ctrls, frame_num,
  722. &td->slice_header_size_in_bits,
  723. &td->slice_header_offset0,
  724. &td->slice_header_offset1,
  725. &td->slice_header_offset2);
  726. td->chroma_qp_index_offset = 2;
  727. td->slice_synchro_enable = 0;
  728. td->max_slice_number = 1;
  729. /*
  730. * check the sps/pps header size for key frame only
  731. * sps/pps header was previously fill by libv4l
  732. * during qbuf of stream buffer
  733. */
  734. if ((stream->vbuf.flags == V4L2_BUF_FLAG_KEYFRAME) &&
  735. (payload > MAX_SPS_PPS_SIZE)) {
  736. dev_err(dev, "%s invalid sps/pps size %d\n", pctx->name,
  737. payload);
  738. pctx->frame_errors++;
  739. return -EINVAL;
  740. }
  741. if (stream->vbuf.flags != V4L2_BUF_FLAG_KEYFRAME)
  742. payload = 0;
  743. /* add SEI nal (video stereo info) */
  744. if (ctrls->sei_fp && hva_h264_fill_sei_nal(pctx, SEI_STEREO_VIDEO_INFO,
  745. (u8 *)stream->vaddr,
  746. &payload)) {
  747. dev_err(dev, "%s fail to get SEI nal\n", pctx->name);
  748. pctx->frame_errors++;
  749. return -EINVAL;
  750. }
  751. /* fill size of non-VCL NAL units (SPS, PPS, filler and SEI) */
  752. td->non_vcl_nalu_size = payload * 8;
  753. /* compute bitstream offset & new start address of bitstream */
  754. td->addr_output_bitstream_start += ((payload >> 4) << 4);
  755. td->bitstream_offset += (payload - ((payload >> 4) << 4)) * 8;
  756. stream->bytesused = payload;
  757. return 0;
  758. }
  759. static unsigned int hva_h264_get_stream_size(struct hva_h264_task *task)
  760. {
  761. struct hva_h264_po *po = &task->po;
  762. return po->bitstream_size;
  763. }
  764. static u32 hva_h264_get_stuffing_bytes(struct hva_h264_task *task)
  765. {
  766. struct hva_h264_po *po = &task->po;
  767. return po->stuffing_bits >> 3;
  768. }
  769. static int hva_h264_open(struct hva_ctx *pctx)
  770. {
  771. struct device *dev = ctx_to_dev(pctx);
  772. struct hva_h264_ctx *ctx;
  773. struct hva_dev *hva = ctx_to_hdev(pctx);
  774. u32 frame_width = pctx->frameinfo.aligned_width;
  775. u32 frame_height = pctx->frameinfo.aligned_height;
  776. u32 size;
  777. int ret;
  778. /* check esram size necessary to encode a frame */
  779. size = SEARCH_WINDOW_BUFFER_MAX_SIZE(frame_width) +
  780. LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(frame_width) +
  781. CTX_MB_BUFFER_MAX_SIZE(max(frame_width, frame_height)) +
  782. CABAC_CONTEXT_BUFFER_MAX_SIZE(frame_width);
  783. if (hva->esram_size < size) {
  784. dev_err(dev, "%s not enough esram (max:%d request:%d)\n",
  785. pctx->name, hva->esram_size, size);
  786. ret = -EINVAL;
  787. goto err;
  788. }
  789. /* allocate context for codec */
  790. ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
  791. if (!ctx) {
  792. ret = -ENOMEM;
  793. goto err;
  794. }
  795. /* allocate sequence info buffer */
  796. ret = hva_mem_alloc(pctx,
  797. 2 * DATA_SIZE(frame_width, frame_height) +
  798. SLICE_HEADER_SIZE +
  799. BRC_DATA_SIZE,
  800. "hva sequence info",
  801. &ctx->seq_info);
  802. if (ret) {
  803. dev_err(dev,
  804. "%s failed to allocate sequence info buffer\n",
  805. pctx->name);
  806. goto err_ctx;
  807. }
  808. /* allocate reference frame buffer */
  809. ret = hva_mem_alloc(pctx,
  810. frame_width * frame_height * 3 / 2,
  811. "hva reference frame",
  812. &ctx->ref_frame);
  813. if (ret) {
  814. dev_err(dev, "%s failed to allocate reference frame buffer\n",
  815. pctx->name);
  816. goto err_seq_info;
  817. }
  818. /* allocate reconstructed frame buffer */
  819. ret = hva_mem_alloc(pctx,
  820. frame_width * frame_height * 3 / 2,
  821. "hva reconstructed frame",
  822. &ctx->rec_frame);
  823. if (ret) {
  824. dev_err(dev,
  825. "%s failed to allocate reconstructed frame buffer\n",
  826. pctx->name);
  827. goto err_ref_frame;
  828. }
  829. /* allocate task descriptor */
  830. ret = hva_mem_alloc(pctx,
  831. sizeof(struct hva_h264_task),
  832. "hva task descriptor",
  833. &ctx->task);
  834. if (ret) {
  835. dev_err(dev,
  836. "%s failed to allocate task descriptor\n",
  837. pctx->name);
  838. goto err_rec_frame;
  839. }
  840. pctx->priv = (void *)ctx;
  841. return 0;
  842. err_rec_frame:
  843. hva_mem_free(pctx, ctx->rec_frame);
  844. err_ref_frame:
  845. hva_mem_free(pctx, ctx->ref_frame);
  846. err_seq_info:
  847. hva_mem_free(pctx, ctx->seq_info);
  848. err_ctx:
  849. devm_kfree(dev, ctx);
  850. err:
  851. pctx->sys_errors++;
  852. return ret;
  853. }
  854. static int hva_h264_close(struct hva_ctx *pctx)
  855. {
  856. struct hva_h264_ctx *ctx = (struct hva_h264_ctx *)pctx->priv;
  857. struct device *dev = ctx_to_dev(pctx);
  858. if (ctx->seq_info)
  859. hva_mem_free(pctx, ctx->seq_info);
  860. if (ctx->ref_frame)
  861. hva_mem_free(pctx, ctx->ref_frame);
  862. if (ctx->rec_frame)
  863. hva_mem_free(pctx, ctx->rec_frame);
  864. if (ctx->task)
  865. hva_mem_free(pctx, ctx->task);
  866. devm_kfree(dev, ctx);
  867. return 0;
  868. }
  869. static int hva_h264_encode(struct hva_ctx *pctx, struct hva_frame *frame,
  870. struct hva_stream *stream)
  871. {
  872. struct hva_h264_ctx *ctx = (struct hva_h264_ctx *)pctx->priv;
  873. struct hva_h264_task *task = (struct hva_h264_task *)ctx->task->vaddr;
  874. struct hva_buffer *tmp_frame;
  875. u32 stuffing_bytes = 0;
  876. int ret = 0;
  877. ret = hva_h264_prepare_task(pctx, task, frame, stream);
  878. if (ret)
  879. goto err;
  880. ret = hva_hw_execute_task(pctx, H264_ENC, ctx->task);
  881. if (ret)
  882. goto err;
  883. pctx->stream_num++;
  884. stream->bytesused += hva_h264_get_stream_size(task);
  885. stuffing_bytes = hva_h264_get_stuffing_bytes(task);
  886. if (stuffing_bytes)
  887. hva_h264_fill_data_nal(pctx, stuffing_bytes,
  888. (u8 *)stream->vaddr,
  889. stream->size,
  890. &stream->bytesused);
  891. /* switch reference & reconstructed frame */
  892. tmp_frame = ctx->ref_frame;
  893. ctx->ref_frame = ctx->rec_frame;
  894. ctx->rec_frame = tmp_frame;
  895. return 0;
  896. err:
  897. stream->bytesused = 0;
  898. return ret;
  899. }
  900. const struct hva_enc nv12h264enc = {
  901. .name = "H264(NV12)",
  902. .pixelformat = V4L2_PIX_FMT_NV12,
  903. .streamformat = V4L2_PIX_FMT_H264,
  904. .max_width = H264_MAX_SIZE_W,
  905. .max_height = H264_MAX_SIZE_H,
  906. .open = hva_h264_open,
  907. .close = hva_h264_close,
  908. .encode = hva_h264_encode,
  909. };
  910. const struct hva_enc nv21h264enc = {
  911. .name = "H264(NV21)",
  912. .pixelformat = V4L2_PIX_FMT_NV21,
  913. .streamformat = V4L2_PIX_FMT_H264,
  914. .max_width = H264_MAX_SIZE_W,
  915. .max_height = H264_MAX_SIZE_H,
  916. .open = hva_h264_open,
  917. .close = hva_h264_close,
  918. .encode = hva_h264_encode,
  919. };