hva-h264.c 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (C) STMicroelectronics SA 2015
  4. * Authors: Yannick Fertre <yannick.fertre@st.com>
  5. * Hugues Fruchet <hugues.fruchet@st.com>
  6. */
  7. #include "hva.h"
  8. #include "hva-hw.h"
  9. #define MAX_SPS_PPS_SIZE 128
  10. #define BITSTREAM_OFFSET_MASK 0x7F
  11. /* video max size*/
  12. #define H264_MAX_SIZE_W 1920
  13. #define H264_MAX_SIZE_H 1920
  14. /* macroBlocs number (width & height) */
  15. #define MB_W(w) ((w + 0xF) / 0x10)
  16. #define MB_H(h) ((h + 0xF) / 0x10)
  17. /* formula to get temporal or spatial data size */
  18. #define DATA_SIZE(w, h) (MB_W(w) * MB_H(h) * 16)
  19. #define SEARCH_WINDOW_BUFFER_MAX_SIZE(w) ((4 * MB_W(w) + 42) * 256 * 3 / 2)
  20. #define CABAC_CONTEXT_BUFFER_MAX_SIZE(w) (MB_W(w) * 16)
  21. #define CTX_MB_BUFFER_MAX_SIZE(w) (MB_W(w) * 16 * 8)
  22. #define SLICE_HEADER_SIZE (4 * 16)
  23. #define BRC_DATA_SIZE (5 * 16)
  24. /* source buffer copy in YUV 420 MB-tiled format with size=16*256*3/2 */
  25. #define CURRENT_WINDOW_BUFFER_MAX_SIZE (16 * 256 * 3 / 2)
  26. /*
  27. * 4 lines of pixels (in Luma, Chroma blue and Chroma red) of top MB
  28. * for deblocking with size=4*16*MBx*2
  29. */
  30. #define LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(w) (4 * 16 * MB_W(w) * 2)
  31. /* factor for bitrate and cpb buffer size max values if profile >= high */
  32. #define H264_FACTOR_HIGH 1200
  33. /* factor for bitrate and cpb buffer size max values if profile < high */
  34. #define H264_FACTOR_BASELINE 1000
  35. /* number of bytes for NALU_TYPE_FILLER_DATA header and footer */
  36. #define H264_FILLER_DATA_SIZE 6
  37. struct h264_profile {
  38. enum v4l2_mpeg_video_h264_level level;
  39. u32 max_mb_per_seconds;
  40. u32 max_frame_size;
  41. u32 max_bitrate;
  42. u32 max_cpb_size;
  43. u32 min_comp_ratio;
  44. };
  45. static const struct h264_profile h264_infos_list[] = {
  46. {V4L2_MPEG_VIDEO_H264_LEVEL_1_0, 1485, 99, 64, 175, 2},
  47. {V4L2_MPEG_VIDEO_H264_LEVEL_1B, 1485, 99, 128, 350, 2},
  48. {V4L2_MPEG_VIDEO_H264_LEVEL_1_1, 3000, 396, 192, 500, 2},
  49. {V4L2_MPEG_VIDEO_H264_LEVEL_1_2, 6000, 396, 384, 1000, 2},
  50. {V4L2_MPEG_VIDEO_H264_LEVEL_1_3, 11880, 396, 768, 2000, 2},
  51. {V4L2_MPEG_VIDEO_H264_LEVEL_2_0, 11880, 396, 2000, 2000, 2},
  52. {V4L2_MPEG_VIDEO_H264_LEVEL_2_1, 19800, 792, 4000, 4000, 2},
  53. {V4L2_MPEG_VIDEO_H264_LEVEL_2_2, 20250, 1620, 4000, 4000, 2},
  54. {V4L2_MPEG_VIDEO_H264_LEVEL_3_0, 40500, 1620, 10000, 10000, 2},
  55. {V4L2_MPEG_VIDEO_H264_LEVEL_3_1, 108000, 3600, 14000, 14000, 4},
  56. {V4L2_MPEG_VIDEO_H264_LEVEL_3_2, 216000, 5120, 20000, 20000, 4},
  57. {V4L2_MPEG_VIDEO_H264_LEVEL_4_0, 245760, 8192, 20000, 25000, 4},
  58. {V4L2_MPEG_VIDEO_H264_LEVEL_4_1, 245760, 8192, 50000, 62500, 2},
  59. {V4L2_MPEG_VIDEO_H264_LEVEL_4_2, 522240, 8704, 50000, 62500, 2},
  60. {V4L2_MPEG_VIDEO_H264_LEVEL_5_0, 589824, 22080, 135000, 135000, 2},
  61. {V4L2_MPEG_VIDEO_H264_LEVEL_5_1, 983040, 36864, 240000, 240000, 2}
  62. };
  63. enum hva_brc_type {
  64. BRC_TYPE_NONE = 0,
  65. BRC_TYPE_CBR = 1,
  66. BRC_TYPE_VBR = 2,
  67. BRC_TYPE_VBR_LOW_DELAY = 3
  68. };
  69. enum hva_entropy_coding_mode {
  70. CAVLC = 0,
  71. CABAC = 1
  72. };
  73. enum hva_picture_coding_type {
  74. PICTURE_CODING_TYPE_I = 0,
  75. PICTURE_CODING_TYPE_P = 1,
  76. PICTURE_CODING_TYPE_B = 2
  77. };
  78. enum hva_h264_sampling_mode {
  79. SAMPLING_MODE_NV12 = 0,
  80. SAMPLING_MODE_UYVY = 1,
  81. SAMPLING_MODE_RGB3 = 3,
  82. SAMPLING_MODE_XRGB4 = 4,
  83. SAMPLING_MODE_NV21 = 8,
  84. SAMPLING_MODE_VYUY = 9,
  85. SAMPLING_MODE_BGR3 = 11,
  86. SAMPLING_MODE_XBGR4 = 12,
  87. SAMPLING_MODE_RGBX4 = 20,
  88. SAMPLING_MODE_BGRX4 = 28
  89. };
  90. enum hva_h264_nalu_type {
  91. NALU_TYPE_UNKNOWN = 0,
  92. NALU_TYPE_SLICE = 1,
  93. NALU_TYPE_SLICE_DPA = 2,
  94. NALU_TYPE_SLICE_DPB = 3,
  95. NALU_TYPE_SLICE_DPC = 4,
  96. NALU_TYPE_SLICE_IDR = 5,
  97. NALU_TYPE_SEI = 6,
  98. NALU_TYPE_SPS = 7,
  99. NALU_TYPE_PPS = 8,
  100. NALU_TYPE_AU_DELIMITER = 9,
  101. NALU_TYPE_SEQ_END = 10,
  102. NALU_TYPE_STREAM_END = 11,
  103. NALU_TYPE_FILLER_DATA = 12,
  104. NALU_TYPE_SPS_EXT = 13,
  105. NALU_TYPE_PREFIX_UNIT = 14,
  106. NALU_TYPE_SUBSET_SPS = 15,
  107. NALU_TYPE_SLICE_AUX = 19,
  108. NALU_TYPE_SLICE_EXT = 20
  109. };
  110. enum hva_h264_sei_payload_type {
  111. SEI_BUFFERING_PERIOD = 0,
  112. SEI_PICTURE_TIMING = 1,
  113. SEI_STEREO_VIDEO_INFO = 21,
  114. SEI_FRAME_PACKING_ARRANGEMENT = 45
  115. };
  116. /*
  117. * stereo Video Info struct
  118. */
  119. struct hva_h264_stereo_video_sei {
  120. u8 field_views_flag;
  121. u8 top_field_is_left_view_flag;
  122. u8 current_frame_is_left_view_flag;
  123. u8 next_frame_is_second_view_flag;
  124. u8 left_view_self_contained_flag;
  125. u8 right_view_self_contained_flag;
  126. };
  127. /*
  128. * struct hva_h264_td
  129. *
  130. * @frame_width: width in pixels of the buffer containing the input frame
  131. * @frame_height: height in pixels of the buffer containing the input frame
  132. * @frame_num: the parameter to be written in the slice header
  133. * @picture_coding_type: type I, P or B
  134. * @pic_order_cnt_type: POC mode, as defined in H264 std : can be 0,1,2
  135. * @first_picture_in_sequence: flag telling to encoder that this is the
  136. * first picture in a video sequence.
  137. * Used for VBR
  138. * @slice_size_type: 0 = no constraint to close the slice
  139. * 1= a slice is closed as soon as the slice_mb_size limit
  140. * is reached
  141. * 2= a slice is closed as soon as the slice_byte_size limit
  142. * is reached
  143. * 3= a slice is closed as soon as either the slice_byte_size
  144. * limit or the slice_mb_size limit is reached
  145. * @slice_mb_size: defines the slice size in number of macroblocks
  146. * (used when slice_size_type=1 or slice_size_type=3)
  147. * @ir_param_option: defines the number of macroblocks per frame to be
  148. * refreshed by AIR algorithm OR the refresh period
  149. * by CIR algorithm
  150. * @intra_refresh_type: enables the adaptive intra refresh algorithm.
 *	Disable=0 / Adaptive=1 and Cycle=2 as intra refresh
  152. * @use_constrained_intra_flag: constrained_intra_pred_flag from PPS
  153. * @transform_mode: controls the use of 4x4/8x8 transform mode
  154. * @disable_deblocking_filter_idc:
  155. * 0: specifies that all luma and chroma block edges of
  156. * the slice are filtered.
  157. * 1: specifies that deblocking is disabled for all block
  158. * edges of the slice.
  159. * 2: specifies that all luma and chroma block edges of
  160. * the slice are filtered with exception of the block edges
  161. * that coincide with slice boundaries
  162. * @slice_alpha_c0_offset_div2: to be written in slice header,
  163. * controls deblocking
  164. * @slice_beta_offset_div2: to be written in slice header,
  165. * controls deblocking
  166. * @encoder_complexity: encoder complexity control (IME).
  167. * 0 = I_16x16, P_16x16, Full ME Complexity
  168. * 1 = I_16x16, I_NxN, P_16x16, Full ME Complexity
  169. * 2 = I_16x16, I_NXN, P_16x16, P_WxH, Full ME Complexity
  170. * 4 = I_16x16, P_16x16, Reduced ME Complexity
  171. * 5 = I_16x16, I_NxN, P_16x16, Reduced ME Complexity
  172. * 6 = I_16x16, I_NXN, P_16x16, P_WxH, Reduced ME Complexity
  173. * @chroma_qp_index_offset: coming from picture parameter set
  174. * (PPS see [H.264 STD] 7.4.2.2)
  175. * @entropy_coding_mode: entropy coding mode.
  176. * 0 = CAVLC
  177. * 1 = CABAC
  178. * @brc_type: selects the bit-rate control algorithm
  179. * 0 = constant Qp, (no BRC)
  180. * 1 = CBR
  181. * 2 = VBR
  182. * @quant: Quantization param used in case of fix QP encoding (no BRC)
 * @non_vcl_nalu_size: size of non-VCL NALUs (SPS, PPS, filler),
 *	used by BRC
  185. * @cpb_buffer_size: size of Coded Picture Buffer, used by BRC
  186. * @bit_rate: target bitrate, for BRC
  187. * @qp_min: min QP threshold
  188. * @qp_max: max QP threshold
  189. * @framerate_num: target framerate numerator , used by BRC
 * @framerate_den: target framerate denominator, used by BRC
  191. * @delay: End-to-End Initial Delay
 * @strict_hrd_compliancy: flag for HRD compliancy (1)
 *	May impact quality encoding
  194. * @addr_source_buffer: address of input frame buffer for current frame
 * @addr_fwd_ref_buffer: address of reference frame buffer
  196. * @addr_rec_buffer: address of reconstructed frame buffer
  197. * @addr_output_bitstream_start: output bitstream start address
  198. * @addr_output_bitstream_end: output bitstream end address
  199. * @addr_external_sw : address of external search window
  200. * @addr_lctx : address of context picture buffer
  201. * @addr_local_rec_buffer: address of local reconstructed buffer
  202. * @addr_spatial_context: address of spatial context buffer
  203. * @bitstream_offset: offset in bits between aligned bitstream start
  204. * address and first bit to be written by HVA.
  205. * Range value is [0..63]
  206. * @sampling_mode: Input picture format .
  207. * 0: YUV420 semi_planar Interleaved
  208. * 1: YUV422 raster Interleaved
  209. * @addr_param_out: address of output parameters structure
  210. * @addr_scaling_matrix: address to the coefficient of
  211. * the inverse scaling matrix
  212. * @addr_scaling_matrix_dir: address to the coefficient of
  213. * the direct scaling matrix
  214. * @addr_cabac_context_buffer: address of cabac context buffer
 * @gmv_x: Input information about the horizontal global displacement of
 *	the encoded frame versus the previous one
 * @gmv_y: Input information about the vertical global displacement of
 *	the encoded frame versus the previous one
  219. * @window_width: width in pixels of the window to be encoded inside
  220. * the input frame
 * @window_height: height in pixels of the window to be encoded inside
 *	the input frame
  223. * @window_horizontal_offset: horizontal offset in pels for input window
  224. * within input frame
  225. * @window_vertical_offset: vertical offset in pels for input window
  226. * within input frame
  227. * @addr_roi: Map of QP offset for the Region of Interest algorithm and
  228. * also used for Error map.
  229. * Bit 0-6 used for qp offset (value -64 to 63).
  230. * Bit 7 used to force intra
  231. * @addr_slice_header: address to slice header
  232. * @slice_header_size_in_bits: size in bits of the Slice header
  233. * @slice_header_offset0: Slice header offset where to insert
  234. * first_Mb_in_slice
  235. * @slice_header_offset1: Slice header offset where to insert
  236. * slice_qp_delta
  237. * @slice_header_offset2: Slice header offset where to insert
  238. * num_MBs_in_slice
  239. * @slice_synchro_enable: enable "slice ready" interrupt after each slice
  240. * @max_slice_number: Maximum number of slice in a frame
  241. * (0 is strictly forbidden)
 * @rgb2_yuv_y_coeff: Four coefficients (C0C1C2C3) to convert from RGB to
 *	YUV for the Y component.
 *	Y = C0*R + C1*G + C2*B + C3 (C0 is on byte 0)
 * @rgb2_yuv_u_coeff: Four coefficients (C0C1C2C3) to convert from RGB to
 *	YUV for the U (Cb) component.
 *	U = C0*R + C1*G + C2*B + C3 (C0 is on byte 0)
 * @rgb2_yuv_v_coeff: Four coefficients (C0C1C2C3) to convert from RGB to
 *	YUV for the V (Cr) component.
 *	V = C0*R + C1*G + C2*B + C3 (C0 is on byte 0)
  251. * @slice_byte_size: maximum slice size in bytes
  252. * (used when slice_size_type=2 or slice_size_type=3)
  253. * @max_air_intra_mb_nb: Maximum number of intra macroblock in a frame
  254. * for the AIR algorithm
  255. * @brc_no_skip: Disable skipping in the Bitrate Controller
 * @addr_temporal_context: address of temporal context buffer
 * @addr_brc_in_out_parameter: address of static buffer for BRC parameters
  257. */
  258. struct hva_h264_td {
  259. u16 frame_width;
  260. u16 frame_height;
  261. u32 frame_num;
  262. u16 picture_coding_type;
  263. u16 reserved1;
  264. u16 pic_order_cnt_type;
  265. u16 first_picture_in_sequence;
  266. u16 slice_size_type;
  267. u16 reserved2;
  268. u32 slice_mb_size;
  269. u16 ir_param_option;
  270. u16 intra_refresh_type;
  271. u16 use_constrained_intra_flag;
  272. u16 transform_mode;
  273. u16 disable_deblocking_filter_idc;
  274. s16 slice_alpha_c0_offset_div2;
  275. s16 slice_beta_offset_div2;
  276. u16 encoder_complexity;
  277. s16 chroma_qp_index_offset;
  278. u16 entropy_coding_mode;
  279. u16 brc_type;
  280. u16 quant;
  281. u32 non_vcl_nalu_size;
  282. u32 cpb_buffer_size;
  283. u32 bit_rate;
  284. u16 qp_min;
  285. u16 qp_max;
  286. u16 framerate_num;
  287. u16 framerate_den;
  288. u16 delay;
  289. u16 strict_hrd_compliancy;
  290. u32 addr_source_buffer;
  291. u32 addr_fwd_ref_buffer;
  292. u32 addr_rec_buffer;
  293. u32 addr_output_bitstream_start;
  294. u32 addr_output_bitstream_end;
  295. u32 addr_external_sw;
  296. u32 addr_lctx;
  297. u32 addr_local_rec_buffer;
  298. u32 addr_spatial_context;
  299. u16 bitstream_offset;
  300. u16 sampling_mode;
  301. u32 addr_param_out;
  302. u32 addr_scaling_matrix;
  303. u32 addr_scaling_matrix_dir;
  304. u32 addr_cabac_context_buffer;
  305. u32 reserved3;
  306. u32 reserved4;
  307. s16 gmv_x;
  308. s16 gmv_y;
  309. u16 window_width;
  310. u16 window_height;
  311. u16 window_horizontal_offset;
  312. u16 window_vertical_offset;
  313. u32 addr_roi;
  314. u32 addr_slice_header;
  315. u16 slice_header_size_in_bits;
  316. u16 slice_header_offset0;
  317. u16 slice_header_offset1;
  318. u16 slice_header_offset2;
  319. u32 reserved5;
  320. u32 reserved6;
  321. u16 reserved7;
  322. u16 reserved8;
  323. u16 slice_synchro_enable;
  324. u16 max_slice_number;
  325. u32 rgb2_yuv_y_coeff;
  326. u32 rgb2_yuv_u_coeff;
  327. u32 rgb2_yuv_v_coeff;
  328. u32 slice_byte_size;
  329. u16 max_air_intra_mb_nb;
  330. u16 brc_no_skip;
  331. u32 addr_temporal_context;
  332. u32 addr_brc_in_out_parameter;
  333. };
  334. /*
  335. * struct hva_h264_slice_po
  336. *
  337. * @ slice_size: slice size
  338. * @ slice_start_time: start time
  339. * @ slice_stop_time: stop time
  340. * @ slice_num: slice number
  341. */
  342. struct hva_h264_slice_po {
  343. u32 slice_size;
  344. u32 slice_start_time;
  345. u32 slice_end_time;
  346. u32 slice_num;
  347. };
  348. /*
  349. * struct hva_h264_po
  350. *
  351. * @ bitstream_size: bitstream size
  352. * @ dct_bitstream_size: dtc bitstream size
  353. * @ stuffing_bits: number of stuffing bits inserted by the encoder
  354. * @ removal_time: removal time of current frame (nb of ticks 1/framerate)
  355. * @ hvc_start_time: hvc start time
  356. * @ hvc_stop_time: hvc stop time
  357. * @ slice_count: slice count
  358. */
  359. struct hva_h264_po {
  360. u32 bitstream_size;
  361. u32 dct_bitstream_size;
  362. u32 stuffing_bits;
  363. u32 removal_time;
  364. u32 hvc_start_time;
  365. u32 hvc_stop_time;
  366. u32 slice_count;
  367. u32 reserved0;
  368. struct hva_h264_slice_po slice_params[16];
  369. };
  370. struct hva_h264_task {
  371. struct hva_h264_td td;
  372. struct hva_h264_po po;
  373. };
  374. /*
  375. * struct hva_h264_ctx
  376. *
  377. * @seq_info: sequence information buffer
  378. * @ref_frame: reference frame buffer
  379. * @rec_frame: reconstructed frame buffer
  380. * @task: task descriptor
  381. */
  382. struct hva_h264_ctx {
  383. struct hva_buffer *seq_info;
  384. struct hva_buffer *ref_frame;
  385. struct hva_buffer *rec_frame;
  386. struct hva_buffer *task;
  387. };
  388. static int hva_h264_fill_slice_header(struct hva_ctx *pctx,
  389. u8 *slice_header_addr,
  390. struct hva_controls *ctrls,
  391. int frame_num,
  392. u16 *header_size,
  393. u16 *header_offset0,
  394. u16 *header_offset1,
  395. u16 *header_offset2)
  396. {
  397. /*
  398. * with this HVA hardware version, part of the slice header is computed
  399. * on host and part by hardware.
  400. * The part of host is precomputed and available through this array.
  401. */
  402. struct device *dev = ctx_to_dev(pctx);
  403. int cabac = V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CABAC;
  404. const unsigned char slice_header[] = { 0x00, 0x00, 0x00, 0x01,
  405. 0x41, 0x34, 0x07, 0x00};
  406. int idr_pic_id = frame_num % 2;
  407. enum hva_picture_coding_type type;
  408. u32 frame_order = frame_num % ctrls->gop_size;
  409. if (!(frame_num % ctrls->gop_size))
  410. type = PICTURE_CODING_TYPE_I;
  411. else
  412. type = PICTURE_CODING_TYPE_P;
  413. memcpy(slice_header_addr, slice_header, sizeof(slice_header));
  414. *header_size = 56;
  415. *header_offset0 = 40;
  416. *header_offset1 = 13;
  417. *header_offset2 = 0;
  418. if (type == PICTURE_CODING_TYPE_I) {
  419. slice_header_addr[4] = 0x65;
  420. slice_header_addr[5] = 0x11;
  421. /* toggle the I frame */
  422. if ((frame_num / ctrls->gop_size) % 2) {
  423. *header_size += 4;
  424. *header_offset1 += 4;
  425. slice_header_addr[6] = 0x04;
  426. slice_header_addr[7] = 0x70;
  427. } else {
  428. *header_size += 2;
  429. *header_offset1 += 2;
  430. slice_header_addr[6] = 0x09;
  431. slice_header_addr[7] = 0xC0;
  432. }
  433. } else {
  434. if (ctrls->entropy_mode == cabac) {
  435. *header_size += 1;
  436. *header_offset1 += 1;
  437. slice_header_addr[7] = 0x80;
  438. }
  439. /*
  440. * update slice header with P frame order
  441. * frame order is limited to 16 (coded on 4bits only)
  442. */
  443. slice_header_addr[5] += ((frame_order & 0x0C) >> 2);
  444. slice_header_addr[6] += ((frame_order & 0x03) << 6);
  445. }
  446. dev_dbg(dev,
  447. "%s %s slice header order %d idrPicId %d header size %d\n",
  448. pctx->name, __func__, frame_order, idr_pic_id, *header_size);
  449. return 0;
  450. }
  451. static int hva_h264_fill_data_nal(struct hva_ctx *pctx,
  452. unsigned int stuffing_bytes, u8 *addr,
  453. unsigned int stream_size, unsigned int *size)
  454. {
  455. struct device *dev = ctx_to_dev(pctx);
  456. const u8 start[] = { 0x00, 0x00, 0x00, 0x01 };
  457. dev_dbg(dev, "%s %s stuffing bytes %d\n", pctx->name, __func__,
  458. stuffing_bytes);
  459. if ((*size + stuffing_bytes + H264_FILLER_DATA_SIZE) > stream_size) {
  460. dev_dbg(dev, "%s %s too many stuffing bytes %d\n",
  461. pctx->name, __func__, stuffing_bytes);
  462. return 0;
  463. }
  464. /* start code */
  465. memcpy(addr + *size, start, sizeof(start));
  466. *size += sizeof(start);
  467. /* nal_unit_type */
  468. addr[*size] = NALU_TYPE_FILLER_DATA;
  469. *size += 1;
  470. memset(addr + *size, 0xff, stuffing_bytes);
  471. *size += stuffing_bytes;
  472. addr[*size] = 0x80;
  473. *size += 1;
  474. return 0;
  475. }
  476. static int hva_h264_fill_sei_nal(struct hva_ctx *pctx,
  477. enum hva_h264_sei_payload_type type,
  478. u8 *addr, u32 *size)
  479. {
  480. struct device *dev = ctx_to_dev(pctx);
  481. const u8 start[] = { 0x00, 0x00, 0x00, 0x01 };
  482. struct hva_h264_stereo_video_sei info;
  483. u8 offset = 7;
  484. u8 msg = 0;
  485. /* start code */
  486. memcpy(addr + *size, start, sizeof(start));
  487. *size += sizeof(start);
  488. /* nal_unit_type */
  489. addr[*size] = NALU_TYPE_SEI;
  490. *size += 1;
  491. /* payload type */
  492. addr[*size] = type;
  493. *size += 1;
  494. switch (type) {
  495. case SEI_STEREO_VIDEO_INFO:
  496. memset(&info, 0, sizeof(info));
  497. /* set to top/bottom frame packing arrangement */
  498. info.field_views_flag = 1;
  499. info.top_field_is_left_view_flag = 1;
  500. /* payload size */
  501. addr[*size] = 1;
  502. *size += 1;
  503. /* payload */
  504. msg = info.field_views_flag << offset--;
  505. if (info.field_views_flag) {
  506. msg |= info.top_field_is_left_view_flag <<
  507. offset--;
  508. } else {
  509. msg |= info.current_frame_is_left_view_flag <<
  510. offset--;
  511. msg |= info.next_frame_is_second_view_flag <<
  512. offset--;
  513. }
  514. msg |= info.left_view_self_contained_flag << offset--;
  515. msg |= info.right_view_self_contained_flag << offset--;
  516. addr[*size] = msg;
  517. *size += 1;
  518. addr[*size] = 0x80;
  519. *size += 1;
  520. return 0;
  521. case SEI_BUFFERING_PERIOD:
  522. case SEI_PICTURE_TIMING:
  523. case SEI_FRAME_PACKING_ARRANGEMENT:
  524. default:
  525. dev_err(dev, "%s sei nal type not supported %d\n",
  526. pctx->name, type);
  527. return -EINVAL;
  528. }
  529. }
  530. static int hva_h264_prepare_task(struct hva_ctx *pctx,
  531. struct hva_h264_task *task,
  532. struct hva_frame *frame,
  533. struct hva_stream *stream)
  534. {
  535. struct hva_dev *hva = ctx_to_hdev(pctx);
  536. struct device *dev = ctx_to_dev(pctx);
  537. struct hva_h264_ctx *ctx = (struct hva_h264_ctx *)pctx->priv;
  538. struct hva_buffer *seq_info = ctx->seq_info;
  539. struct hva_buffer *fwd_ref_frame = ctx->ref_frame;
  540. struct hva_buffer *loc_rec_frame = ctx->rec_frame;
  541. struct hva_h264_td *td = &task->td;
  542. struct hva_controls *ctrls = &pctx->ctrls;
  543. struct v4l2_fract *time_per_frame = &pctx->ctrls.time_per_frame;
  544. int cavlc = V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CAVLC;
  545. u32 frame_num = pctx->stream_num;
  546. u32 addr_esram = hva->esram_addr;
  547. enum v4l2_mpeg_video_h264_level level;
  548. dma_addr_t paddr = 0;
  549. u8 *slice_header_vaddr;
  550. u32 frame_width = frame->info.aligned_width;
  551. u32 frame_height = frame->info.aligned_height;
  552. u32 max_cpb_buffer_size;
  553. unsigned int payload = stream->bytesused;
  554. u32 max_bitrate;
  555. /* check width and height parameters */
  556. if ((frame_width > max(H264_MAX_SIZE_W, H264_MAX_SIZE_H)) ||
  557. (frame_height > max(H264_MAX_SIZE_W, H264_MAX_SIZE_H))) {
  558. dev_err(dev,
  559. "%s width(%d) or height(%d) exceeds limits (%dx%d)\n",
  560. pctx->name, frame_width, frame_height,
  561. H264_MAX_SIZE_W, H264_MAX_SIZE_H);
  562. pctx->frame_errors++;
  563. return -EINVAL;
  564. }
  565. level = ctrls->level;
  566. memset(td, 0, sizeof(struct hva_h264_td));
  567. td->frame_width = frame_width;
  568. td->frame_height = frame_height;
  569. /* set frame alignement */
  570. td->window_width = frame_width;
  571. td->window_height = frame_height;
  572. td->window_horizontal_offset = 0;
  573. td->window_vertical_offset = 0;
  574. td->first_picture_in_sequence = (!frame_num) ? 1 : 0;
  575. /* pic_order_cnt_type hard coded to '2' as only I & P frames */
  576. td->pic_order_cnt_type = 2;
  577. /* useConstrainedIntraFlag set to false for better coding efficiency */
  578. td->use_constrained_intra_flag = false;
  579. td->brc_type = (ctrls->bitrate_mode == V4L2_MPEG_VIDEO_BITRATE_MODE_CBR)
  580. ? BRC_TYPE_CBR : BRC_TYPE_VBR;
  581. td->entropy_coding_mode = (ctrls->entropy_mode == cavlc) ? CAVLC :
  582. CABAC;
  583. td->bit_rate = ctrls->bitrate;
  584. /* set framerate, framerate = 1 n/ time per frame */
  585. if (time_per_frame->numerator >= 536) {
  586. /*
  587. * due to a hardware bug, framerate denominator can't exceed
  588. * 536 (BRC overflow). Compute nearest framerate
  589. */
  590. td->framerate_den = 1;
  591. td->framerate_num = (time_per_frame->denominator +
  592. (time_per_frame->numerator >> 1) - 1) /
  593. time_per_frame->numerator;
  594. /*
  595. * update bitrate to introduce a correction due to
  596. * the new framerate
  597. * new bitrate = (old bitrate * new framerate) / old framerate
  598. */
  599. td->bit_rate /= time_per_frame->numerator;
  600. td->bit_rate *= time_per_frame->denominator;
  601. td->bit_rate /= td->framerate_num;
  602. } else {
  603. td->framerate_den = time_per_frame->numerator;
  604. td->framerate_num = time_per_frame->denominator;
  605. }
  606. /* compute maximum bitrate depending on profile */
  607. if (ctrls->profile >= V4L2_MPEG_VIDEO_H264_PROFILE_HIGH)
  608. max_bitrate = h264_infos_list[level].max_bitrate *
  609. H264_FACTOR_HIGH;
  610. else
  611. max_bitrate = h264_infos_list[level].max_bitrate *
  612. H264_FACTOR_BASELINE;
  613. /* check if bitrate doesn't exceed max size */
  614. if (td->bit_rate > max_bitrate) {
  615. dev_dbg(dev,
  616. "%s bitrate (%d) larger than level and profile allow, clip to %d\n",
  617. pctx->name, td->bit_rate, max_bitrate);
  618. td->bit_rate = max_bitrate;
  619. }
  620. /* convert cpb_buffer_size in bits */
  621. td->cpb_buffer_size = ctrls->cpb_size * 8000;
  622. /* compute maximum cpb buffer size depending on profile */
  623. if (ctrls->profile >= V4L2_MPEG_VIDEO_H264_PROFILE_HIGH)
  624. max_cpb_buffer_size =
  625. h264_infos_list[level].max_cpb_size * H264_FACTOR_HIGH;
  626. else
  627. max_cpb_buffer_size =
  628. h264_infos_list[level].max_cpb_size * H264_FACTOR_BASELINE;
  629. /* check if cpb buffer size doesn't exceed max size */
  630. if (td->cpb_buffer_size > max_cpb_buffer_size) {
  631. dev_dbg(dev,
  632. "%s cpb size larger than level %d allows, clip to %d\n",
  633. pctx->name, td->cpb_buffer_size, max_cpb_buffer_size);
  634. td->cpb_buffer_size = max_cpb_buffer_size;
  635. }
  636. /* enable skipping in the Bitrate Controller */
  637. td->brc_no_skip = 0;
  638. /* initial delay */
  639. if ((ctrls->bitrate_mode == V4L2_MPEG_VIDEO_BITRATE_MODE_CBR) &&
  640. td->bit_rate)
  641. td->delay = 1000 * (td->cpb_buffer_size / td->bit_rate);
  642. else
  643. td->delay = 0;
  644. switch (frame->info.pixelformat) {
  645. case V4L2_PIX_FMT_NV12:
  646. td->sampling_mode = SAMPLING_MODE_NV12;
  647. break;
  648. case V4L2_PIX_FMT_NV21:
  649. td->sampling_mode = SAMPLING_MODE_NV21;
  650. break;
  651. default:
  652. dev_err(dev, "%s invalid source pixel format\n",
  653. pctx->name);
  654. pctx->frame_errors++;
  655. return -EINVAL;
  656. }
  657. /*
  658. * fill matrix color converter (RGB to YUV)
  659. * Y = 0,299 R + 0,587 G + 0,114 B
  660. * Cb = -0,1687 R -0,3313 G + 0,5 B + 128
  661. * Cr = 0,5 R - 0,4187 G - 0,0813 B + 128
  662. */
  663. td->rgb2_yuv_y_coeff = 0x12031008;
  664. td->rgb2_yuv_u_coeff = 0x800EF7FB;
  665. td->rgb2_yuv_v_coeff = 0x80FEF40E;
  666. /* enable/disable transform mode */
  667. td->transform_mode = ctrls->dct8x8;
  668. /* encoder complexity fix to 2, ENCODE_I_16x16_I_NxN_P_16x16_P_WxH */
  669. td->encoder_complexity = 2;
  670. /* quant fix to 28, default VBR value */
  671. td->quant = 28;
  672. if (td->framerate_den == 0) {
  673. dev_err(dev, "%s invalid framerate\n", pctx->name);
  674. pctx->frame_errors++;
  675. return -EINVAL;
  676. }
  677. /* if automatic framerate, deactivate bitrate controller */
  678. if (td->framerate_num == 0)
  679. td->brc_type = 0;
  680. /* compliancy fix to true */
  681. td->strict_hrd_compliancy = 1;
  682. /* set minimum & maximum quantizers */
  683. td->qp_min = clamp_val(ctrls->qpmin, 0, 51);
  684. td->qp_max = clamp_val(ctrls->qpmax, 0, 51);
  685. td->addr_source_buffer = frame->paddr;
  686. td->addr_fwd_ref_buffer = fwd_ref_frame->paddr;
  687. td->addr_rec_buffer = loc_rec_frame->paddr;
  688. td->addr_output_bitstream_end = (u32)stream->paddr + stream->size;
  689. td->addr_output_bitstream_start = (u32)stream->paddr;
  690. td->bitstream_offset = (((u32)stream->paddr & 0xF) << 3) &
  691. BITSTREAM_OFFSET_MASK;
  692. td->addr_param_out = (u32)ctx->task->paddr +
  693. offsetof(struct hva_h264_task, po);
  694. /* swap spatial and temporal context */
  695. if (frame_num % 2) {
  696. paddr = seq_info->paddr;
  697. td->addr_spatial_context = ALIGN(paddr, 0x100);
  698. paddr = seq_info->paddr + DATA_SIZE(frame_width,
  699. frame_height);
  700. td->addr_temporal_context = ALIGN(paddr, 0x100);
  701. } else {
  702. paddr = seq_info->paddr;
  703. td->addr_temporal_context = ALIGN(paddr, 0x100);
  704. paddr = seq_info->paddr + DATA_SIZE(frame_width,
  705. frame_height);
  706. td->addr_spatial_context = ALIGN(paddr, 0x100);
  707. }
  708. paddr = seq_info->paddr + 2 * DATA_SIZE(frame_width, frame_height);
  709. td->addr_brc_in_out_parameter = ALIGN(paddr, 0x100);
  710. paddr = td->addr_brc_in_out_parameter + BRC_DATA_SIZE;
  711. td->addr_slice_header = ALIGN(paddr, 0x100);
  712. td->addr_external_sw = ALIGN(addr_esram, 0x100);
  713. addr_esram += SEARCH_WINDOW_BUFFER_MAX_SIZE(frame_width);
  714. td->addr_local_rec_buffer = ALIGN(addr_esram, 0x100);
  715. addr_esram += LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(frame_width);
  716. td->addr_lctx = ALIGN(addr_esram, 0x100);
  717. addr_esram += CTX_MB_BUFFER_MAX_SIZE(max(frame_width, frame_height));
  718. td->addr_cabac_context_buffer = ALIGN(addr_esram, 0x100);
  719. if (!(frame_num % ctrls->gop_size)) {
  720. td->picture_coding_type = PICTURE_CODING_TYPE_I;
  721. stream->vbuf.flags |= V4L2_BUF_FLAG_KEYFRAME;
  722. } else {
  723. td->picture_coding_type = PICTURE_CODING_TYPE_P;
  724. stream->vbuf.flags &= ~V4L2_BUF_FLAG_KEYFRAME;
  725. }
  726. /* fill the slice header part */
  727. slice_header_vaddr = seq_info->vaddr + (td->addr_slice_header -
  728. seq_info->paddr);
  729. hva_h264_fill_slice_header(pctx, slice_header_vaddr, ctrls, frame_num,
  730. &td->slice_header_size_in_bits,
  731. &td->slice_header_offset0,
  732. &td->slice_header_offset1,
  733. &td->slice_header_offset2);
  734. td->chroma_qp_index_offset = 2;
  735. td->slice_synchro_enable = 0;
  736. td->max_slice_number = 1;
  737. /*
  738. * check the sps/pps header size for key frame only
  739. * sps/pps header was previously fill by libv4l
  740. * during qbuf of stream buffer
  741. */
  742. if ((stream->vbuf.flags == V4L2_BUF_FLAG_KEYFRAME) &&
  743. (payload > MAX_SPS_PPS_SIZE)) {
  744. dev_err(dev, "%s invalid sps/pps size %d\n", pctx->name,
  745. payload);
  746. pctx->frame_errors++;
  747. return -EINVAL;
  748. }
  749. if (stream->vbuf.flags != V4L2_BUF_FLAG_KEYFRAME)
  750. payload = 0;
  751. /* add SEI nal (video stereo info) */
  752. if (ctrls->sei_fp && hva_h264_fill_sei_nal(pctx, SEI_STEREO_VIDEO_INFO,
  753. (u8 *)stream->vaddr,
  754. &payload)) {
  755. dev_err(dev, "%s fail to get SEI nal\n", pctx->name);
  756. pctx->frame_errors++;
  757. return -EINVAL;
  758. }
  759. /* fill size of non-VCL NAL units (SPS, PPS, filler and SEI) */
  760. td->non_vcl_nalu_size = payload * 8;
  761. /* compute bitstream offset & new start address of bitstream */
  762. td->addr_output_bitstream_start += ((payload >> 4) << 4);
  763. td->bitstream_offset += (payload - ((payload >> 4) << 4)) * 8;
  764. stream->bytesused = payload;
  765. return 0;
  766. }
  767. static unsigned int hva_h264_get_stream_size(struct hva_h264_task *task)
  768. {
  769. struct hva_h264_po *po = &task->po;
  770. return po->bitstream_size;
  771. }
  772. static u32 hva_h264_get_stuffing_bytes(struct hva_h264_task *task)
  773. {
  774. struct hva_h264_po *po = &task->po;
  775. return po->stuffing_bits >> 3;
  776. }
  777. static int hva_h264_open(struct hva_ctx *pctx)
  778. {
  779. struct device *dev = ctx_to_dev(pctx);
  780. struct hva_h264_ctx *ctx;
  781. struct hva_dev *hva = ctx_to_hdev(pctx);
  782. u32 frame_width = pctx->frameinfo.aligned_width;
  783. u32 frame_height = pctx->frameinfo.aligned_height;
  784. u32 size;
  785. int ret;
  786. /* check esram size necessary to encode a frame */
  787. size = SEARCH_WINDOW_BUFFER_MAX_SIZE(frame_width) +
  788. LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(frame_width) +
  789. CTX_MB_BUFFER_MAX_SIZE(max(frame_width, frame_height)) +
  790. CABAC_CONTEXT_BUFFER_MAX_SIZE(frame_width);
  791. if (hva->esram_size < size) {
  792. dev_err(dev, "%s not enough esram (max:%d request:%d)\n",
  793. pctx->name, hva->esram_size, size);
  794. ret = -EINVAL;
  795. goto err;
  796. }
  797. /* allocate context for codec */
  798. ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
  799. if (!ctx) {
  800. ret = -ENOMEM;
  801. goto err;
  802. }
  803. /* allocate sequence info buffer */
  804. ret = hva_mem_alloc(pctx,
  805. 2 * DATA_SIZE(frame_width, frame_height) +
  806. SLICE_HEADER_SIZE +
  807. BRC_DATA_SIZE,
  808. "hva sequence info",
  809. &ctx->seq_info);
  810. if (ret) {
  811. dev_err(dev,
  812. "%s failed to allocate sequence info buffer\n",
  813. pctx->name);
  814. goto err_ctx;
  815. }
  816. /* allocate reference frame buffer */
  817. ret = hva_mem_alloc(pctx,
  818. frame_width * frame_height * 3 / 2,
  819. "hva reference frame",
  820. &ctx->ref_frame);
  821. if (ret) {
  822. dev_err(dev, "%s failed to allocate reference frame buffer\n",
  823. pctx->name);
  824. goto err_seq_info;
  825. }
  826. /* allocate reconstructed frame buffer */
  827. ret = hva_mem_alloc(pctx,
  828. frame_width * frame_height * 3 / 2,
  829. "hva reconstructed frame",
  830. &ctx->rec_frame);
  831. if (ret) {
  832. dev_err(dev,
  833. "%s failed to allocate reconstructed frame buffer\n",
  834. pctx->name);
  835. goto err_ref_frame;
  836. }
  837. /* allocate task descriptor */
  838. ret = hva_mem_alloc(pctx,
  839. sizeof(struct hva_h264_task),
  840. "hva task descriptor",
  841. &ctx->task);
  842. if (ret) {
  843. dev_err(dev,
  844. "%s failed to allocate task descriptor\n",
  845. pctx->name);
  846. goto err_rec_frame;
  847. }
  848. pctx->priv = (void *)ctx;
  849. return 0;
  850. err_rec_frame:
  851. hva_mem_free(pctx, ctx->rec_frame);
  852. err_ref_frame:
  853. hva_mem_free(pctx, ctx->ref_frame);
  854. err_seq_info:
  855. hva_mem_free(pctx, ctx->seq_info);
  856. err_ctx:
  857. devm_kfree(dev, ctx);
  858. err:
  859. pctx->sys_errors++;
  860. return ret;
  861. }
  862. static int hva_h264_close(struct hva_ctx *pctx)
  863. {
  864. struct hva_h264_ctx *ctx = (struct hva_h264_ctx *)pctx->priv;
  865. struct device *dev = ctx_to_dev(pctx);
  866. if (ctx->seq_info)
  867. hva_mem_free(pctx, ctx->seq_info);
  868. if (ctx->ref_frame)
  869. hva_mem_free(pctx, ctx->ref_frame);
  870. if (ctx->rec_frame)
  871. hva_mem_free(pctx, ctx->rec_frame);
  872. if (ctx->task)
  873. hva_mem_free(pctx, ctx->task);
  874. devm_kfree(dev, ctx);
  875. return 0;
  876. }
  877. static int hva_h264_encode(struct hva_ctx *pctx, struct hva_frame *frame,
  878. struct hva_stream *stream)
  879. {
  880. struct hva_h264_ctx *ctx = (struct hva_h264_ctx *)pctx->priv;
  881. struct hva_h264_task *task = (struct hva_h264_task *)ctx->task->vaddr;
  882. u32 stuffing_bytes = 0;
  883. int ret = 0;
  884. ret = hva_h264_prepare_task(pctx, task, frame, stream);
  885. if (ret)
  886. goto err;
  887. ret = hva_hw_execute_task(pctx, H264_ENC, ctx->task);
  888. if (ret)
  889. goto err;
  890. pctx->stream_num++;
  891. stream->bytesused += hva_h264_get_stream_size(task);
  892. stuffing_bytes = hva_h264_get_stuffing_bytes(task);
  893. if (stuffing_bytes)
  894. hva_h264_fill_data_nal(pctx, stuffing_bytes,
  895. (u8 *)stream->vaddr,
  896. stream->size,
  897. &stream->bytesused);
  898. /* switch reference & reconstructed frame */
  899. swap(ctx->ref_frame, ctx->rec_frame);
  900. return 0;
  901. err:
  902. stream->bytesused = 0;
  903. return ret;
  904. }
  905. const struct hva_enc nv12h264enc = {
  906. .name = "H264(NV12)",
  907. .pixelformat = V4L2_PIX_FMT_NV12,
  908. .streamformat = V4L2_PIX_FMT_H264,
  909. .max_width = H264_MAX_SIZE_W,
  910. .max_height = H264_MAX_SIZE_H,
  911. .open = hva_h264_open,
  912. .close = hva_h264_close,
  913. .encode = hva_h264_encode,
  914. };
  915. const struct hva_enc nv21h264enc = {
  916. .name = "H264(NV21)",
  917. .pixelformat = V4L2_PIX_FMT_NV21,
  918. .streamformat = V4L2_PIX_FMT_H264,
  919. .max_width = H264_MAX_SIZE_W,
  920. .max_height = H264_MAX_SIZE_H,
  921. .open = hva_h264_open,
  922. .close = hva_h264_close,
  923. .encode = hva_h264_encode,
  924. };