| /* |
| * Samsung Exynos SoC series VPU driver |
| * |
| * Copyright (c) 2016 Samsung Electronics Co., Ltd |
| * |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License version 2 as |
| * published by the Free Software Foundation. |
| */ |
| |
| #include <linux/bitmap.h> |
| |
| #include "lib/vpul-def.h" |
| #include "lib/vpul-errno.h" |
| #include "lib/vpul-gen.h" |
| #include "lib/vpul-ds.h" |
| #include "lib/vpul-hwmapper.h" |
| #include "vpul-latency-balancing.h" |
| #include "lib/vpul-translator.h" |
| #include "lib/vpul-pu.h" |
| #include "vpu-hardware.h" |
| #include "lib/vpul-hw-v2.1.h" |
| #include "lib/vpu-fwif-hw-gen.h" |
| |
| struct port_index_2_pu { |
| struct vpul_pu *pu; |
| __u32 port_index_in_pu; |
| }; |
| |
| /** |
| * The following structure is used for returning the results of calculation |
| * of number of MPRBs needed. |
| * in addition to number of large MPRBs and small MPRBs, it also returns an |
| * array describing the PU RAM ports to use for MPRB connections; this is |
| * useful for those exception cases to the rule of using consecutive ports, |
| * as for fast disparity, upscaler and inpaint. |
| * This structure is initialized by calling function to default values : |
| * - num_lrg_mprbs and num_sm_mprbs both initialized to 0 |
| * - ram_ports_array = consecutive_mem_ports[] |
| * the functions calculating number of MPRBs needed and RAM ports to use need |
| * to write in struct result_of_calc_num_mprbs_needed only the data which is |
| * different from default values; for example, if no small MPRBs are needed, no |
| * need to write 0 to num_sm_mprbs. |
| */ |
| struct result_of_calc_num_mprbs_needed { |
| __u32 num_lrg_mprbs; |
| __u32 num_sm_mprbs; |
| const __u32 *ram_ports_array; |
| }; |
| |
| typedef void (*__calc_nbr_of_mprbs_needed)(const struct vpul_pu *pu, |
| __u32 width, |
| struct result_of_calc_num_mprbs_needed *num_mprbs_needed_calc_result); |
| |
| #include "lib/mem_inter_connect.def" |
| |
| static const struct pu_ram_port_range pu_inst_2_ram_port[VPU_PU_NUMBER] = { |
| #define VPU_PU_INSTANCE(a, b, c, d, e, f, g, h, i) {f, g}, |
| #include "lib/vpul_pu_instances.def" |
| #undef VPU_PU_INSTANCE |
| }; |
| |
| const enum VPU_PU_TYPES pu_inst2type[VPU_PU_NUMBER + 1] = { |
| #define VPU_PU_INSTANCE(a, b, c, d, e, f, g, h,i) b, |
| #include "lib/vpul_pu_instances.def" |
| /* indicates "end of array" */ |
| END_OF_PU_INST2TYPE_TRANSLATOR |
| #undef VPU_PU_INSTANCE |
| }; |
| |
| /** |
| * the following arrays specify patterns of PU RAM ports to be used for |
| * MPRB connections |
| */ |
| const __u32 upscaler_2_mem_ports[] = {0, 2}; |
| |
| const __u32 inpaint_4_mem_ports[] = {0, 2, 4, 6}; |
| |
| const __u32 fast_disp__mem_ports_width_511_or_less[] = {0, 1, 2, 3, 4, 5, 6, |
| 7, 8, 9, 10, 11, 12, |
| 20, 22, 24, 26, 28, |
| 30, 32, 34, 37, 40}; |
| |
| const __u32 fast_disp__mem_ports_width_512_to_1023[] = {0, 1, 2, 3, 4, 5, 6, |
| 7, 8, 9, 10, 11, 12, |
| 20, 21, 22, 23, 24, |
| 25, 26, 27, 28, 29, |
| 30, 31, 32, 33, 34, |
| 35, 37, 38, 40, 41}; |
| |
| const __u32 fast_disp__mem_ports_width_1024_or_more[] = {0, 1, 2, 3, 4, 5, 6, |
| 7, 8, 9, 10, 11, 12, |
| 13, 14, 15, 16, 17, 18, |
| 19, 34, 35, 36, 37, 38, |
| 39, 40, 41, 42}; |
| |
| const __u32 integr_img_mem_ports_width_up_to_1024[] = {0, 4, 5, 6, 7, 8, 9, 10, |
| 11, 12, 13, 14, 15, 16, 17, 18, 19}; |
| |
| const __u32 integr_img_mem_ports_width_1025_to_2048[] = {0, 1, 4, 5, 6, 7, 8, |
| 9, 10, 11, 12, 13, 14, |
| 15, 16, 17, 18, 19}; |
| |
| const __u32 consecutive_mem_ports[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, |
| 10, 11, 12, 13, 14, 15, 16, 17, |
| 18, 19, 20, 21, 22, 23, 24, 25, |
| 26, 27, 28, 29, 30, 31, 32, 33, |
| 34, 35, 36, 37, 38, 39, 40, 41, |
| 42, 43, 44, 45, 46}; |
| |
| /** |
| * ************************************************************** |
| * following declarations are for filters (linear and non linear) |
| * ************************************************************** |
| */ |
/* dimensions of the filters MPRB requirements LUT */
#define NUM_OF_WIDTH_RANGES_FOR_FILTERS_MEM_REQUIR	5	/* width ranges */
#define NUM_OF_FILTER_SIZES_FOR_FILTERS_MEM_REQUIR	6	/* filter sizes */
| |
| struct mprb_requirements { |
| __u32 mprb_num_lrg; |
| __u32 mprb_num_sm; |
| }; |
| |
| struct mprb_req_per_pixel_size { |
| struct mprb_requirements mprb_req_for_8bit_pixel; |
| struct mprb_requirements mprb_req_for_16bit_pixel; |
| struct mprb_requirements mprb_req_for_32bit_pixel; |
| }; |
| |
| const __u32 width_sizes_2_index[NUM_OF_WIDTH_RANGES_FOR_FILTERS_MEM_REQUIR] = { |
| 257, 513, 1025, 2049, 4097}; |
| /** |
| * the following LUT provides the number of MPRBs used for filters, according |
| * to width and filter size |
| * The contents of each entry are pairs of values : |
| * - 1st value = number of large MPRBs needed |
| * - 2nd value = number of small MPRBs needed |
| * None of the pairs has its 2 values both different from 0 |
| */ |
| const struct mprb_req_per_pixel_size mprb_req_per_pixel_size_values |
| [NUM_OF_WIDTH_RANGES_FOR_FILTERS_MEM_REQUIR] |
| [NUM_OF_FILTER_SIZES_FOR_FILTERS_MEM_REQUIR] = { |
| |
| /* 1 byte/pixel - 2 bytes/pixel - 4 bytes/pixel */ |
| |
| /* width 1 to 256, filter size 1x1 */ |
| { |
| {{0, 0}, {0, 0}, {0, 0} }, |
| /* width 1 to 64, filter size 3x3 */ |
| {{0, 1}, {0, 1}, {0, 2} }, |
| /* width 1 to 64, filter size 5x5 */ |
| {{0, 1}, {0, 2}, {0, 4} }, |
| /* width 1 to 64, filter size 7x7 */ |
| {{0, 4}, {0, 4}, {0, 6} }, |
| /* width 1 to 64, filter size 9x9 */ |
| {{0, 2}, {0, 4}, {0, 0} }, |
| /* width 1 to 64, filter size 11x11 */ |
| {{0, 4}, {0, 6}, {0, 0} }, |
| }, { |
| /* width 257 to 512, filter size 1x1 */ |
| {{0, 0}, {0, 0}, {0, 0} }, |
| /* width 257 to 512, filter size 3x3 */ |
| {{0, 1}, {0, 2}, {2, 0} }, |
| /* width 257 to 512, filter size 5x5 */ |
| {{0, 2}, {0, 4}, {4, 0} }, |
| /* width 257 to 512, filter size 7x7 */ |
| {{0, 4}, {0, 4}, {6, 0} }, |
| /* width 257 to 512, filter size 9x9 */ |
| {{0, 4}, {4, 0}, {0, 0} }, |
| /* width 257 to 512, filter size 11x11 */ |
| {{4, 0}, {0, 6}, {0, 0} }, |
| }, { |
| /* width 513 to 1024, filter size 1x1 */ |
| {{0, 0}, {0, 0}, {0, 0} }, |
| /* width 513 to 1024, filter size 3x3 */ |
| {{0, 2}, {1, 0}, {2, 0} }, |
| /* width 513 to 1024, filter size 5x5 */ |
| {{1, 0}, {2, 0}, {4, 0} }, |
| /* width 513 to 1024, filter size 7x7 */ |
| {{4, 0}, {4, 0}, {6, 0} }, |
| /* width 513 to 1024, filter size 9x9 */ |
| {{2, 0}, {4, 0}, {0, 0} }, |
| /* width 513 to 1024, filter size 11x11 */ |
| {{4, 0}, {6, 0}, {0, 0} }, |
| }, { |
| /* width 1025 to 2048, filter size 1x1 */ |
| {{0, 0}, {0, 0}, {0, 0} }, |
| /* width 1025 to 2048, filter size 3x3 */ |
| {{1, 0}, {2, 0}, {4, 0} }, |
| /* width 1025 to 2048, filter size 5x5 */ |
| {{2, 0}, {4, 0}, {8, 0} }, |
| /* width 1025 to 2048, filter size 7x7 */ |
| {{4, 0}, {8, 0}, {12, 0} }, |
| /* width 1025 to 2048, filter size 9x9 */ |
| {{4, 0}, {8, 0}, {0, 0} }, |
| /* width 1025 to 2048, filter size 11x11 */ |
| {{8, 0}, {12, 0}, {0, 0} }, |
| }, { |
| /* width 2049 to 4096, filter size 1x1 */ |
| {{0, 0}, {0, 0}, {0, 0} }, |
| /* width 2049 to 4096, filter size 3x3 */ |
| {{2, 0}, {4, 0}, {8, 0} }, |
| /* width 2049 to 4096, filter size 5x5 */ |
| {{4, 0}, {8, 0}, {16, 0} }, |
| /* width 2049 to 4096, filter size 7x7 */ |
| {{8, 0}, {12, 0}, {24, 0} }, |
| /* width 2049 to 4096, filter size 9x9 */ |
| {{8, 0}, {16, 0}, {0, 0} }, |
| /* width 2049 to 4096, filter size 11x11 */ |
| {{16, 0}, {24, 0}, {0, 0} } |
| } |
| }; |
| |
| /** |
| * *********************************************** |
| * following declarations are for non-filter PUs |
| * *********************************************** |
| */ |
| struct mprb_needed_per_img_width { |
| __u32 width_upper_limit; |
| __u32 nbr_lrg_mprbs; |
| __u32 nbr_sm_mprbs; |
| const __u32 *ram_ports_array; |
| }; |
| |
| const struct mprb_needed_per_img_width mprb_per_img_width_upsc[] = { |
| {1025, 2, 0, upscaler_2_mem_ports}, |
| {0xFFFFFFFF, 4, 0, consecutive_mem_ports} |
| }; |
| |
| const struct mprb_needed_per_img_width mprb_per_img_width_downsc[] = { |
| {1025, 1, 0, consecutive_mem_ports}, |
| {2049, 2, 0, consecutive_mem_ports}, |
| {0xFFFFFFFF, 4, 0, consecutive_mem_ports} |
| }; |
| |
| const struct mprb_needed_per_img_width mprb_per_img_width_integ_II_or_CC[] = { |
| {1025, 1, 0, integr_img_mem_ports_width_up_to_1024}, |
| {2049, 2, 0, integr_img_mem_ports_width_1025_to_2048}, |
| {0xFFFFFFFF, 4, 0, consecutive_mem_ports} |
| }; |
| |
| const struct mprb_needed_per_img_width mprb_per_img_width_inpnt[] = { |
| {621, 4, 0, inpaint_4_mem_ports}, |
| {0xFFFFFFFF, 8, 0, consecutive_mem_ports} |
| }; |
| |
| const struct mprb_needed_per_img_width mprb_per_img_width_fdepth[] = { |
| {512, 13, 10, fast_disp__mem_ports_width_511_or_less}, |
| {1024, 13, 20, fast_disp__mem_ports_width_512_to_1023}, |
| {0xFFFFFFFF, 20, 9, fast_disp__mem_ports_width_1024_or_more} |
| }; |
| |
| static __s32 set_size_for_dma( |
| const struct vpul_task *task, |
| const struct vpul_vertex *vertex, |
| const struct vpul_pu *pu, |
| __u32 *actual_sizes_array) |
| { |
| __u32 inout_type; |
| __u32 memmap_index; |
| __u32 roi_idx; |
| const union vpul_pu_parameters *pu_params; |
| const struct vpul_pu_dma *pu_dma_in_params; |
| const struct vpul_image_size_desc *size_desc; |
| const struct vpul_process *process; |
| __u32 in_size_index; |
| |
| in_size_index = pu->in_size_idx; |
| pu_params = &pu->params; |
| pu_dma_in_params = &pu_params->dma; |
| inout_type = pu_dma_in_params->inout_index; |
| process = &vertex->proc; |
| roi_idx = process->io.inout_types[inout_type].roi_index; |
| memmap_index = process->io.fixed_map_roi[roi_idx].memmap_idx; |
| size_desc = |
| &task->memmap_desc[memmap_index].image_sizes; |
| if (process->io.sizes[in_size_index].type != VPUL_SIZEOP_INOUT){ |
| if (is_pu_dma_in(pu)) |
| return -1; /* err */ |
| else |
| return 0; /* dma out case, just do not update */ |
| } |
| actual_sizes_array[in_size_index] = size_desc->width; |
| return 0; |
| } |
| |
| static __s32 set_sizes_for_all_dma_pus(const struct vpul_task *task, |
| const struct vpul_vertex *vertex, |
| __u32 *actual_sizes_array) |
| { |
| __u32 num_of_pus; |
| const struct vpul_pu *curr_pu; |
| __u32 i, j, subchain_count; |
| const struct vpul_subchain *subchain; |
| |
| subchain_count = vertex->num_of_subchains; |
| subchain = fst_vtx_sc_ptr(task, vertex); |
| |
| for (i = 0; i < subchain_count; i++, subchain++) { |
| num_of_pus = subchain->num_of_pus; |
| curr_pu = fst_sc_pu_ptr(task, subchain); |
| |
| if ((num_of_pus != 0) && (curr_pu != NULL)) { |
| for (j = 0; j < num_of_pus; j++, curr_pu++) { |
| if (is_pu_dma_in(curr_pu) || is_pu_dma_out(curr_pu)) // condition should check dma in OR out is_pu_dma_in(curr_pu) || is_pu_dma_out(curr_pu) |
| if (set_size_for_dma( |
| task, |
| vertex, |
| curr_pu, |
| actual_sizes_array)) |
| return -1; |
| } |
| } |
| } |
| return 0; |
| } |
| |
| static __s32 set_3dnn_data_inout_sizes(const struct vpul_task *task, |
| const struct vpul_3dnn_process_base *proc3dnn_base, |
| __u32 *actual_sizes_array) |
| { |
| __u32 i; |
| __u32 num_layers = proc3dnn_base->number_of_layers; |
| const struct vpul_3dnn_size *size_op; |
| __u32 largest_width = 0; |
| __u32 num_operations = proc3dnn_base->io.n_sizes_op; |
| |
| for (i = 0; i < num_layers; i++) { |
| if (proc3dnn_base->layers[i].dim_size_input.x > largest_width) |
| largest_width = proc3dnn_base->layers[i].dim_size_input.x; |
| } |
| |
| for (i = 0; i < num_operations; i++) { |
| size_op = &proc3dnn_base->io.sizes_3dnn[i]; |
| if ((size_op->type == VPUL_3DXY_SIZEOP_INOUT) && |
| (size_op->inout_3dnn_type == VPUL_IO_3DNN_INPUT)) |
| actual_sizes_array[i] = largest_width; |
| } |
| return 0; |
| } |
| |
| static __u32 set_size_crop(const struct vpul_sizes *size_op, |
| const struct vpul_process *process, |
| __u32 *actual_sizes_array, |
| __u32 index) |
| { |
| __u32 previous_size; |
| const struct vpul_croppers *crop_params; |
| |
| previous_size = actual_sizes_array[size_op->src_idx]; |
| if (previous_size == 0xFFFFFFFF) |
| return 0; |
| crop_params = &process->io.croppers[size_op->op_ind]; |
| actual_sizes_array[index] = previous_size - crop_params->Left - crop_params->Right; |
| return 1; |
| } |
| |
| static __u32 set_size_scale(const struct vpul_sizes *size_op, |
| const struct vpul_process *process, |
| __u32 *actual_sizes_array, |
| __u32 index) |
| { |
| __u32 previous_size; |
| const struct vpul_scales *scale_params; |
| __u32 temp_val; |
| |
| previous_size = actual_sizes_array[size_op->src_idx]; |
| if (previous_size == 0xFFFFFFFF) |
| return 0; |
| scale_params = &process->io.scales[size_op->op_ind]; |
| temp_val = previous_size * scale_params->horizontal.numerator; |
| temp_val += scale_params->horizontal.denominator - 1; |
| actual_sizes_array[index] = temp_val / scale_params->horizontal.denominator; |
| return 1; |
| } |
| |
| static __s32 set_actual_sizes(const struct vpul_task *task, |
| const struct vpul_vertex *vertex, |
| __u32 *actual_sizes_array) |
| { |
| __u32 i; |
| const struct vpul_process *process = &vertex->proc; |
| __u32 num_operations = process->io.n_sizes_op; |
| const struct vpul_sizes *size_op; |
| __u32 sizes_were_calculated_flag; |
| __u32 sizes_left_to_calc_flag; |
| |
| /* initialize all entries in array to "not calculated yet" */ |
| for (i = 0; i < num_operations; i++) |
| actual_sizes_array[i] = 0xFFFFFFFF; |
| |
| /* set actual sizes for all DMA-in PUs (should be of type "inout") */ |
| if (set_sizes_for_all_dma_pus(task, vertex, actual_sizes_array)) |
| return -1; |
| |
| sizes_left_to_calc_flag = 1; |
| |
| while (sizes_left_to_calc_flag) { |
| sizes_were_calculated_flag = 0; |
| sizes_left_to_calc_flag = 0; |
| for (i = 0; i < num_operations; i++) { |
| if (actual_sizes_array[i] == 0xFFFFFFFF) { |
| size_op = &process->io.sizes[i]; |
| switch (size_op->type) { |
| case VPUL_SIZEOP_INOUT: |
| return -1; |
| case VPUL_SIZEOP_FIX: |
| /* TBD */ |
| break; |
| case VPUL_SIZEOP_FORCE_CROP: |
| case VPUL_SIZEOP_CROP_ON_EDGES_TILES: |
| if (set_size_crop(size_op, process, |
| actual_sizes_array, i)) |
| sizes_were_calculated_flag = 1; |
| else |
| sizes_left_to_calc_flag = 1; |
| break; |
| case VPUL_SIZEOP_SCALE: |
| if (set_size_scale(size_op, process, |
| actual_sizes_array, i)) |
| sizes_were_calculated_flag = 1; |
| else |
| sizes_left_to_calc_flag = 1; |
| break; |
| default: |
| return -1; |
| } |
| } |
| } |
| if ((sizes_left_to_calc_flag) && (!sizes_were_calculated_flag)) |
| return -1; |
| } |
| return 0; |
| } |
| |
| static __u32 set_size_3dnn_crop(const struct vpul_3dnn_size *size_op_3dnn, |
| const struct vpul_3dnn_process_base *proc_3dnn_base, |
| __u32 *actual_sizes_array, |
| __u32 index) |
| { |
| const struct vpul_croppers *crop_params; |
| __u32 previous_size; |
| __u32 i; |
| __u32 crop_val; |
| __u32 smallest_crop_val; |
| |
| previous_size = actual_sizes_array[size_op_3dnn->src_idx]; |
| if (previous_size == 0xFFFFFFFF) |
| return 0; |
| |
| /* find smallest crop size --> largest remaining size |
| * ("worst case" for MPRBs allocation) |
| */ |
| crop_params = &proc_3dnn_base->layers[0].croppers[size_op_3dnn->op_ind]; |
| smallest_crop_val = crop_params->Left + crop_params->Right; |
| |
| for (i = 0; i < proc_3dnn_base->number_of_layers; i++) { |
| crop_params = &proc_3dnn_base->layers[i].croppers[size_op_3dnn->op_ind]; |
| crop_val = crop_params->Left + crop_params->Right; |
| if (crop_val < smallest_crop_val) |
| smallest_crop_val = crop_val; |
| } |
| |
| actual_sizes_array[index] = previous_size - smallest_crop_val; |
| return 1; |
| } |
| |
| static __u32 set_size_3dnn_scale(const struct vpul_3dnn_size *size_op_3dnn, |
| const struct vpul_3dnn_process_base *proc_3dnn_base, |
| __u32 *actual_sizes_array, |
| __u32 index) |
| { |
| __u32 previous_size, temp_val, i; |
| const struct vpul_ratio *scale_val; |
| const struct vpul_ratio *largest_scale_val; |
| |
| previous_size = actual_sizes_array[size_op_3dnn->src_idx]; |
| if (previous_size == 0xFFFFFFFF) |
| return 0; |
| /* find largest scale size --> "worst case" for MPRBs allocation */ |
| largest_scale_val = &proc_3dnn_base->layers[0].scales[size_op_3dnn->op_ind].horizontal; |
| |
| for (i = 0; i < proc_3dnn_base->number_of_layers; i++) { |
| scale_val = &proc_3dnn_base->layers[i].scales[size_op_3dnn->op_ind].horizontal; |
| if ((scale_val->numerator * largest_scale_val->denominator) > |
| (scale_val->denominator * largest_scale_val->numerator)) { |
| largest_scale_val = scale_val; |
| } |
| } |
| |
| temp_val = previous_size * largest_scale_val->numerator; |
| temp_val += largest_scale_val->denominator - 1; |
| actual_sizes_array[index] = temp_val / largest_scale_val->denominator; |
| return 1; |
| } |
| |
| static __s32 set_actual_sizes_3dnn(const struct vpul_task *task, |
| const struct vpul_3dnn_process_base *proc_3dnn_base, |
| __u32 *actual_sizes_array) |
| { |
| __u32 i; |
| const struct vpul_3dnn_size *size_op_3dnn; |
| __u32 sizes_were_calculated_flag; |
| __u32 sizes_left_to_calc_flag; |
| __u32 num_operations = proc_3dnn_base->io.n_sizes_op; |
| |
| /* initialize all entries in array to "not calculated yet" */ |
| for (i = 0; i < num_operations; i++) |
| actual_sizes_array[i] = 0xFFFFFFFF; |
| |
| /* set actual sizes for all DMA-in PUs for data input (should be of type "inout") */ |
| if (set_3dnn_data_inout_sizes(task, proc_3dnn_base, actual_sizes_array)) |
| return -1; |
| |
| sizes_left_to_calc_flag = 1; |
| |
| while (sizes_left_to_calc_flag) { |
| sizes_were_calculated_flag = 0; |
| sizes_left_to_calc_flag = 0; |
| for (i = 0; i < num_operations; i++) { |
| if (actual_sizes_array[i] == 0xFFFFFFFF) { |
| size_op_3dnn = &proc_3dnn_base->io.sizes_3dnn[i]; |
| if (size_op_3dnn->type == VPUL_3DXY_SIZEOP_CROP) { |
| if (set_size_3dnn_crop(size_op_3dnn, |
| proc_3dnn_base, |
| actual_sizes_array, |
| i)) |
| sizes_were_calculated_flag = 1; |
| else |
| sizes_left_to_calc_flag = 1; |
| } else if (size_op_3dnn->type == VPUL_3DXY_SIZEOP_SCALE) { |
| if (set_size_3dnn_scale(size_op_3dnn, |
| proc_3dnn_base, |
| actual_sizes_array, |
| i)) |
| sizes_were_calculated_flag = 1; |
| else |
| sizes_left_to_calc_flag = 1; |
| } |
| } |
| } |
| if ((sizes_left_to_calc_flag) && (!sizes_were_calculated_flag)) |
| return -1; |
| } |
| return 0; |
| } |
| |
| static __u32 get_width_index_for_filter_mem_req(__u32 width) |
| { |
| __u32 i; |
| __u32 retval = 0xFF; |
| |
| for (i = 0; i < sizeof(width_sizes_2_index) / sizeof(__u32); i++) { |
| if (width < width_sizes_2_index[i]) { |
| retval = i; |
| break; |
| } |
| } |
| return retval; |
| } |
| |
| /** |
| * this function returns index in range 0 to 5, according to |
| * VPUH_FILTER_SIZE values - or 0xFF in case of invalid input |
| */ |
| __u32 get_filter_size_index_for_linear_filter(__u32 filter_size) |
| { |
| __u32 retval = 0xFF; |
| |
| switch (filter_size) { |
| case VPUH_FILTER_SIZE_1x1: |
| retval = 0; |
| break; |
| case VPUH_FILTER_SIZE_3x3: |
| retval = 1; |
| break; |
| case VPUH_FILTER_SIZE_5x5: |
| retval = 2; |
| break; |
| case VPUH_FILTER_SIZE_7x7: |
| retval = 3; |
| break; |
| case VPUH_FILTER_SIZE_9x9: |
| retval = 4; |
| break; |
| case VPUH_FILTER_SIZE_11x11: |
| retval = 5; |
| break; |
| default: |
| break; |
| } |
| BUG_ON(retval == 0xFF); |
| return retval; |
| } |
| |
| /** |
| * this function returns index = 1, 2 or 3, according to |
| * VPUH_NLF_MODE values - or 0xFF in case of invalid input |
| */ |
| __u32 get_filter_size_idx_for_non_linear_filter(__u32 nlf_mode) |
| { |
| __u32 retval = 0xFF; |
| |
| switch (nlf_mode) { |
| case VPUH_NLF_MODE_MIN: |
| case VPUH_NLF_MODE_MAX: |
| case VPUH_NLF_MODE_MED: |
| retval = 1; |
| break; |
| case VPUH_NLF_MODE_FAST: |
| retval = 3; |
| break; |
| case VPUH_NLF_MODE_CENSUS: |
| case VPUH_NLF_MODE_SM_SMOOTH: |
| retval = 2; |
| break; |
| default: |
| break; |
| } |
| return retval; |
| } |
| |
| void calc_num_mprbs_for_filters(__u32 img_width, __u32 pixel_bytes, |
| __u32 filter_size_index, |
| struct result_of_calc_num_mprbs_needed *num_mprbs_needed_calc_result) |
| { |
| __u32 width_idx; |
| const struct mprb_req_per_pixel_size *mprb_req_per_pix_size; |
| const struct mprb_requirements *mprb_req; |
| |
| width_idx = get_width_index_for_filter_mem_req(img_width); |
| mprb_req_per_pix_size = |
| &mprb_req_per_pixel_size_values[width_idx][filter_size_index]; |
| |
| if (pixel_bytes == VPUH_BITS_8_BITS) |
| mprb_req = &mprb_req_per_pix_size->mprb_req_for_8bit_pixel; |
| else if (pixel_bytes == VPUH_BITS_16_BITS) |
| mprb_req = &mprb_req_per_pix_size->mprb_req_for_16bit_pixel; |
| else |
| mprb_req = &mprb_req_per_pix_size->mprb_req_for_32bit_pixel; |
| |
| num_mprbs_needed_calc_result->num_lrg_mprbs = mprb_req->mprb_num_lrg; |
| num_mprbs_needed_calc_result->num_sm_mprbs = mprb_req->mprb_num_sm; |
| } |
| |
| void get_num_mprbs_acc_to_width_for_non_filters( |
| __u32 width, |
| const struct mprb_needed_per_img_width *mprb_per_image_width, |
| __u32 nbr_entries, |
| struct result_of_calc_num_mprbs_needed *num_mprbs_needed_calc_result) |
| { |
| __u32 i; |
| |
| for (i = 0; i < nbr_entries; i++) { |
| if (width < mprb_per_image_width[i].width_upper_limit) |
| break; |
| } |
| num_mprbs_needed_calc_result->num_lrg_mprbs = |
| mprb_per_image_width[i].nbr_lrg_mprbs; |
| num_mprbs_needed_calc_result->num_sm_mprbs = |
| mprb_per_image_width[i].nbr_sm_mprbs; |
| num_mprbs_needed_calc_result->ram_ports_array = |
| mprb_per_image_width[i].ram_ports_array; |
| } |
| |
| void vpu_resource_no_mprbs_needed(const struct vpul_pu *pu, |
| __u32 img_width, |
| struct result_of_calc_num_mprbs_needed *num_mprbs_needed_calc_result) |
| { |
| } |
| |
| void vpu_resource_calc_num_mprbs_for_nms(const struct vpul_pu *pu, |
| __u32 img_width, |
| struct result_of_calc_num_mprbs_needed *num_mprbs_needed_calc_result) |
| { |
| const union vpul_pu_parameters *pu_params = &pu->params; |
| const struct vpul_pu_nms *nms = &(pu_params->nms); |
| __u32 filter_size_index; |
| |
| filter_size_index = |
| get_filter_size_index_for_linear_filter(nms->support); |
| |
| calc_num_mprbs_for_filters(img_width, nms->bits_in, filter_size_index, |
| num_mprbs_needed_calc_result); |
| } |
| |
| void vpu_resource_calc_num_mprbs_for_slf(const struct vpul_pu *pu, |
| __u32 img_width, |
| struct result_of_calc_num_mprbs_needed *num_mprbs_needed_calc_result) |
| { |
| const union vpul_pu_parameters *pu_params = &pu->params; |
| const struct vpul_pu_slf *slf = &(pu_params->slf); |
| __u32 filter_size_index = 0; |
| __u32 ram_size_for_1_line = 0; |
| __u32 num_of_line_buffers = 0; |
| |
| if (slf->invert_columns) { |
| ram_size_for_1_line = img_width; |
| if (slf->bits_in == VPUH_BITS_16_BITS) |
| /* double line size if 16 bits per pixel */ |
| ram_size_for_1_line <<= 1; |
| /* divide by 1024 to find number of small MPRBs needed */ |
| num_of_line_buffers = (ram_size_for_1_line + 1023) >> 10; |
| if (num_of_line_buffers < 4) |
| num_mprbs_needed_calc_result->num_sm_mprbs = num_of_line_buffers; |
| else |
| /* divide by 4096 to find number of large MPRBs needed */ |
| num_mprbs_needed_calc_result->num_lrg_mprbs = |
| (ram_size_for_1_line + 4095) >> 12; |
| } else { |
| filter_size_index = |
| get_filter_size_index_for_linear_filter(slf->filter_size_mode); |
| |
| calc_num_mprbs_for_filters(img_width, slf->bits_in, filter_size_index, |
| num_mprbs_needed_calc_result); |
| } |
| } |
| |
| void vpu_resource_calc_num_mprbs_for_glf5(const struct vpul_pu *pu, |
| __u32 img_width, |
| struct result_of_calc_num_mprbs_needed *num_mprbs_needed_calc_result) |
| { |
| const union vpul_pu_parameters *pu_params = &pu->params; |
| const struct vpul_pu_glf *glf = &(pu_params->glf); |
| __u32 filter_size_index; |
| |
| filter_size_index = |
| get_filter_size_index_for_linear_filter(glf->filter_size_mode); |
| |
| calc_num_mprbs_for_filters(img_width, glf->bits_in, filter_size_index, |
| num_mprbs_needed_calc_result); |
| } |
| |
| void vpu_resource_calc_num_mprbs_for_nlf(const struct vpul_pu *pu, |
| __u32 img_width, |
| struct result_of_calc_num_mprbs_needed *num_mprbs_needed_calc_result) |
| { |
| const union vpul_pu_parameters *pu_params = &pu->params; |
| const struct vpul_pu_nlf *nlf = &(pu_params->nlf); |
| __u32 filter_size_index; |
| |
| filter_size_index = get_filter_size_idx_for_non_linear_filter( |
| nlf->filter_mode); |
| |
| calc_num_mprbs_for_filters(img_width, nlf->bits_in, filter_size_index, |
| num_mprbs_needed_calc_result); |
| } |
| |
| void vpu_resource_calc_num_mprbs_for_upsc(const struct vpul_pu *pu, |
| __u32 img_width, |
| struct result_of_calc_num_mprbs_needed *num_mprbs_needed_calc_result) |
| { |
| get_num_mprbs_acc_to_width_for_non_filters(img_width, |
| mprb_per_img_width_upsc, 2, |
| num_mprbs_needed_calc_result); |
| } |
| |
| void vpu_resource_calc_num_mprbs_for_downsc(const struct vpul_pu *pu, |
| __u32 img_width, |
| struct result_of_calc_num_mprbs_needed *num_mprbs_needed_calc_result) |
| { |
| get_num_mprbs_acc_to_width_for_non_filters(img_width, |
| mprb_per_img_width_downsc, 3, |
| num_mprbs_needed_calc_result); |
| } |
| |
| void vpu_resource_calc_num_mprbs_for_intimg(const struct vpul_pu *pu, |
| __u32 img_width, |
| struct result_of_calc_num_mprbs_needed *num_mprbs_needed_calc_result) |
| { |
| const union vpul_pu_parameters *pu_params = &pu->params; |
| const struct vpul_pu_integral *intg_pr = &pu_params->integral; |
| |
| switch (intg_pr->integral_image_mode) { |
| case VPUH_INTIMG_MODE_LUT: |
| /* lut size / 2048 */ |
| num_mprbs_needed_calc_result->num_lrg_mprbs = |
| (intg_pr->lut_number_of_values + 2047) >> 11; |
| break; |
| case VPUH_INTIMG_MODE_CONNECTED_COMP: |
| get_num_mprbs_acc_to_width_for_non_filters(img_width, |
| mprb_per_img_width_integ_II_or_CC, 3, |
| num_mprbs_needed_calc_result); |
| num_mprbs_needed_calc_result->num_lrg_mprbs += |
| ((intg_pr->cc_label_vector_size + 2047) >> 11); |
| break; |
| default: |
| /* II mode */ |
| get_num_mprbs_acc_to_width_for_non_filters(img_width, |
| mprb_per_img_width_integ_II_or_CC, 3, |
| num_mprbs_needed_calc_result); |
| break; |
| } |
| } |
| |
| void vpu_resource_calc_num_mprbs_for_histog(const struct vpul_pu *pu, |
| __u32 img_width, |
| struct result_of_calc_num_mprbs_needed *num_mprbs_needed_calc_result) |
| { |
| num_mprbs_needed_calc_result->num_lrg_mprbs = 2; |
| } |
| |
| void vpu_resource_calc_num_mprbs_for_dispar(const struct vpul_pu *pu, |
| __u32 img_width, |
| struct result_of_calc_num_mprbs_needed *num_mprbs_needed_calc_result) |
| { |
| num_mprbs_needed_calc_result->num_lrg_mprbs = 2; |
| } |
| |
| void vpu_resource_calc_num_mprbs_for_inpnt(const struct vpul_pu *pu, |
| __u32 img_width, |
| struct result_of_calc_num_mprbs_needed *num_mprbs_needed_calc_result) |
| { |
| get_num_mprbs_acc_to_width_for_non_filters(img_width, |
| mprb_per_img_width_inpnt, 2, |
| num_mprbs_needed_calc_result); |
| } |
| |
| void vpu_resource_calc_num_mprbs_for_tn2(const struct vpul_pu *pu, |
| __u32 img_width, |
| struct result_of_calc_num_mprbs_needed *num_mprbs_needed_calc_result) |
| { |
| /* all RAM ports in use */ |
| num_mprbs_needed_calc_result->num_lrg_mprbs = 4; |
| } |
| |
| void vpu_resource_calc_num_mprbs_for_tn3(const struct vpul_pu *pu, |
| __u32 img_width, |
| struct result_of_calc_num_mprbs_needed *num_mprbs_needed_calc_result) |
| { |
| /* all RAM ports in use */ |
| num_mprbs_needed_calc_result->num_lrg_mprbs = 4; |
| } |
| |
| void vpu_resource_calc_num_mprbs_for_tn4(const struct vpul_pu *pu, |
| __u32 img_width, |
| struct result_of_calc_num_mprbs_needed *num_mprbs_needed_calc_result) |
| { |
| /* all RAM ports in use */ |
| num_mprbs_needed_calc_result->num_lrg_mprbs = 8; |
| } |
| |
| void vpu_resource_calc_num_mprbs_for_tn5(const struct vpul_pu *pu, |
| __u32 img_width, |
| struct result_of_calc_num_mprbs_needed *num_mprbs_needed_calc_result) |
| { |
| /* all RAM ports in use */ |
| num_mprbs_needed_calc_result->num_lrg_mprbs = 8; |
| } |
| |
| void vpu_resource_calc_num_mprbs_for_cnn(const struct vpul_pu *pu, |
| __u32 img_width, |
| struct result_of_calc_num_mprbs_needed *num_mprbs_needed_calc_result) |
| { |
| /* all RAM ports in use */ |
| num_mprbs_needed_calc_result->num_lrg_mprbs = 20; |
| num_mprbs_needed_calc_result->num_sm_mprbs = 21; |
| } |
| |
| void vpu_resource_calc_num_mprbs_for_lut(const struct vpul_pu *pu, |
| __u32 img_width, |
| struct result_of_calc_num_mprbs_needed *num_mprbs_needed_calc_result) |
| { |
| num_mprbs_needed_calc_result->num_sm_mprbs = 1; |
| } |
| |
| void vpu_resource_calc_num_mprbs_for_flmorb(const struct vpul_pu *pu, |
| __u32 img_width, |
| struct result_of_calc_num_mprbs_needed *num_mprbs_needed_calc_result) |
| { |
| num_mprbs_needed_calc_result->num_lrg_mprbs = 3; |
| |
| |
| } |
| |
| void vpu_resource_calc_num_mprbs_for_fdepth(const struct vpul_pu *pu, |
| __u32 img_width, |
| struct result_of_calc_num_mprbs_needed *num_mprbs_needed_calc_result) |
| { |
| get_num_mprbs_acc_to_width_for_non_filters(img_width, |
| mprb_per_img_width_fdepth, 3, |
| num_mprbs_needed_calc_result); |
| } |
| |
| void vpu_resource_calc_num_mprbs_for_fifo(const struct vpul_pu *pu, |
| __u32 img_width, |
| struct result_of_calc_num_mprbs_needed *num_mprbs_needed_calc_result) |
| { |
| if (pu->mprb_type == VPUH_MPRB_TYPE_4K) |
| num_mprbs_needed_calc_result->num_lrg_mprbs = pu->n_mprbs; |
| else |
| num_mprbs_needed_calc_result->num_sm_mprbs = pu->n_mprbs; |
| } |
| |
| static __s32 __vpu_resource_pu_get(struct vpu_hw_pu *pu_device, |
| struct vpul_pu *pu, __u32 flags) |
| { |
| __s32 ret = 0; |
| enum VPU_PU_TYPES block_id; |
| __u32 channel_id; |
| struct vpu_hw_block *block; |
| |
| BUG_ON(!pu); |
| BUG_ON(!pu_device); |
| |
| if (pu->instance >= VPU_PU_NUMBER) { |
| ret = -1; |
| goto p_err; |
| } |
| |
| block_id = pu_inst2type[pu->instance]; |
| block = &pu_device->table[block_id]; |
| |
| channel_id = pu->instance - block->start; |
| if (test_bit(VPUL_GRAPH_FLAG_FIXED, |
| (const unsigned long *) &flags)) { |
| if (test_bit(channel_id, block->preempted)) { |
| ret = -1; |
| goto p_err; |
| } |
| } else if (test_bit(channel_id, block->preempted)) { |
| channel_id = find_first_zero_bit(block->preempted, |
| block->total); |
| if (channel_id >= block->total) { |
| ret = -1; |
| goto p_err; |
| } |
| pu->instance = |
| (enum vpul_pu_instance)(block->start + channel_id); |
| } |
| |
| set_bit(channel_id, block->preempted); |
| set_bit(channel_id, block->pre_allocated); |
| |
| p_err: |
| return ret; |
| } |
| |
| static __s32 __vpu_resource_pu_put(struct vpu_hardware *vpu_hw, |
| struct vpul_pu *pu) |
| { |
| __s32 ret = 0; |
| enum VPU_PU_TYPES block_id; |
| __u32 channel_id; |
| struct vpu_hw_block *block; |
| struct vpu_hw_pu *pu_device; |
| |
| BUG_ON(!vpu_hw); |
| BUG_ON(!pu); |
| |
| pu_device = &vpu_hw->pu; |
| |
| if (pu->instance >= VPU_PU_NUMBER) { |
| ret = -1; |
| goto p_err; |
| } |
| |
| block_id = pu_inst2type[pu->instance]; |
| block = &pu_device->table[block_id]; |
| |
| channel_id = pu->instance - block->start; |
| if (channel_id >= block->total) { |
| /* |
| * vpu_err("channel_id %d is invalid(%d, %d)\n", channel_id, |
| * pu->instance, block->start); |
| */ |
| ret = 0 - channel_id; |
| goto p_err; |
| } |
| |
| pu->instance = (enum vpul_pu_instance)(block->start); |
| clear_bit(channel_id, block->allocated); |
| |
| p_err: |
| return ret; |
| } |
| |
| static void __vpu_resource_mprb_put(struct vpu_hardware *vpu_hw, |
| struct vpul_pu *pu) |
| { |
| __u32 i, j; |
| __u32 num_of_mprbs; |
| __u32 mprb_num; |
| struct vpu_hw_block *mprb_block; |
| struct vpu_hw_mprb *mprb_device; |
| |
| BUG_ON(!vpu_hw); |
| BUG_ON(!pu); |
| |
| mprb_device = &vpu_hw->mprb; |
| num_of_mprbs = pu->n_mprbs; |
| if (num_of_mprbs) { |
| j = 0; |
| for (i = 0; i < VPU_MAXIMAL_MPRB_CONNECTED; i++) { |
| mprb_num = pu->mprbs[i]; |
| if (mprb_num != NO_MPRB_CONNECTED) { |
| if (mprb_num < VPU_HW_NUM_LARGE_MPRBS) |
| mprb_block = |
| &mprb_device->table[MPRB_large]; |
| else { |
| mprb_block = |
| &mprb_device->table[MPRB_small]; |
| mprb_num -= VPU_HW_NUM_LARGE_MPRBS; |
| } |
| |
| clear_bit(mprb_num, mprb_block->allocated); |
| j++; |
| if (j == num_of_mprbs) |
| break; |
| } |
| } |
| } |
| } |
| |
| /** |
| * This function initializes the "preempted" flags in bitmaps for all PUs |
| * and MPRBs |
| * it is called for each subchain, before allocation of all PUs and MPRBs |
| * needed by the subchain |
| * the bits set in the bitmaps indicate resources that are not available for |
| * allocation |
| * this is done with taking into account the options selected by "flags" : |
| * - VPUL_GRAPH_FLAG_SHARED_AMONG_TASKS - resources allocated for other tasks |
| * will be regarded as "available" |
| * - VPUL_GRAPH_FLAG_SHARED_AMONG_SUBCHAINS - resources allocated to other |
| * subchains in this task will be regarded as "available" |
| */ |
| static void vpu_resource_init_preempted(struct vpu_hw_pu *pu_device, |
| struct vpu_hw_mprb *mprb_device, |
| __u32 flags) |
| { |
| __u32 i; |
| |
| for (i = 0; i < pu_device->total; ++i) |
| bitmap_copy(pu_device->table[i].preempted, |
| pu_device->table[i].allocated, |
| pu_device->table[i].total); |
| for (i = 0; i < mprb_device->total; ++i) |
| bitmap_copy(mprb_device->table[i].preempted, |
| mprb_device->table[i].allocated, |
| mprb_device->table[i].total); |
| |
| /* |
| * if (test_bit(VPUL_GRAPH_FLAG_SHARED_AMONG_TASKS, |
| * (const unsigned long int *)&flags)) { |
| * for (i = 0; i < pu_device->total; ++i) |
| * bitmap_zero(pu_device->table[i].preempted, |
| * pu_device->table[i].total); |
| * for (i = 0; i < mprb_device->total; ++i) |
| * bitmap_zero(mprb_device->table[i].preempted, |
| * mprb_device->table[i].total); |
| * } |
| */ |
| if (!test_bit(VPUL_GRAPH_FLAG_SHARED_AMONG_SUBCHAINS, |
| (const unsigned long *)&flags)) { |
| for (i = 0; i < pu_device->total; ++i) |
| bitmap_or(pu_device->table[i].preempted, |
| pu_device->table[i].preempted, |
| pu_device->table[i].pre_allocated, |
| pu_device->table[i].total); |
| for (i = 0; i < mprb_device->total; ++i) |
| bitmap_or(mprb_device->table[i].preempted, |
| mprb_device->table[i].preempted, |
| mprb_device->table[i].pre_allocated, |
| mprb_device->table[i].total); |
| } |
| } |
| |
| static __calc_nbr_of_mprbs_needed calc_nbr_of_mprbs_needed[VPU_PU_TYPES_NUMBER] |
| = { |
| #define VPU_PU_TYPE(a, b, c) b, |
| #include "lib/vpul_pu_types.def" |
| #undef VPU_PU_TYPE |
| }; |
| |
| static void __vpu_resource_calc_num_mprbs_needed( |
| struct vpul_pu *pu, |
| __u32 *calculated_sizes, |
| struct result_of_calc_num_mprbs_needed *num_mprbs_needed_calc_result) |
| { |
| enum VPU_PU_TYPES pu_type; |
| |
| BUG_ON((pu->instance) >= (VPU_PU_NUMBER)); |
| pu_type = pu_inst2type[pu->instance]; |
| BUG_ON(pu_type >= VPU_PU_TYPES_NUMBER); |
| calc_nbr_of_mprbs_needed[pu_type](pu, calculated_sizes[pu->in_size_idx], |
| num_mprbs_needed_calc_result); |
| } |
| |
| static void update_availability_matrix(__u8 *ram_port_row_in_matrix, |
| __u32 first_ram_port, |
| __u32 num_large_mprbs_needed, |
| __u32 num_small_mprbs_needed, |
| struct vpu_hw_mprb *mprb_device, |
| const __u32 *ram_ports) |
| { |
| /** |
| * adding rows to availability matrix |
| * each row will reflect a PU RAM port and the MPRBs available for this port |
| * input : |
| * num_large_mprbs_needed : specifies number of rows to add for large MPRBs |
| * (1 for each needed large MPRB) |
| * num_small_mprbs_needed : specifies number of rows to add for small MPRBs |
| * (1 for each needed small MPRB), these rows will follow the rows for large |
| * MPRBs (if both "large" and "small" MPRBs are required) |
| * the rows to be copied are specified by ram_ports |
| * ram_port_row_in_matrix : points to beginning of 1st row to be added |
| * first_ram_port : index in interconnect matrix to 1st RAM port of PU instance |
| * operation : |
| * the function copies num_mprbs_needed rows from interconnect matrix to |
| * availability matrix |
| * bits for all MPRBs marked as "preempted" will be cleared in the copied rows |
| * bits for MPRBs of type (1K/4K) other than needed one will be cleared as well |
| * in case mprb_size_type is specified as "both" : |
| * bits for "small" MPRBs will be cleared in first num_large_mprbs_needed rows |
| * bits for "large" MPRBs will be cleared in the other rows |
| */ |
| __u32 i, j; |
| __u8 *availability_data; |
| const __u8 *interconnect_data; |
| struct vpu_hw_block *mprb_block; |
| __u32 interconnect_row; |
| __u32 total_num_mprbs_needed; |
| |
| |
| total_num_mprbs_needed = num_large_mprbs_needed + |
| num_small_mprbs_needed; |
| |
| /* fill rows for ports used for large MPRBs (come first) */ |
| for (i = 0; i < num_large_mprbs_needed; i++) { |
| interconnect_row = first_ram_port + ram_ports[i]; |
| /* mark all small MPRBs as "unavailable" in these rows */ |
| availability_data = ram_port_row_in_matrix + |
| OFFSET_TO_SMALL_MEM_BLOCKS; |
| memset(availability_data, 0, VPU_HW_NUM_SMALL_MPRBS); |
| |
| availability_data = ram_port_row_in_matrix + |
| OFFSET_TO_LARGE_MEM_BLOCKS; |
| interconnect_data = |
| &interconnect_matrix[interconnect_row] |
| [OFFSET_TO_LARGE_MEM_BLOCKS]; |
| mprb_block = &mprb_device->table[MPRB_large]; |
| for (j = 0; j < VPU_HW_NUM_LARGE_MPRBS; j++) { |
| if ((*interconnect_data) && |
| (!test_bit(j, mprb_block->preempted))) |
| *availability_data = 1; |
| else |
| *availability_data = 0; |
| interconnect_data++; |
| availability_data++; |
| } |
| ram_port_row_in_matrix += VPU_HW_TOT_NUM_MPRBS; |
| } |
| |
| /* then fill next rows for ports used for small MPRBs */ |
| for (i = num_large_mprbs_needed; i < total_num_mprbs_needed; i++) { |
| interconnect_row = first_ram_port + ram_ports[i]; |
| /* mark all large MPRBs as "unavailable" in these rows */ |
| availability_data = ram_port_row_in_matrix + |
| OFFSET_TO_LARGE_MEM_BLOCKS; |
| memset(availability_data, 0, VPU_HW_NUM_LARGE_MPRBS); |
| |
| availability_data = ram_port_row_in_matrix + |
| OFFSET_TO_SMALL_MEM_BLOCKS; |
| interconnect_data = |
| &interconnect_matrix[interconnect_row] |
| [OFFSET_TO_SMALL_MEM_BLOCKS]; |
| mprb_block = &mprb_device->table[MPRB_small]; |
| for (j = 0; j < VPU_HW_NUM_SMALL_MPRBS; j++) { |
| if ((*interconnect_data) && |
| (!test_bit(j, mprb_block->preempted))) |
| *availability_data = 1; |
| else |
| *availability_data = 0; |
| interconnect_data++; |
| availability_data++; |
| } |
| ram_port_row_in_matrix += VPU_HW_TOT_NUM_MPRBS; |
| interconnect_row++; |
| } |
| } |
| |
| static __s32 prepare_mprb_alloc_for_pu( |
| __u32 cumul_num_mprbs, |
| struct result_of_calc_num_mprbs_needed *num_mprbs_needed_calc_result, |
| struct vpul_pu *pu, |
| __u8 *ram_port_row_in_matrix, |
| struct vpu_hw_mprb *mprb_device, |
| struct port_index_2_pu *port_index_to_pu) |
| { |
| __u32 first_ram_port; |
| __u32 num_mprbs_large; |
| __u32 num_mprbs_small; |
| __u32 number_of_ram_ports; |
| __u32 i; |
| __s32 ret = 0; |
| __u32 num_mprbs_needed; |
| const __u32 *mem_ports_array; |
| |
| mem_ports_array = num_mprbs_needed_calc_result->ram_ports_array; |
| num_mprbs_large = num_mprbs_needed_calc_result->num_lrg_mprbs; |
| num_mprbs_small = num_mprbs_needed_calc_result->num_sm_mprbs; |
| num_mprbs_needed = num_mprbs_large + num_mprbs_small; |
| |
| /* initialize all MPRB entries to "not in use" */ |
| for (i = 0; i < VPU_MAXIMAL_MPRB_CONNECTED; i++) |
| pu->mprbs[i] = NO_MPRB_CONNECTED; |
| |
| number_of_ram_ports = |
| pu_inst_2_ram_port[pu->instance].number_of_ram_ports; |
| |
| if (num_mprbs_needed > number_of_ram_ports) { |
| /* failure */ |
| ret = -1; |
| goto p_err; |
| } |
| if ((cumul_num_mprbs + num_mprbs_needed) > VPU_HW_TOT_NUM_MPRBS) { |
| /* failure */ |
| ret = -1; |
| goto p_err; |
| } |
| first_ram_port = pu_inst_2_ram_port[pu->instance].first_ram_port; |
| |
| if (num_mprbs_large) |
| pu->mprb_type = VPUH_MPRB_TYPE_4K; |
| else |
| pu->mprb_type = VPUH_MPRB_TYPE_1K; |
| |
| update_availability_matrix(ram_port_row_in_matrix, |
| first_ram_port, |
| num_mprbs_large, num_mprbs_small, |
| mprb_device, |
| mem_ports_array); |
| |
| for (i = 0; i < num_mprbs_needed; i++) { |
| port_index_to_pu[cumul_num_mprbs + i].pu = pu; |
| port_index_to_pu[cumul_num_mprbs + i].port_index_in_pu = |
| mem_ports_array[i]; |
| } |
| p_err: |
| return ret; |
| } |
| |
| static void allocate_mprb(__u32 port_num, __u32 mprb_num, |
| struct vpu_hw_mprb *mprb_device, |
| struct port_index_2_pu *port_index_to_pu) |
| { |
| struct vpu_hw_block *mprb_block; |
| struct vpul_pu *pu_for_this_port; |
| __u32 port_num_for_pu; |
| |
| /* retrieve PU descr for which this MPRB is allocated */ |
| pu_for_this_port = port_index_to_pu[port_num].pu; |
| port_num_for_pu = port_index_to_pu[port_num].port_index_in_pu; |
| /* write MPRB num to mprbs array for PU, at index = port index */ |
| pu_for_this_port->mprbs[port_num_for_pu] = mprb_num; |
| |
| if (mprb_num < VPU_HW_NUM_LARGE_MPRBS) |
| mprb_block = &mprb_device->table[MPRB_large]; |
| else { |
| mprb_block = &mprb_device->table[MPRB_small]; |
| mprb_num -= VPU_HW_NUM_LARGE_MPRBS; |
| } |
| set_bit(mprb_num, mprb_block->preempted); |
| set_bit(mprb_num, mprb_block->pre_allocated); |
| } |
| |
| /** |
| * this function is called after allocating an MPRB, in case of port with only |
| * 1 suitable MPRB |
| * its main task is to zero in availability matrix the row and column for |
| * port / MPRB which was allocated (specified at input as i and j) |
| * in the process, it also decrements "number of suitable MPRBs" for all ports |
| * for which the allocated MPRB was previously available; if, in this process, |
| * one of the "number of suitable MPRBs" values is decremented to 1, and its |
| * port index k in availability matrix is < current port index (which is i), |
| * this index will be the value returned by this function; otherwise, it will |
| * return value of i itself. |
| */ |
| static __u32 upd_avail_matrix_after_alloc_0( |
| __u8 availability_matrix[][VPU_HW_TOT_NUM_MPRBS], |
| __u32 total_num_mprbs_needed, |
| __u32 i, __u32 j, |
| __u32 *total_mprbs_per_port, |
| __u32 *total_ports_per_mprb) |
| { |
| __u32 retval; |
| __u32 k; |
| |
| retval = i; |
| total_ports_per_mprb[j] = 0; |
| for (k = 0; k < total_num_mprbs_needed; k++) { |
| if (availability_matrix[k][j]) { |
| availability_matrix[k][j] = 0; |
| total_mprbs_per_port[k]--; |
| /* retval shall be modified at most once */ |
| if ((total_mprbs_per_port[k] == 1) && (k < retval)) |
| retval = k; |
| } |
| } |
| return retval; |
| } |
| |
| /** |
| * this function is called after allocating an MPRB, in case of MPRB with only |
| * 1 port for which this MPRB is suitable |
| * its main task is to zero in availability matrix the row and column for |
| * port / MPRB which was allocated (specified at input as i and j) |
| * in the process, it also decrements "number of ports" for all MPRBs that |
| * could be used by this port (MPRBs for which there is a 1 in row i); if, in |
| * this process, one of the "number of ports" values is decremented to 1, and |
| * MPRB index k in availability matrix is < current MPRB index (which is j), |
| * the value returned by this function will be k; otherwise, it will return |
| * value of j itself. |
| */ |
| static __u32 upd_avail_matrix_after_alloc_1( |
| __u8 availability_matrix[][VPU_HW_TOT_NUM_MPRBS], |
| __u32 i, __u32 j, |
| __u32 *total_mprbs_per_port, |
| __u32 *total_ports_per_mprb) |
| { |
| __u32 retval; |
| __u32 k; |
| |
| retval = j; |
| total_mprbs_per_port[i] = 0; |
| for (k = 0; k < VPU_HW_TOT_NUM_MPRBS; k++) { |
| if (availability_matrix[i][k]) { |
| availability_matrix[i][k] = 0; |
| total_ports_per_mprb[k]--; |
| /* retval shall be modified at most once */ |
| if ((total_ports_per_mprb[k] == 1) && (k < retval)) |
| retval = k; |
| } |
| } |
| return retval; |
| } |
| |
| /** |
| * this function is called after allocating an MPRB, in case of MPRB with |
| * minimum number of ports (but > 1) for which this MPRB is suitable. |
| * its main task is to zero in availability matrix the row for the port |
| * to which this MPRB was allocated (specified at input as i) |
| * in the process, it also decrements "number of ports" for all MPRBs that |
| * could be used by this port (MPRBs for which there is a 1 in row i); if, in |
| * this process, one of the "number of ports" values is decremented to 1, the |
| * smallest value of index of such an MPRB is returned; otherwise, it returns |
| * value 0xFF. |
| */ |
| static __u32 upd_avail_matrix_after_alloc_2( |
| __u8 availability_matrix[][VPU_HW_TOT_NUM_MPRBS], |
| __u32 i, __u32 *total_mprbs_per_port, |
| __u32 *total_ports_per_mprb) |
| { |
| __u32 retval; |
| __u32 k; |
| |
| retval = 0xFF; |
| total_mprbs_per_port[i] = 0; |
| for (k = 0; k < VPU_HW_TOT_NUM_MPRBS; k++) { |
| if (availability_matrix[i][k]) { |
| availability_matrix[i][k] = 0; |
| total_ports_per_mprb[k]--; |
| if ((total_ports_per_mprb[k] == 1) && (retval == 0xFF)) |
| retval = k; |
| } |
| } |
| return retval; |
| } |
| |
| /** |
| * this function is called after allocating an MPRB, in case of MPRB with |
| * minimum number of ports (but > 1) for which this MPRB is suitable. |
| * its main task is to zero in availability matrix the column of the MPRB that |
| * was allocated (specified at input as j) |
| * in the process, it also decrements "number of suitable MPRBs" for all ports |
| * that could have used this MPRB (ports for which there is a 1 in column j); |
| * if, in this process, one of the "number of suitable MPRBs" values is |
| * decremented to 1, the smallest value of index of such a port is returned; |
| * otherwise, it returns value 0xFF. |
| */ |
| static __u32 upd_avail_matrix_after_alloc_3( |
| __u8 availability_matrix[][VPU_HW_TOT_NUM_MPRBS], |
| __u32 j, __u32 total_num_mprbs_needed, |
| __u32 *total_mprbs_per_port, |
| __u32 *total_ports_per_mprb) |
| { |
| __u32 retval; |
| __u32 k; |
| |
| retval = 0xFF; |
| total_ports_per_mprb[j] = 0; |
| for (k = 0; k < total_num_mprbs_needed; k++) { |
| if (availability_matrix[k][j]) { |
| availability_matrix[k][j] = 0; |
| total_mprbs_per_port[k]--; |
| if ((total_mprbs_per_port[k] == 1) && (retval == 0xFF)) |
| retval = k; |
| } |
| } |
| return retval; |
| } |
| |
| static __s32 __vpu_resource_mprbs_get( |
| __u8 availability_matrix[][VPU_HW_TOT_NUM_MPRBS], |
| __u32 total_num_mprbs_needed, |
| struct vpu_hw_mprb *mprb_device, |
| struct port_index_2_pu *port_index_to_pu) |
| { |
| /** |
| * used for comparing to total_num_mprbs_needed (successful |
| * completion), and also for back-tracking |
| */ |
| __u32 num_mprbs_allocated; |
| __u32 total_mprbs_per_port[VPU_HW_TOT_NUM_MPRBS]; |
| __u32 total_ports_per_mprb[VPU_HW_TOT_NUM_MPRBS]; |
| __u32 i, j, k; |
| __s32 ret = 0; |
| __u32 min_num_ports_per_mprb; |
| __u32 mprb_with_min_num_ports; |
| __u32 index_of_port_with_1_mprb; |
| __u32 index_of_mprb_with_1_port; |
| |
| num_mprbs_allocated = 0; |
| |
| BUG_ON( total_num_mprbs_needed > VPU_HW_TOT_NUM_MPRBS); |
| |
| /* calculate and store totals per row (for all ports) */ |
| for (i = 0; i < total_num_mprbs_needed; i++) { |
| total_mprbs_per_port[i] = 0; |
| for (j = 0; j < VPU_HW_TOT_NUM_MPRBS; j++) |
| total_mprbs_per_port[i] += |
| availability_matrix[i][j]; |
| } |
| |
| /* calculate and store totals per column (for all MPRBs) */ |
| for (j = 0; j < VPU_HW_TOT_NUM_MPRBS; j++) { |
| total_ports_per_mprb[j] = 0; |
| for (i = 0; i < total_num_mprbs_needed; i++) |
| total_ports_per_mprb[j] += |
| availability_matrix[i][j]; |
| } |
| |
| index_of_port_with_1_mprb = 0; |
| /** |
| * the following loop is executed as long as there are ports for which |
| * there is only 1 MPRB available (total_mprbs_per_port = 1) |
| */ |
| find_port_with_1_mprb: |
| BUG_ON( total_num_mprbs_needed > VPU_HW_TOT_NUM_MPRBS); |
| for (i = index_of_port_with_1_mprb; i < total_num_mprbs_needed;) { |
| k = i; |
| if (total_mprbs_per_port[i] == 1) { |
| for (j = 0; j < VPU_HW_TOT_NUM_MPRBS; j++) { |
| /* find THE mprb available for this port */ |
| if (availability_matrix[i][j]) { |
| allocate_mprb(i, j, mprb_device, |
| port_index_to_pu); |
| num_mprbs_allocated++; |
| if (num_mprbs_allocated == |
| total_num_mprbs_needed) |
| /* successfully completed */ |
| goto end_vpu_rsrc_mprbs_get; |
| /* remove MPRB from |
| * availability matrix |
| */ |
| k = upd_avail_matrix_after_alloc_0( |
| availability_matrix, |
| total_num_mprbs_needed, i, j, |
| total_mprbs_per_port, |
| total_ports_per_mprb); |
| break; |
| } |
| } |
| } |
| /** |
| * if found port #k for which number of suitable MPRBs was |
| * decremented to 1 : proceed from port #k |
| */ |
| if (k == i) |
| i++; |
| else |
| i = k; |
| } |
| index_of_mprb_with_1_port = 0; |
| |
| /** |
| * the following loop is executed as long as there are mprbs available |
| * to only 1 port (total_ports_per_mprb == 1) |
| */ |
| find_mprb_with_1_port: |
| BUG_ON( total_num_mprbs_needed > VPU_HW_TOT_NUM_MPRBS); |
| for (j = index_of_mprb_with_1_port; j < VPU_HW_TOT_NUM_MPRBS;) { |
| k = j; |
| if (total_ports_per_mprb[j] == 1) { |
| for (i = 0; i < total_num_mprbs_needed; i++) { |
| /* find THE port which can use this MPRB */ |
| if (availability_matrix[i][j]) { |
| allocate_mprb(i, j, mprb_device, |
| port_index_to_pu); |
| num_mprbs_allocated++; |
| if (num_mprbs_allocated == |
| total_num_mprbs_needed) |
| /* successfully completed */ |
| goto end_vpu_rsrc_mprbs_get; |
| k = upd_avail_matrix_after_alloc_1( |
| availability_matrix, i, j, |
| total_mprbs_per_port, |
| total_ports_per_mprb); |
| break; |
| } |
| } |
| } |
| /** |
| * if found MPRB #k with number of ports for which this MPRB is |
| * suitable was decremented to 1 : proceed from MPRB #k |
| */ |
| if (k == j) |
| j++; |
| else |
| j = k; |
| } |
| |
| /* find mprb with minimum number of ports */ |
| find_mprb_with_min_num_ports: |
| min_num_ports_per_mprb = 0xFF; |
| mprb_with_min_num_ports = 0xFF; |
| |
| for (j = 0; j < VPU_HW_TOT_NUM_MPRBS; j++) { |
| if ((total_ports_per_mprb[j] < min_num_ports_per_mprb) && |
| (total_ports_per_mprb[j] != 0)) { |
| min_num_ports_per_mprb = total_ports_per_mprb[j]; |
| mprb_with_min_num_ports = j; |
| } |
| } |
| if (mprb_with_min_num_ports == 0xFF) { |
| /* no more MPRBs available : alloc failed */ |
| ret = -1; |
| goto end_vpu_rsrc_mprbs_get; |
| } |
| j = mprb_with_min_num_ports; |
| /** |
| * found mprb with minimum number of ports (but non-zero) : allocate it |
| * to first suitable port |
| */ |
| for (i = 0; i < total_num_mprbs_needed; i++) { |
| if (availability_matrix[i][j]) |
| break; |
| } |
| allocate_mprb(i, j, mprb_device, port_index_to_pu); |
| num_mprbs_allocated++; |
| if (num_mprbs_allocated == total_num_mprbs_needed) |
| /* successfully completed */ |
| goto end_vpu_rsrc_mprbs_get; |
| |
| /* remove MPRB from availability matrix */ |
| availability_matrix[i][j] = 0; |
| |
| /* remove this row from availability matrix */ |
| index_of_mprb_with_1_port = upd_avail_matrix_after_alloc_2( |
| availability_matrix, i, |
| total_mprbs_per_port, total_ports_per_mprb); |
| |
| /* remove this column from availability matrix */ |
| index_of_port_with_1_mprb = upd_avail_matrix_after_alloc_3( |
| availability_matrix, j, total_num_mprbs_needed, |
| total_mprbs_per_port, total_ports_per_mprb); |
| |
| |
| /* found a port for which only 1 MPRB is available ? */ |
| if (index_of_port_with_1_mprb != 0xFF) |
| goto find_port_with_1_mprb; |
| /* found an MPRB available to only 1 port ? */ |
| if (index_of_mprb_with_1_port != 0xFF) |
| goto find_mprb_with_1_port; |
| goto find_mprb_with_min_num_ports; |
| end_vpu_rsrc_mprbs_get: |
| return ret; |
| } |
| |
| |
| static __u8 availability_matrix[VPU_HW_TOT_NUM_MPRBS][VPU_HW_TOT_NUM_MPRBS]; |
| static __u32 actual_sizes[VPUL_MAX_SIZES_OP]; |
| |
| static __s32 vpu_resource_get_for_vertex(struct vpul_task *task, |
| __u32 flags, |
| struct vpul_vertex *vertex, |
| const struct vpul_3dnn_process_base *proc_3dnn_base, |
| struct vpu_hw_pu *pu_device, |
| struct vpu_hw_mprb *mprb_device) |
| { |
| struct result_of_calc_num_mprbs_needed mprbs_needed; |
| __u32 j, k, subchain_cnt, pu_cnt, orig_pu_count; |
| __u32 num_mprbs_needed; |
| __u32 cumul_n_mprbs_needed; |
| struct vpul_subchain *subchain; |
| struct vpul_pu *pu; |
| __u32 flags_copy; |
| struct port_index_2_pu port_index_to_pu[VPU_HW_TOT_NUM_MPRBS]; |
| |
| /* used for accessing vpul_pu and updating its contents when allocating MPRB */ |
| __s32 ret = 0; |
| |
| subchain_cnt = vertex->num_of_subchains; |
| subchain = fst_vtx_sc_ptr(task, vertex); |
| if (vertex->vtype == VPUL_VERTEXT_3DNN_PROC) |
| ret = set_actual_sizes_3dnn(task, proc_3dnn_base, actual_sizes); |
| else |
| ret = set_actual_sizes(task, vertex, actual_sizes); |
| if (ret) |
| goto p_err; |
| for (j = 0; j < subchain_cnt; j++, subchain++) { |
| if (subchain->stype != VPUL_SUB_CH_CPU_OP) { |
| /* mark "preempted" (= non-allocatable) resources */ |
| vpu_resource_init_preempted(pu_device, mprb_device, flags); |
| orig_pu_count = subchain->num_of_pus; |
| if (!test_bit(VPUL_GRAPH_FLAG_DSBL_LATENCY_BALANCING, |
| (const unsigned long *) &flags)) { |
| ret = latency_balancing(task, vertex, subchain, actual_sizes); |
| if (ret) |
| goto p_err; |
| } |
| /* new count includes PUs added for delay balancing */ |
| pu_cnt = subchain->num_of_pus; |
| pu = fst_sc_pu_ptr(task, subchain); |
| cumul_n_mprbs_needed = 0; |
| for (k = 0; k < pu_cnt; k++, pu++) { |
| flags_copy = flags; |
| /* for PUs added for delay balancing : |
| * ignore VPUL_GRAPH_FLAG_FIXED |
| */ |
| if (k >= orig_pu_count) |
| clear_bit(VPUL_GRAPH_FLAG_FIXED, |
| (volatile unsigned long *)&flags_copy); |
| ret = __vpu_resource_pu_get(pu_device, pu, flags_copy); |
| if (ret) |
| goto p_err; |
| /** |
| * initializing result structure to default values, |
| * __vpu_resource_calc_num_mprbs_needed will update |
| * only members of this structure whose values are |
| * different from default values |
| */ |
| mprbs_needed.num_lrg_mprbs = 0; |
| mprbs_needed.num_sm_mprbs = 0; |
| mprbs_needed.ram_ports_array = consecutive_mem_ports; |
| |
| /* for PUs added for delay balancing : number of MPRBs needed |
| * already been calculated by delay balancing function |
| */ |
| if (k < orig_pu_count) |
| __vpu_resource_calc_num_mprbs_needed(pu, actual_sizes, &mprbs_needed); |
| else if (pu->mprb_type == VPUH_MPRB_TYPE_4K) |
| mprbs_needed.num_lrg_mprbs = pu->n_mprbs; |
| else |
| mprbs_needed.num_sm_mprbs = pu->n_mprbs; |
| |
| if (pu->instance == |
| ((flags >> VPUL_STATIC_ALLOC_PU_INSTANCE_LSB) & |
| VPUL_STATIC_ALLOC_PU_INSTANCE_MASK)) { |
| if (flags & VPUL_STATIC_ALLOC_LARGE_INSTEAD_SMALL_MPRB_MASK) |
| { |
| if (mprbs_needed.num_lrg_mprbs == 0) { |
| mprbs_needed.num_lrg_mprbs = mprbs_needed.num_sm_mprbs; |
| mprbs_needed.num_sm_mprbs = 0; |
| } |
| } |
| } |
| |
| num_mprbs_needed = mprbs_needed.num_lrg_mprbs + |
| mprbs_needed.num_sm_mprbs; |
| pu->n_mprbs = num_mprbs_needed; |
| if (num_mprbs_needed) { |
| ret = prepare_mprb_alloc_for_pu( |
| cumul_n_mprbs_needed, |
| &mprbs_needed, pu, |
| &availability_matrix[cumul_n_mprbs_needed][0], |
| mprb_device, |
| port_index_to_pu); |
| if (ret) |
| goto p_err; |
| cumul_n_mprbs_needed += num_mprbs_needed; |
| } |
| } |
| /** |
| * MPRBs allocation is performed after allocating all PU instances for |
| * subchain and calculating the number of MPRBs needed for each of them |
| */ |
| if (cumul_n_mprbs_needed) { |
| ret = __vpu_resource_mprbs_get(availability_matrix, |
| cumul_n_mprbs_needed, |
| mprb_device, |
| port_index_to_pu); |
| if (ret) |
| goto p_err; |
| } |
| } |
| } |
| p_err: |
| if (ret == 0) |
| ret = VPU_STATUS_SUCCESS; |
| else if (ret != VPU_STATUS_BAD_PARAMS) |
| ret = VPU_STATUS_FAILURE; |
| return ret; |
| } |
| |
| __s32 __vpu_resource_get(struct vpu_hardware *vpu_hw, struct vpul_task *task, __u32 flags) |
| { |
| __u32 i; |
| struct vpu_hw_pu *pu_device; |
| struct vpu_hw_mprb *mprb_device; |
| struct vpul_vertex *vertex; |
| const struct vpul_3dnn_process_base *proc_3dnn_base; |
| __s32 ret = VPU_STATUS_SUCCESS; |
| |
| if ((vpu_hw) && (task)) { |
| pu_device = &vpu_hw->pu; |
| mprb_device = &vpu_hw->mprb; |
| vertex = fst_vtx_ptr(task); |
| proc_3dnn_base = fst_3dnn_process_base_ptr(task); |
| |
| /* prepare */ |
| for (i = 0; i < pu_device->total; ++i) |
| bitmap_zero(pu_device->table[i].pre_allocated, |
| pu_device->table[i].total); |
| for (i = 0; i < mprb_device->total; ++i) |
| bitmap_zero(mprb_device->table[i].pre_allocated, |
| mprb_device->table[i].total); |
| |
| /* estimation */ |
| for (i = 0; i < task->t_num_of_vertices; i++, vertex++) { |
| /* skip vertex if not of type "process" |
| *(3DNN processes handled separately) |
| */ |
| if (vertex->vtype == VPUL_VERTEXT_PROC) { |
| ret = vpu_resource_get_for_vertex(task, |
| flags, |
| vertex, |
| NULL, |
| pu_device, |
| mprb_device); |
| if (ret != VPU_STATUS_SUCCESS) |
| break; |
| } |
| } |
| if (ret == VPU_STATUS_SUCCESS) { |
| for (i = 0; i < task->t_num_of_3dnn_process_bases; i++, proc_3dnn_base++) { |
| vertex = vertex_referencing_this_3dnn_proc_base(task, i); |
| if (vertex) { |
| ret = vpu_resource_get_for_vertex(task, |
| flags, |
| vertex, |
| proc_3dnn_base, |
| pu_device, |
| mprb_device); |
| if (ret != VPU_STATUS_SUCCESS) |
| break; |
| } |
| } |
| if (ret == VPU_STATUS_SUCCESS) { |
| /* acquire */ |
| for (i = 0; i < pu_device->total; ++i) |
| bitmap_or(pu_device->table[i].allocated, |
| pu_device->table[i].allocated, |
| pu_device->table[i].pre_allocated, |
| pu_device->table[i].total); |
| for (i = 0; i < mprb_device->total; ++i) |
| bitmap_or(mprb_device->table[i].allocated, |
| mprb_device->table[i].allocated, |
| mprb_device->table[i].pre_allocated, |
| mprb_device->table[i].total); |
| } |
| } |
| } else |
| ret = VPU_STATUS_BAD_PARAMS; |
| |
| return ret; |
| } |
| |
| __s32 __vpu_resource_put(struct vpu_hardware *vpu_hw, |
| struct vpul_task *task) |
| { |
| __u32 i, j, subchain_cnt, pu_cnt; |
| struct vpul_subchain *subchain; |
| struct vpul_pu *pu; |
| __s32 ret = 0; |
| |
| if ((!vpu_hw) || (!task)) { |
| ret = VPU_STATUS_BAD_PARAMS; |
| goto p_err; |
| } |
| |
| subchain_cnt = task->t_num_of_subchains; |
| subchain = fst_sc_ptr(task); |
| for (i = 0; i < subchain_cnt; i++, subchain++) { |
| pu_cnt = subchain->num_of_pus; |
| if(subchain->stype!=VPUL_SUB_CH_CPU_OP){ |
| pu = fst_sc_pu_ptr(task, subchain); |
| for (j = 0; j < pu_cnt; j++, pu++) { |
| ret = __vpu_resource_pu_put(vpu_hw, pu); |
| if (ret) |
| goto p_err; |
| __vpu_resource_mprb_put(vpu_hw, pu); |
| } |
| } |
| } |
| p_err: |
| if (ret == 0) |
| ret = VPU_STATUS_SUCCESS; |
| else if (ret != VPU_STATUS_BAD_PARAMS) |
| ret = VPU_STATUS_FAILURE; |
| return ret; |
| } |