/*
   forward-v4l2-utils.c - v4l2 utilities for SoftLab-NSK Forward video boards

   Copyright (C) 2017 - 2024 SoftLab-NSK <forward@softlab.tv>

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; either version 2
   of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
*/

#include "forward.h"
#include "forward-v4l2-utils.h"

#pragma GCC push_options
#pragma GCC optimize("O3")

/*
 * Copy @size bytes from @in_data to @out_data, swapping the two bytes of every
 * adjacent pair (UYVY <-> YUYV component order).
 *
 * Whole 64-bit words are converted four pairs at a time.  A remainder shorter
 * than 8 bytes is converted pair by pair, so nothing outside the first @size
 * bytes of either buffer is accessed.  A final unpaired byte (odd @size) is
 * copied unchanged.
 *
 * Data is processed front to back and each chunk is fully read before it is
 * written, so the buffers may overlap when @out_data does not start after
 * @in_data.
 */
static void uyuv2yuyv(const u8 *in_data, u8 *out_data, size_t size)
{
	const u64 *ip = (const u64 *)in_data;
	u64 *op = (u64 *)out_data;
	size_t words = size / sizeof(u64);
	size_t i;

	for (i = 0; i < words; i++) {
		u64 w = ip[i];

		op[i] = ((w >> 8) & 0x00FF00FF00FF00FFULL) | ((w << 8) & 0xFF00FF00FF00FF00ULL);
	}

	/* Tail that does not fill a 64-bit word: swap the remaining pairs. */
	for (i = words * sizeof(u64); i + 1 < size; i += 2) {
		u8 first = in_data[i];
		u8 second = in_data[i + 1];

		out_data[i] = second;
		out_data[i + 1] = first;
	}

	/* Odd size: the last byte has no partner to swap with. */
	if (i < size)
		out_data[i] = in_data[i];
}

/*
 * Split packed 6-byte groups into one luma plane and two chroma planes.
 *
 * @in_data:         packed input, consumed contiguously; every line holds
 *                   outbytesperline * 3 / 2 bytes
 * @out_data:        destination; written to, hence not const
 * @outbytesperline: bytes per line of the luma plane
 * @lines:           number of input lines
 *
 * Each group is laid out as C Y Y C Y Y: bytes 1-2 and 4-5 go to the luma
 * plane at the start of @out_data, bytes 0 and 3 go to a chroma plane.  Even
 * lines feed the chroma plane that starts right after the luma plane
 * (outbytesperline * lines bytes in), odd lines feed the plane that follows
 * it by (outbytesperline / 2) * (lines / 2) bytes.
 *
 * Every group is fetched with a single 64-bit load although only 6 bytes are
 * consumed, so the load of the last group reads 2 bytes past the packed data.
 */
static void hdmi2yuv420(const u8 *in_data, u8 *out_data, int outbytesperline, int lines)
{
	int i, j;
	int width = outbytesperline;
	int inbytesperline = width * 3 / 2;
	const u8 *ip = in_data;
	u32 *yp = (u32 *)out_data;
	u16 *cbp = (u16 *)(out_data + width * lines);
	u16 *crp = (u16 *)(out_data + width * lines + (width / 2) * (lines / 2));

	for (i = 0; i < lines; i++) {
		/* Line parity selects which chroma plane this line contributes to. */
		u16 *cp = (i & 0x1) ? crp : cbp;

		for (j = 0; j < inbytesperline; j += 6) {
			u64 cyy = *(const u64 *)ip;

			ip += 6;

			/* Bytes 1-2 and 4-5: four luma samples. */
			*yp = ((cyy >> 8) & 0xFFFF) | ((cyy >> 16) & 0xFFFF0000);
			/* Bytes 0 and 3: two chroma samples. */
			*cp = (cyy & 0xFF) | ((cyy >> 16) & 0xFF00);
			yp++;
			cp++;
		}

		/* One chroma row is outbytesperline / 2 bytes, i.e. / 4 u16 elements. */
		if (i & 0x1)
			crp += outbytesperline / 4;
		else
			cbp += outbytesperline / 4;
	}
}

/*
 * Expand a packed stream of 10-bit samples into 32-bit words that carry three
 * samples each in bits 0-29 (the FORWARD_V4L2_CONV_10BIT_V210 conversion).
 *
 * @in_data:   packed input; lines are consumed back to back, each taking
 *             @instride rounded up to a multiple of 15 bytes
 * @out_data:  destination; written to, hence not const
 * @instride:  bytes per packed input line
 * @outstride: bytes between the starts of consecutive output lines
 * @lines:     number of lines to convert
 *
 * Every 15 input bytes (twelve samples) produce 16 output bytes.  The group is
 * fetched with two 64-bit loads, so the second load reads one byte past the
 * group.
 */
static void uyuv2v210(const u8 *in_data, u8 *out_data, int instride, int outstride, int lines)
{
	int i, j;
	const u8 *inp = in_data;
	int pwidth = ((instride - 1) / 15 + 1) * 15;

	for (i = 0; i < lines; i++) {
		/*
		 * Position the output at the start of row i before converting it;
		 * resetting it after the row would send line i + 1 back to row i.
		 */
		u8 *outp = out_data + i * outstride;

		for (j = 0; j < pwidth; j += 15) {
			u64 v0 = ((const u64 *)inp)[0];
			u64 v1 = ((const u64 *)inp)[1];
			u32 *outw = (u32 *)outp;

			inp += 15;

			outw[0] = (v0 >> 0) & 0x3FFFFFFF;
			outw[1] = (v0 >> 30) & 0x3FFFFFFF;
			/* Third word straddles the two loads: 4 bits + 26 bits. */
			outw[2] = ((v0 >> 60) & 0x0000000F) | ((v1 << 4) & 0x3FFFFFF0);
			outw[3] = ((v1 >> 26) & 0x3FFFFFFF);
			outp += 16;
		}
	}
}

/*
 * Unpack 5-byte groups of four 10-bit samples into four 16-bit words.
 *
 * Each sample ends up in the upper 10 bits of its word (mask 0xFFC0) and the
 * two samples of every pair trade places: input order s0 s1 s2 s3 is written
 * as s1 s0 s3 s2.
 *
 * Input rows are @instride bytes apart, output rows @outstride bytes apart.
 * A group is fetched with one 64-bit load, so 3 bytes following each group
 * are read as well (and ignored).
 */
static void uyuv2y210(const u8 *in_data, u8 *out_data, int instride, int outstride, int lines)
{
	const u8 *src_row = in_data;
	u8 *dst_row = out_data;
	int row;

	for (row = 0; row < lines; row++, src_row += instride, dst_row += outstride) {
		u16 *dst = (u16 *)dst_row;
		int pos;

		for (pos = 0; pos < instride; pos += 5, dst += 4) {
			u64 quad = *((const u64 *)(src_row + pos));

			dst[0] = (quad >> 4) & 0xFFC0; /* bits 10-19 */
			dst[1] = (quad << 6) & 0xFFC0; /* bits 0-9 */
			dst[2] = (quad >> 24) & 0xFFC0; /* bits 30-39 */
			dst[3] = (quad >> 14) & 0xFFC0; /* bits 20-29 */
		}
	}
}

/*
 * Pack 32-bit words carrying three 10-bit samples each (bits 0-29) into a
 * contiguous 10-bit sample stream (the FORWARD_V4L2_CONV_10BIT_V210 output
 * conversion).
 *
 * @in_data:   source words; line i starts at @in_data + i * @instride
 * @out_data:  destination; written to, hence not const.  Lines are emitted
 *             back to back, each taking @outstride rounded up to a multiple
 *             of 15 bytes
 * @instride:  bytes between the starts of consecutive input lines
 * @outstride: bytes per packed output line
 * @lines:     number of lines to convert
 *
 * Every 16 input bytes produce 15 output bytes.  All four input words of a
 * group are read before anything is written.
 */
static void v2102uyuv(const u8 *in_data, u8 *out_data, int instride, int outstride, int lines)
{
	int i, j;
	u8 *outp = out_data;
	int pwidth = ((outstride - 1) / 15 + 1) * 15;

	for (i = 0; i < lines; i++) {
		const u32 *inw = (const u32 *)(in_data + i * instride);

		for (j = 0; j < pwidth; j += 15) {
			u32 v0 = inw[0];
			u32 v1 = inw[1];
			u32 v2 = inw[2];
			u32 v3 = inw[3];
			u64 lo = ((v0 >> 0) & 0x000000003FFFFFFFULL) |
				 (((u64)v1 << 30) & 0x0FFFFFFFC0000000ULL) |
				 (((u64)v2 << 60) & 0xF000000000000000ULL);
			u64 hi = ((v2 >> 4) & 0x0000000003FFFFFFULL) |
				 (((u64)v3 << 26) & 0x00FFFFFFFC000000ULL);

			inw += 4;

			*(u64 *)outp = lo;
			/*
			 * Only the low 56 bits of the second word carry data.  Storing
			 * exactly 7 bytes keeps the write inside the 15-byte group
			 * instead of spilling a zero byte past it (same little-endian
			 * layout the 64-bit store above assumes).
			 */
			memcpy(outp + 8, &hi, 7);
			outp += 15;
		}
	}
}

/*
 * Weave two fields stored in the second half of plane 0 into a frame at the
 * start of the same plane.
 *
 * One field is read from 2/4 of the plane plus offset[0], the other from 3/4
 * of the plane plus offset[1].  Output line 0 (and every even line index) is
 * taken from the second of these, odd line indices from the first; @bff
 * exchanges the two sources.  Each line is @bytesperline bytes long.
 *
 * Copying stops after @lines lines or as soon as the write position reaches
 * the middle of the plane, whichever comes first.
 */
void forward_v4l2_interleave(struct vb2_buffer *buf, int offset[2], int bytesperline, int lines,
			     bool bff)
{
	u8 *base = (u8 *)vb2_plane_vaddr(buf, 0);
	u32 quarter = vb2_plane_size(buf, 0) / 4;
	u8 *limit = base + vb2_plane_size(buf, 0) / 2;
	u8 *field[2];
	u8 *dst;
	int line;

	/* field[n] is the source for line indices with (index & 1) == n. */
	field[1] = base + quarter * 2 + offset[0];
	field[0] = base + quarter * 3 + offset[1];

	if (bff)
		swap(field[0], field[1]);

	for (line = 0, dst = base; line < lines && dst < limit; line++, dst += bytesperline) {
		u8 **src = &field[line & 1];

		memcpy(dst, *src, bytesperline);
		*src += bytesperline;
	}
}

/*
 * Convert captured data in plane 0 of @buf in place so that the result starts
 * at the beginning of the plane.
 *
 * @offsets:         start offset of the captured data; offsets[1] is used when
 *                   the buffer field is V4L2_FIELD_BOTTOM, offsets[0] otherwise
 * @inbytesperline:  bytes per source line (used by the 10-bit conversions)
 * @outbytesperline: bytes per converted line
 * @lines:           number of lines (used by the line-based conversions)
 * @func:            conversion to apply
 *
 * UYVY->YUYV reads from the offset position.  The HDMI 4:2:0 and both 10-bit
 * conversions read their source half a plane further on.  Any other @func
 * just moves the data down to the start of the plane when the offset is
 * non-zero.
 */
void forward_v4l2_input_color_convert(struct vb2_buffer *buf, int offsets[2], int inbytesperline,
				      int outbytesperline, int lines,
				      enum forward_v4l2_pixfmt_conv func)
{
	u32 size = vb2_plane_size(buf, 0);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)
	bool even = to_vb2_v4l2_buffer(buf)->field == V4L2_FIELD_BOTTOM;
#else
	bool even = buf->v4l2_buf.field == V4L2_FIELD_BOTTOM;
#endif
	int offset = offsets[even ? 1 : 0];
	u8 *base = vb2_plane_vaddr(buf, 0);
	u8 *data = base + offset;

	if (func == FORWARD_V4L2_CONV_UYVY_YUYV)
		/*
		 * Only size - offset bytes of the plane remain after data;
		 * converting the full size would read past the end of the plane.
		 */
		uyuv2yuyv(data, base, size - offset);
	else if (func == FORWARD_V4L2_CONV_HDMI420_YUV420)
		hdmi2yuv420(data + size / 2, base, outbytesperline, lines);
	else if (func == FORWARD_V4L2_CONV_10BIT_V210)
		uyuv2v210(data + size / 2, base, inbytesperline, outbytesperline, lines);
	else if (func == FORWARD_V4L2_CONV_10BIT_16BIT)
		uyuv2y210(data + size / 2, base, inbytesperline, outbytesperline, lines);
	else if (offset != 0)
		memmove(base, data, size - offset);
}

/*
 * Prepare plane 0 of @buf for output.
 *
 * The offset is offsets[1] when the buffer field is V4L2_FIELD_BOTTOM and
 * offsets[0] otherwise.  For FORWARD_V4L2_CONV_10BIT_V210 the data found at
 * the offset position is packed by v2102uyuv() to the start of the plane.
 * For every other @func the plane contents are shifted up by the offset
 * (nothing is done when the offset is zero).
 *
 * NOTE(review): the V210 branch copies from the offset position to the plane
 * start while the memmove branch copies the other way round - confirm that
 * this asymmetry is intended.
 */
void forward_v4l2_output_color_convert(struct vb2_buffer *buf, int offsets[2], int inbytesperline,
				       int outbytesperline, int lines,
				       enum forward_v4l2_pixfmt_conv func)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)
	bool even = to_vb2_v4l2_buffer(buf)->field == V4L2_FIELD_BOTTOM;
#else
	bool even = buf->v4l2_buf.field == V4L2_FIELD_BOTTOM;
#endif
	u32 size = vb2_plane_size(buf, 0);
	int offset = even ? offsets[1] : offsets[0];
	u8 *base = vb2_plane_vaddr(buf, 0);
	u8 *shifted = base + offset;

	if (func == FORWARD_V4L2_CONV_10BIT_V210) {
		v2102uyuv(shifted, base, inbytesperline, outbytesperline, lines);
		return;
	}

	if (offset == 0)
		return;

	memmove(shifted, base, size - offset);
}

/*
 * Weave two fields into a frame at the start of plane 0, converting every
 * line on the way.
 *
 * The field read for even line indices starts at offset[0], the one read for
 * odd line indices at offset[1]; @bff exchanges them.  Source lines are
 * @inbytesperline bytes apart, destination lines @outbytesperline bytes apart.
 * Lines are converted with uyuv2yuyv() or uyuv2v210() when @func asks for it
 * and copied verbatim otherwise.
 *
 * Processing stops after @lines lines or once the write position reaches the
 * end of the plane.
 */
void forward_v4l2_interleave_color_conv(struct vb2_buffer *buf, int offset[2], int inbytesperline,
					int outbytesperline, int lines,
					enum forward_v4l2_pixfmt_conv func, bool bff)
{
	u8 *base = (u8 *)vb2_plane_vaddr(buf, 0);
	u8 *limit = base + vb2_plane_size(buf, 0);
	u8 *field[2];
	u8 *dst;
	int line;

	/* field[n] is the source for line indices with (index % 2) == n. */
	field[0] = base + offset[0];
	field[1] = base + offset[1];

	if (bff)
		swap(field[0], field[1]);

	for (line = 0, dst = base; line < lines && dst < limit; line++, dst += outbytesperline) {
		u8 **src = &field[line % 2];

		if (func == FORWARD_V4L2_CONV_UYVY_YUYV)
			uyuv2yuyv(*src, dst, inbytesperline);
		else if (func == FORWARD_V4L2_CONV_10BIT_V210)
			uyuv2v210(*src, dst, inbytesperline, outbytesperline, 1);
		else
			memcpy(dst, *src, inbytesperline);

		*src += inbytesperline;
	}
}

/*
 * Split a frame stored at the start of plane 0 into two fields, converting
 * every line on the way.
 *
 * Even line indices are written to the field starting at offset[0], odd line
 * indices to the field starting at offset[1]; @bff exchanges the two
 * destinations.  Source lines are @inbytesperline bytes apart, field lines
 * @outbytesperline bytes apart.  Lines go through v2102uyuv() for
 * FORWARD_V4L2_CONV_10BIT_V210 and are otherwise copied verbatim
 * (@inbytesperline bytes each).
 *
 * Processing stops after @lines lines or once either field write position
 * reaches the end of the plane.
 */
void forward_v4l2_deinterleave_color_convert(struct vb2_buffer *buf, int offset[2],
					     int inbytesperline, int outbytesperline, int lines,
					     enum forward_v4l2_pixfmt_conv func, bool bff)
{
	u8 *base = (u8 *)vb2_plane_vaddr(buf, 0);
	u8 *limit = base + vb2_plane_size(buf, 0);
	u8 *field[2];
	u8 *src;
	int line;

	/* field[n] is the destination for line indices with (index % 2) == n. */
	field[0] = base + offset[0];
	field[1] = base + offset[1];

	if (bff)
		swap(field[0], field[1]);

	for (line = 0, src = base; line < lines; line++, src += inbytesperline) {
		u8 **dst;

		if (field[0] >= limit || field[1] >= limit)
			break;

		dst = &field[line % 2];

		if (func == FORWARD_V4L2_CONV_10BIT_V210)
			v2102uyuv(src, *dst, inbytesperline, outbytesperline, 1);
		else
			memcpy(*dst, src, inbytesperline);

		*dst += outbytesperline;
	}
}

/*
 * Unpack tightly packed 10-bit samples into 16-bit words.
 *
 * @in:    packed input, 5 bytes per four samples, least significant bits first
 * @out:   destination for the 16-bit words; each sample is stored in the low
 *         10 bits
 * @words: number of samples to unpack; handled in groups of four, so a count
 *         that is not a multiple of 4 is rounded up to the next full group
 *
 * The 40 bits of a group are assembled from exactly five input bytes, so no
 * byte beyond the last group is read and @in needs no particular alignment.
 */
void forward_v4l2_unpack10b16b(const u8 *in, u8 *out, int words)
{
	int i;
	const u8 *inp = in;
	u16 *outp = (u16 *)out;

	for (i = 0; i < words; i += 4) {
		u64 v = (u64)inp[0] | ((u64)inp[1] << 8) | ((u64)inp[2] << 16) |
			((u64)inp[3] << 24) | ((u64)inp[4] << 32);

		outp[0] = (v >> 0) & 0x3FF;
		outp[1] = (v >> 10) & 0x3FF;
		outp[2] = (v >> 20) & 0x3FF;
		outp[3] = (v >> 30) & 0x3FF;
		outp += 4;
		inp += 5;
	}
}
/*
 * Pack the low 10 bits of 16-bit words into a tight 10-bit stream.
 *
 * @in:    source 16-bit words, fetched four at a time with one 64-bit load
 * @out:   destination, 5 bytes per four words
 * @words: number of words to pack; handled in groups of four, so a count that
 *         is not a multiple of 4 is rounded up to the next full group
 */
void forward_v4l2_pack16b10b(const u8 *in, u8 *out, int words)
{
	const u64 *src = (const u64 *)in;
	u8 *dst = out;
	int left;

	for (left = words; left > 0; left -= 4, dst += 5) {
		u64 quad = *src++;

		/* word 0, bits 0-7 */
		dst[0] = ((quad >> 0) & 0xFF);
		/* word 0, bits 8-9 + word 1, bits 0-5 */
		dst[1] = ((quad >> 8) & 0x3) | ((quad >> 14) & 0xFC);
		/* word 1, bits 6-9 + word 2, bits 0-3 */
		dst[2] = ((quad >> 22) & 0xF) | ((quad >> 28) & 0xF0);
		/* word 2, bits 4-9 + word 3, bits 0-1 */
		dst[3] = ((quad >> 36) & 0x3F) | ((quad >> 42) & 0xC0);
		/* word 3, bits 2-9 */
		dst[4] = ((quad >> 50) & 0xFF);
	}
}
/*
 * Fill @out with a repeating packed 10-bit pattern.
 *
 * @v:     four 16-bit words held in one 64-bit value; the low 10 bits of each
 *         word are packed into 5 bytes
 * @out:   destination, receives the same 5 bytes for every group
 * @words: number of words to emit; handled in groups of four, so a count that
 *         is not a multiple of 4 is rounded up to the next full group
 */
void forward_v4l2_set16b10b(uint64_t v, u8 *out, int words)
{
	u8 pattern[5];
	u8 *dst = out;
	int i;

	/* The packed bytes depend on v alone, so build them once. */
	pattern[0] = ((v >> 0) & 0xFF);
	pattern[1] = ((v >> 8) & 0x3) | ((v >> 14) & 0xFC);
	pattern[2] = ((v >> 22) & 0xF) | ((v >> 28) & 0xF0);
	pattern[3] = ((v >> 36) & 0x3F) | ((v >> 42) & 0xC0);
	pattern[4] = ((v >> 50) & 0xFF);

	for (i = 0; i < words; i += 4, dst += sizeof(pattern))
		memcpy(dst, pattern, sizeof(pattern));
}

#pragma GCC pop_options
