using UnityEngine;
using Unity.Barracuda;

namespace Mediapipe.PoseLandmark
{
    // Runs a pose landmark network through a Barracuda worker and exposes the results as
    // GPU resources (two ComputeBuffers and one RenderTexture).
    // The instance owns those resources; call Dispose() when it is no longer needed.
    public class PoseLandmarker : System.IDisposable
    {
        #region public variables
        /*
        Pose landmark result buffer.
            'outputBuffer' is an array of float4.
            Elements 0~32 are the pose landmarks.
            See the Mediapipe document below for the relation between index and landmark position.
            https://google.github.io/mediapipe/solutions/pose#pose-landmark-model-blazepose-ghum-3d

            The components of each element are
            x: x coordinate value of the pose landmark ([0, 1]).
            y: y coordinate value of the pose landmark ([0, 1]).
            z: Landmark depth with the depth at the midpoint of hips being the origin.
               The smaller the value the closer the landmark is to the camera. ([0, 1]).
               This value is full body mode only.
            w: The score of whether the landmark position is visible ([0, 1]).

            Element 33 is the score of whether a human pose is visible ([0, 1]).
            This element is (score, 0, 0, 0).
        */
        public ComputeBuffer outputBuffer;
        /*
        Pose world landmark result buffer.
            'worldLandmarkBuffer' is an array of float4.
            Elements 0~32 are the pose world landmarks.

            The components of each element are
            x, y and z: Real-world 3D coordinates in meters with the origin at the center between hips.
            w: The score of whether the world landmark position is visible ([0, 1]).

            Element 33 is the score of whether a human pose is visible ([0, 1]).
            This element is (score, 0, 0, 0).
        */
        public ComputeBuffer worldLandmarkBuffer;
        // Pose segmentation texture (SEGMENTATION_SIZE x SEGMENTATION_SIZE, ARGB32).
        public RenderTexture segmentationRT;
        // Number of pose landmark points.
        public int vertexCount => BODY_VERTEX_COUNT;
        #endregion

        #region constant number
        // Input image size defined by the pose landmark network model.
        const int IMAGE_SIZE = 256;
        // Number of pose landmark points.
        // Defined by the full body neural network model.
        const int BODY_VERTEX_COUNT = 33;
        // Landmark output vector length of the network model.
        const int BODY_LD_LEN = 195;
        // World landmark output vector length of the network model.
        const int WORLD_LD_LEN = 117;
        // Pose segmentation texture size.
        const int SEGMENTATION_SIZE = 128;
        #endregion

        #region private variable
        ComputeShader preProcessCS;
        ComputeShader postProcessCS;
        // Holds the resized input image: IMAGE_SIZE * IMAGE_SIZE * 3 floats.
        ComputeBuffer networkInputBuffer;
        NNModel liteModel;
        NNModel fullModel;
        Model model;
        IWorker worker;
        // Model the current worker was created for.
        PoseLandmarkModel selectedModel;
        #endregion

        #region public method
        // Allocates the GPU resources and creates the worker for `poseLandmarkModel`.
        //   resource: provides the pre/post-process compute shaders and the lite/full models.
        //   poseLandmarkModel: model to load first (defaults to the full model).
        // Throws System.ArgumentNullException when `resource` is null.
        public PoseLandmarker(PoseLandmarkResource resource, PoseLandmarkModel poseLandmarkModel = PoseLandmarkModel.full){
            if(resource == null){
                throw new System.ArgumentNullException(nameof(resource));
            }

            preProcessCS = resource.preProcessCS;
            postProcessCS = resource.postProcessCS;
            liteModel = resource.liteModel;
            fullModel = resource.fullModel;

            networkInputBuffer = new ComputeBuffer(IMAGE_SIZE * IMAGE_SIZE * 3, sizeof(float));
            segmentationRT = new RenderTexture(SEGMENTATION_SIZE, SEGMENTATION_SIZE, 0, RenderTextureFormat.ARGB32);
            // One extra element after the landmarks stores the pose score.
            outputBuffer = new ComputeBuffer(vertexCount + 1, sizeof(float) * 4);
            worldLandmarkBuffer = new ComputeBuffer(vertexCount + 1, sizeof(float) * 4);

            // Load the requested model (lite or full) and create its worker.
            ExchangeModel(poseLandmarkModel);
        }

        // Runs the pre-process shader on `inputTexture`, then infers from the resulting buffer.
        //   inputTexture: source image; bound to the pre-process shader as "_inputTexture".
        //   poseLandmarkModel: model to use for this call.
        // Throws System.ArgumentNullException when `inputTexture` is null.
        public void ProcessImage(Texture inputTexture, PoseLandmarkModel poseLandmarkModel = PoseLandmarkModel.full){
            if(inputTexture == null){
                throw new System.ArgumentNullException(nameof(inputTexture));
            }

            // Resize `inputTexture` texture to network model image size.
            // NOTE(review): IMAGE_SIZE / 8 groups per axis presumably matches an 8x8 kernel thread group; confirm in the shader.
            preProcessCS.SetTexture(0, "_inputTexture", inputTexture);
            preProcessCS.SetBuffer(0, "_output", networkInputBuffer);
            preProcessCS.Dispatch(0, IMAGE_SIZE / 8, IMAGE_SIZE / 8, 1);

            ProcessImage(networkInputBuffer, poseLandmarkModel);
        }

        // Executes the network on `input` and updates outputBuffer, worldLandmarkBuffer and segmentationRT.
        //   input: buffer wrapped as a (1, IMAGE_SIZE, IMAGE_SIZE, 3) tensor.
        //   poseLandmarkModel: model to use; the worker is recreated when it differs from the current one.
        // Throws System.ArgumentNullException when `input` is null.
        public void ProcessImage(ComputeBuffer input, PoseLandmarkModel poseLandmarkModel = PoseLandmarkModel.full){
            if(input == null){
                throw new System.ArgumentNullException(nameof(input));
            }

            if(selectedModel != poseLandmarkModel){
                // Recreate the worker when the requested model differs from the loaded one.
                ExchangeModel(poseLandmarkModel);
            }

            // Execute neural network model.
            // `using` disposes the input tensor even when Execute throws.
            using(var inputTensor = new Tensor(1, IMAGE_SIZE, IMAGE_SIZE, 3, input)){
                worker.Execute(inputTensor);
            }

            // Convert 4 dimensions Tensor to 1 dimension ComputeBuffer.
            // These buffers come from the worker's output tensors and are not disposed here.
            var poseFlagBuffer = TensorToBuffer("Identity_1", 1);
            var landmarkBuffer = TensorToBuffer("Identity", BODY_LD_LEN);
            var worldLandmarkRawBuffer = TensorToBuffer("Identity_4", WORLD_LD_LEN);

            // Get final results of pose landmark.
            postProcessCS.SetInt("_keypointCount", vertexCount);
            postProcessCS.SetBuffer(0, "_poseFlag", poseFlagBuffer);
            postProcessCS.SetBuffer(0, "_Landmark", landmarkBuffer);
            postProcessCS.SetBuffer(0, "_LandmarkWorld", worldLandmarkRawBuffer);
            postProcessCS.SetBuffer(0, "_Output", outputBuffer);
            postProcessCS.SetBuffer(0, "_OutputWorld", worldLandmarkBuffer);
            postProcessCS.Dispatch(0, 1, 1, 1);

            // Set pose landmark segmentation texture.
            var segTemp = CopyOutputToTempRT("Identity_2", SEGMENTATION_SIZE, SEGMENTATION_SIZE);
            try{
                Graphics.Blit(segTemp, segmentationRT);
            }
            finally{
                // Always hand the temporary texture back, even if the blit fails.
                RenderTexture.ReleaseTemporary(segTemp);
            }
        }

        // Releases the buffers, the segmentation texture and the worker owned by this instance.
        public void Dispose(){
            networkInputBuffer?.Dispose();
            outputBuffer?.Dispose();
            worldLandmarkBuffer?.Dispose();
            // `segmentationRT` is a public UnityEngine.Object: use Unity's overloaded null check
            // (rather than `?.`) so a cleared or already destroyed texture is skipped.
            if(segmentationRT != null){
                segmentationRT.Release();
            }
            worker?.Dispose();
            worker = null;
        }
        #endregion

        #region private method
        // Disposes the current worker (if any) and creates a new one for `poseLandmarkModel`.
        void ExchangeModel(PoseLandmarkModel poseLandmarkModel){
            worker?.Dispose();
            // Clear the reference so a failure below does not leave a disposed worker behind.
            worker = null;

            // Switch neural network models: lite when requested, otherwise full.
            var nnModel = poseLandmarkModel == PoseLandmarkModel.lite ? liteModel : fullModel;
            model = ModelLoader.Load(nnModel);
            worker = model.CreateWorker();

            // Switch control flag.
            selectedModel = poseLandmarkModel;
        }

        // Extract the vector in the 4 dimensions Tensor as a Compute Buffer.
        //   name: output name of the network.
        //   length: element count used for the reshape.
        // NOTE(review): the cast assumes the worker keeps its outputs as ComputeTensorData (GPU compute backend); confirm.
        ComputeBuffer TensorToBuffer(string name, int length){
            var shape = new TensorShape(length);
            // `using` disposes the reshaped tensor even when the cast below throws.
            using(var tensor = worker.PeekOutput(name).Reshape(shape)){
                return ((ComputeTensorData)tensor.data).buffer;
            }
        }

        // Copy a network output tensor into a temporary RenderTexture.
        // The caller must release the result with RenderTexture.ReleaseTemporary.
        RenderTexture CopyOutputToTempRT(string name, int w, int h)
        {
            var rtFormat = RenderTextureFormat.ARGB32;
            var shape = new TensorShape(1, h, w, 1);
            var rt = RenderTexture.GetTemporary(w, h, 0, rtFormat);
            using(var tensor = worker.PeekOutput(name).Reshape(shape)){
                tensor.ToRenderTexture(rt);
            }
            return rt;
        }
        #endregion
    }
}