import csv
import os
import sys
import urllib.request

import cv2
import mediapipe as mp
import numpy as np
import requests

from PIL import Image

output_file_deploy_core = "harveys_tags_core_2905_2.csv"

# Point the Hugging Face cache at a writable directory (change as needed).
# These variables must be set before transformers is imported, which is why
# the import comes after them. Newer transformers releases prefer HF_HOME and
# deprecate TRANSFORMERS_CACHE; both are set here for compatibility.
os.environ['TRANSFORMERS_CACHE'] = '/var/www/.cache'  # or '/home/youruser/.cache/huggingface'

# Optional: HF_HOME (for other Hugging Face data)
os.environ['HF_HOME'] = '/var/www/html/advu'

from transformers import BlipProcessor, BlipForQuestionAnswering

# BLIP VQA base model (a heavier alternative is Salesforce/blip2-opt-2.7b,
# which is loaded via the Blip2 classes instead)
processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base", use_fast=True)
model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")
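
# Optional, hedged sketch: run inference on a GPU when one is available.
# torch is already installed as a transformers dependency; the VQA loop below
# moves its inputs to model.device, so this works on both CPU and GPU.
import torch
model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))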


#img_url = 'https://freemans.scene7.com/is/image/OttoUK/800h/craghoppers-kids-sundon-insulated-waterproof-jacket~16E559FRSP_W03.jpg' 
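
# Fail fast when CLI arguments are missing; the expected invocation, per the
# argv indices below, is: <script> <csv_idx> <pid> <img_url>
if len(sys.argv) < 4:
    sys.exit(f"usage: {sys.argv[0]} <csv_idx> <pid> <img_url>")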
img_url = sys.argv[3]
pid = sys.argv[2]
csv_idx = sys.argv[1]

# Stream the image bytes straight into PIL; the timeout is an arbitrary
# safeguard against a hung download.
img_resp = requests.get(img_url, stream=True, timeout=30)
img_resp.raise_for_status()
raw_image = Image.open(img_resp.raw).convert('RGB')

# Ask BLIP a series of yes/no and either/or questions about the image and
# collect the decoded answers in order.
questions = [
    "Is there a human body in the image? If yes, respond 'yes'. Otherwise, respond 'no'.",
    "Is there a human body visible in the image, and are the shoulders also visible? If yes, respond 'yes'. Otherwise, respond 'no'.",
    "Is only the top half or only the lower half of a human body visible in the image? If the top half, respond 'top'. Otherwise, respond 'lower'.",
    "Is the human pose facing the front or the back?",
]

answers = []
for question in questions:
    inputs = processor(raw_image, question, return_tensors="pt").to(model.device)
    out = model.generate(**inputs)
    answer = processor.decode(out[0], skip_special_tokens=True)
    answers.append(answer)
    print(f"Answer: {answer}")


### POSE AND FACE DETECTION

# Read the image from the URL into a numpy array
resp = urllib.request.urlopen(img_url)
image_array = np.asarray(bytearray(resp.read()), dtype=np.uint8)

# Decode the image into OpenCV's BGR format
image = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
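
# Guard: cv2.imdecode returns None when the payload is not a decodable image.
if image is None:
    sys.exit(f"Could not decode image from {img_url}")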

# Convert BGR -> RGB for MediaPipe
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Initialize MediaPipe Pose
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

with mp_pose.Pose(static_image_mode=True, model_complexity=1, enable_segmentation=False) as pose:
    results = pose.process(image_rgb)

    # Flag whether MediaPipe found pose landmarks (i.e. a human body)
    if results.pose_landmarks:
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
        is_human_avail = 'is_human'
    else:
        is_human_avail = 'none'
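
# Hedged sketch: MediaPipe's per-landmark visibility scores could answer the
# "are the shoulders visible?" question without the VQA model. PoseLandmark
# indices and the visibility field are part of the MediaPipe Pose API; the
# 0.5 threshold is an arbitrary assumption.
if results.pose_landmarks:
    lm = results.pose_landmarks.landmark
    shoulders_visible = (
        lm[mp_pose.PoseLandmark.LEFT_SHOULDER].visibility > 0.5
        or lm[mp_pose.PoseLandmark.RIGHT_SHOULDER].visibility > 0.5
    )
    print(f"Shoulders visible (pose landmarks): {shoulders_visible}")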
        
        
# Haar full-body cascade: not used by the MediaPipe pipeline below; see the
# sketch after the face-detection block for one way to use it.
body_cascade = cv2.CascadeClassifier("haarcascade_fullbody.xml")
face_detect_score_final = 0
face_detected = 'none'

mp_face = mp.solutions.face_detection

with mp_face.FaceDetection(min_detection_confidence=0.2) as face_detection:
    results = face_detection.process(image_rgb)
    if results.detections:
        for detection in results.detections:
            mp_drawing.draw_detection(image, detection)
            # Keep the highest-confidence detection rather than whichever
            # detection happens to be iterated last.
            face_detect_score_final = max(face_detect_score_final, detection.score[0])
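
# Hedged sketch: the full-body Haar cascade loaded above could supply an extra
# signal (assumes haarcascade_fullbody.xml sits next to the script; OpenCV also
# ships cascades under cv2.data.haarcascades).
if not body_cascade.empty():
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    bodies = body_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=3)
    print(f"Haar full-body detections: {len(bodies)}")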

# Count the image as containing a human when the best face score clears an
# arbitrary 0.75 threshold.
face_detect_score_final = round(face_detect_score_final, 2)
if face_detect_score_final > 0.75:
    face_detected = 'is human'
     
# Append one CSV row: pid, csv_idx, URL, VQA answers 1 and 2, the face and
# pose flags, then answers 3 and 4; csv.writer escapes any commas in answers.
with open(output_file_deploy_core, "a", newline="") as outfiledc:
    csv.writer(outfiledc).writerow([pid, csv_idx, img_url, answers[0], answers[1],
                                    face_detected, is_human_avail, answers[2], answers[3]])