from pathlib import Path
import os
import logging
import pandas as pd
from sklearn.feature_extraction import DictVectorizer
logger = logging.getLogger(__name__)

from .pred_features_util import assign_gw_difficulty, add_GW1_lagged_data, add_GW_lagged_data, map_team_to_details

# Columns kept from players_raw.csv, in the order they appear in the
# resulting static-data frame.
STATIC_FEATURES = [
    "expected_assists_per_90",
    "expected_goals_per_90",
    "expected_goals_conceded_per_90",
    "id",
    "element_type",
    "team",
]

# Numeric position code -> short position label.
POS = {
    1: "GK",
    2: "DEF",
    3: "MID",
    4: "FWD",
}

# Directory holding the raw 2025-26 season files (relative to the working dir).
BASE_DIR = Path("data") / "raw" / "2025-26"

def get_players_static_data(gw: int):
    """Load the static per-player columns from the raw players CSV.

    Reads ``players_raw.csv`` from ``BASE_DIR`` and keeps only the columns
    named in ``STATIC_FEATURES``, in that order.

    Args:
        gw: Gameweek number. Not used by the current implementation; it is
            accepted so the call signature stays stable for callers.

    Returns:
        A DataFrame restricted to the ``STATIC_FEATURES`` columns, one row
        per row of the CSV.
    """
    # NOTE: an earlier draft derived an "xP" column from "ep_this"/"ep_next"
    # depending on whether gws/xP{gw}.csv existed. That logic is disabled and
    # those columns are not part of STATIC_FEATURES.
    csv_location = f"{BASE_DIR}/players_raw.csv"
    raw_players = pd.read_csv(csv_location)
    return raw_players.loc[:, STATIC_FEATURES]



    
def get_prediction_input(gw:int):
    """Assemble the encoded feature table for gameweek ``gw``.

    Pipeline, in order:
      1. Load the static player columns and rename them to the short
         feature names ``XA``, ``XG``, ``XGC`` and ``position``.
      2. Enrich the frame with ``map_team_to_details`` and
         ``assign_gw_difficulty``.
      3. Translate numeric position codes to labels through ``POS``
         (codes missing from ``POS`` become ``'Unknown'``).
      4. Attach lagged data: ``add_GW1_lagged_data`` for gameweek 1,
         ``add_GW_lagged_data`` for every other gameweek.
      5. Encode the rows with a ``DictVectorizer`` (dense output, ``'_'``
         separator between a field name and its string value).

    Args:
        gw: Gameweek number the input is built for.

    Returns:
        A DataFrame of the vectorized features, with columns taken from
        the vectorizer's ``feature_names_``.
    """
    short_names = {
        "expected_assists_per_90": "XA",
        "expected_goals_per_90": "XG",
        "expected_goals_conceded_per_90": "XGC",
        "element_type": "position",
    }

    static_df = get_players_static_data(gw)
    logger.info(f"Static data fetched for GW{gw} with shape {static_df.shape}")
    static_df = static_df.rename(columns=short_names)
    logger.info(f"Static data columns renamed for GW{gw}")

    # Team details first, then fixture difficulty for this gameweek.
    static_df = assign_gw_difficulty(map_team_to_details(static_df), gw)
    static_df['position'] = static_df['position'].apply(
        lambda code: POS.get(code, 'Unknown')
    )

    # Gameweek 1 uses its own lagged-data helper; later weeks pass gw along.
    features = (
        add_GW1_lagged_data(static_df)
        if gw == 1
        else add_GW_lagged_data(static_df, gw)
    )

    # Force position to str so the vectorizer treats it as categorical.
    features['position'] = features['position'].apply(str)
    # TODO: need to transform season

    # The vectorizer is fitted on this call's rows, so the output columns are
    # derived from the data seen here.
    # NOTE(review): confirm this matches the column layout the model expects.
    vectorizer = DictVectorizer(sparse=False, separator='_')
    encoded = vectorizer.fit_transform(features.to_dict("records"))

    return pd.DataFrame(encoded, columns=vectorizer.feature_names_)

    #players_prediction_data = add_players_lagged_data(players_static_data,gw)
if __name__ == "__main__":
    # Manual smoke run for gameweek 4; the returned frame is discarded.
    # This module uses a package-relative import, so launch it with
    # `python -m <package>.<module>` rather than as a bare script.
    get_prediction_input(gw=4)
    