from pathlib import Path

import pandas as pd
from sklearn.calibration import LabelEncoder
from sklearn.feature_extraction import DictVectorizer

from .data_preprocess import player_lag_features
# Root folder of the raw 2025-26 season data and its per-gameweek subfolder.
BASE_DIR = Path('data/raw/2025-26')
GWS_BASE_DIR = Path('data/raw/2025-26/gws')
# NOTE: both tables are read when this module is imported, so the relative
# paths above are resolved against the working directory at import time.
TEAMS= pd.read_csv(f"{BASE_DIR}/teams.csv")
FIXTURES = pd.read_csv(f"{BASE_DIR}/fixtures.csv")
# Columns selected from the previous-gameweek player data before it is passed
# to player_lag_features (see add_GW_lagged_data).
BASIC_FEATURES = [
    'id','name','was_home','gw','minutes','goals_scored','assists','goals_conceded','clean_sheets','yellow_cards','red_cards','own_goals','saves','penalties_saved','penalties_missed' 
]
# Lag feature column names: created with the value 0.0 by add_dummy_features
# and selected from the player_lag_features output in add_GW_lagged_data.
# Presumably "<stat>_pg_last_<n>" is a per-game value over the last n
# gameweeks -- TODO confirm against player_lag_features.
LAG_FEATURES = [
 'assists_pg_last_1',
 'assists_pg_last_2',
 'assists_pg_last_3',
 'assists_pg_last_5',
 'clean_sheets_pg_last_1',
 'clean_sheets_pg_last_2',
 'clean_sheets_pg_last_3',
 'clean_sheets_pg_last_5',
 'goals_conceded_pg_last_1',
 'goals_conceded_pg_last_2',
 'goals_conceded_pg_last_3',
 'goals_conceded_pg_last_5',
 'goals_scored_pg_last_1',
 'goals_scored_pg_last_2',
 'goals_scored_pg_last_3',
 'goals_scored_pg_last_5',
 'minutes_last_2',
 'minutes_last_4',
 'own_goals_pg_last_5',
 'penalties_missed_pg_last_5',
 'penalties_saved_pg_last_5',
 'red_cards_pg_last_5',
 'saves_pg_last_1',
 'saves_pg_last_3',
 'saves_pg_last_5',
 'yellow_cards_pg_last_1',
 'yellow_cards_pg_last_2',
 'yellow_cards_pg_last_4'
 ]

def assign_gw_difficulty(
    players: pd.DataFrame,
    fixtures: "pd.DataFrame | None" = None,
    n_fixtures: int = 10,
) -> pd.DataFrame:
    """Attach each player's fixture difficulty as a ``team_gw_diff`` column.

    The lookup is built from the first ``n_fixtures`` rows of the fixtures
    table: the home side of every row receives ``team_h_difficulty`` and the
    away side receives ``team_a_difficulty``. If a team appears more than
    once, the value from the last occurrence wins.

    Args:
        players: Player rows with a ``team`` column holding the identifiers
            used in ``team_h`` / ``team_a``. The frame is modified in place.
        fixtures: Fixtures table to read from. Defaults to the module-level
            ``FIXTURES``.
        n_fixtures: Number of leading fixture rows to use. The default of 10
            presumably corresponds to one gameweek of a 20-team league and
            assumes the table is ordered by gameweek -- TODO confirm.

    Returns:
        The same ``players`` object with ``team_gw_diff`` added.

    Raises:
        KeyError: If ``players`` has no ``team`` column, or a player's team
            does not occur in the selected fixtures.
    """
    if fixtures is None:
        fixtures = FIXTURES

    # Positional, end-exclusive slice: exactly ``n_fixtures`` rows. A label
    # slice such as ``.loc[0:10]`` includes both endpoints (11 rows), which
    # lets one extra fixture overwrite the difficulty of two teams.
    round_fixtures = fixtures.iloc[:n_fixtures]

    columns = ['team_h', 'team_h_difficulty', 'team_a', 'team_a_difficulty']
    teams_gw_diff = {}
    for home, home_diff, away, away_diff in round_fixtures[columns].itertuples(
        index=False, name=None
    ):
        teams_gw_diff[home] = home_diff
        teams_gw_diff[away] = away_diff

    # Plain dict lookups keep the KeyError for unknown teams and, unlike a
    # row-wise ``DataFrame.apply``, also work when ``players`` has no rows.
    players["team_gw_diff"] = [teams_gw_diff[team] for team in players["team"]]
    return players
def add_was_home_feature(
    players: pd.DataFrame,
    fixtures: "pd.DataFrame | None" = None,
    n_fixtures: int = 10,
) -> pd.DataFrame:
    """Flag players whose team is a home side as ``was_home`` (1) or not (0).

    A team counts as playing at home when it appears in the ``team_h`` column
    of the first ``n_fixtures`` rows of the fixtures table.

    Args:
        players: Player rows with a ``team`` column holding the identifiers
            used in ``team_h``. The frame is modified in place.
        fixtures: Fixtures table to read from. Defaults to the module-level
            ``FIXTURES``.
        n_fixtures: Number of leading fixture rows to use. The default of 10
            presumably corresponds to one gameweek of a 20-team league and
            assumes the table is ordered by gameweek -- TODO confirm.

    Returns:
        The same ``players`` object with an integer ``was_home`` column.

    Raises:
        KeyError: If ``players`` has no ``team`` column.
    """
    if fixtures is None:
        fixtures = FIXTURES

    # Positional, end-exclusive slice: exactly ``n_fixtures`` rows. A label
    # slice such as ``.loc[0:10]`` includes both endpoints (11 rows) and
    # would mark the home team of one extra fixture as playing at home.
    home_teams = fixtures.iloc[:n_fixtures]['team_h']

    # Vectorised membership test instead of a Python call per row.
    players["was_home"] = players["team"].isin(home_teams).astype(int)
    return players
    
    
def map_team_to_details(players: pd.DataFrame) -> pd.DataFrame:
    """Join each team's overall home/away strength onto the player rows.

    Takes ``id``, ``strength_overall_home`` and ``strength_overall_away``
    from the module-level ``TEAMS`` table, renames them to ``team``,
    ``strength_h`` and ``strength_a``, and left-joins them on ``team``.

    Args:
        players: Player rows with a ``team`` column matching ``TEAMS['id']``.

    Returns:
        A new frame (the input is left untouched) with the two strength
        columns added and ``team`` cast to the nullable ``Int64`` dtype.
    """
    column_aliases = {
        'id': "team",
        'strength_overall_home': "strength_h",
        'strength_overall_away': "strength_a",
    }
    strength_by_team = TEAMS[list(column_aliases)].rename(columns=column_aliases)

    # Left join keeps every player, including ones whose team is not in TEAMS.
    enriched = players.merge(strength_by_team, on='team', how='left')
    enriched['team'] = enriched['team'].astype('Int64')
    return enriched

def add_dummy_features(players: pd.DataFrame) -> pd.DataFrame:
    """Create every column named in ``LAG_FEATURES`` with the value 0.0.

    The frame is modified in place and also returned; a column that already
    exists under one of those names is overwritten.
    """
    placeholder = 0.0
    for lag_column in LAG_FEATURES:
        players[lag_column] = placeholder
    return players

def get_players_previous_gws_data(
    curr_gw: int,
    gws_dir: "str | Path | None" = None,
) -> pd.DataFrame:
    """Load and stack the per-gameweek player files that precede ``curr_gw``.

    Reads ``gw{n}.csv`` for up to the five gameweeks before ``curr_gw``
    (never below gameweek 1), renames the identifier / expected-stat columns
    and drops the columns that are not used downstream.

    Args:
        curr_gw: Gameweek being prepared; only earlier gameweeks are loaded.
        gws_dir: Folder containing the ``gw{n}.csv`` files. Defaults to the
            module-level ``GWS_BASE_DIR``.

    Returns:
        One frame holding the rows of every file that could be read, in
        gameweek order with a fresh index. An empty frame is returned when
        no file was found (for example for ``curr_gw == 1``).
    """
    base_dir = Path(GWS_BASE_DIR if gws_dir is None else gws_dir)

    frames = []
    for gw in range(max(1, curr_gw - 5), curr_gw):
        try:
            frames.append(pd.read_csv(base_dir / f"gw{gw}.csv"))
        except (FileNotFoundError, pd.errors.EmptyDataError):
            # Best effort: a gameweek without a usable file is skipped.
            # Any other failure (parse error, interrupt, ...) propagates
            # instead of being silently swallowed.
            continue

    if not frames:
        return pd.DataFrame()

    # Concatenate once rather than growing a frame inside the loop.
    players = pd.concat(frames, ignore_index=True)

    # Rename and filter out the columns not needed for model training.
    players = players.rename(columns={
        'element': 'id',
        'expected_assists': "XA",
        'round': 'gw',
        'expected_goals': 'XG',
        'expected_goals_conceded': 'XGC',
    })
    unused_columns = [
        'expected_goal_involvements', 'creativity', 'influence', 'kickoff_time',
        'transfers_balance', 'transfers_in', 'transfers_out', 'value',
        'modified', 'selected', 'starts', 'team_a_score', 'position',
        'team_h_score', 'threat',
    ]
    # errors='ignore': not every file is required to carry every column.
    return players.drop(columns=unused_columns, errors='ignore')


def add_GW1_lagged_data(static_data: pd.DataFrame):
    """Build the Gameweek 1 feature frame, where no match history exists yet.

    Works on a copy of ``static_data``: adds the ``was_home`` flag via
    ``add_was_home_feature``, creates the lag columns via
    ``add_dummy_features``, then orders the columns alphabetically and the
    rows by ``id``.
    """
    gw1_frame = add_was_home_feature(static_data.copy())
    gw1_frame = add_dummy_features(gw1_frame)

    # Columns first (alphabetical), then rows by player id.
    columns_ordered = gw1_frame.sort_index(axis=1)
    return columns_ordered.sort_values(by=['id'])


def add_GW_lagged_data(static_data: pd.DataFrame, gw: int):
    """Attach lag features computed from recent gameweeks to the player data.

    Loads the gameweeks preceding ``gw``, runs ``player_lag_features`` over
    their ``BASIC_FEATURES`` columns, keeps the rows of gameweek ``gw - 1``
    (``id``, ``was_home`` and the ``LAG_FEATURES`` columns, with missing
    values replaced by 0) and left-joins them by ``id`` onto a copy of
    ``static_data``.

    NOTE(review): players without a ``gw - 1`` row leave the left join with
    NaN in the joined columns, and ``was_home`` is taken from the ``gw - 1``
    rows -- confirm both are intended.
    """
    # One single-key dict per statistic, in the form player_lag_features is
    # called with; the integers are presumably look-back windows in
    # gameweeks -- TODO confirm against player_lag_features.
    lag_spec = [
        {'goals_scored': [1, 2, 3, 5]},
        {'assists': [1, 2, 3, 5]},
        {'goals_conceded': [1, 2, 3, 5]},
        {'clean_sheets': [1, 2, 3, 5]},
        {'yellow_cards': [1, 2, 4]},
        {'red_cards': [5]},
        {'own_goals': [5]},
        {'saves': [1, 3, 5]},
        {'penalties_saved': [5]},
        {'penalties_missed': [5]},
    ]

    history = get_players_previous_gws_data(gw)[BASIC_FEATURES]
    lagged_history, _ = player_lag_features(history, lag_spec, for_pred=True)

    # Only the most recent completed gameweek feeds the join.
    is_latest_gw = lagged_history["gw"] == gw - 1
    wanted_columns = ["id", 'was_home'] + LAG_FEATURES
    latest_rows = lagged_history[is_latest_gw][wanted_columns].fillna(0)

    return static_data.copy().merge(latest_rows, on='id', how='left')
