import warnings
from pathlib import Path
from typing import List

import pandas as pd

# Output folders for the intermediate tables and the final training set.
PROCESSED_DATA_DIR = Path('data/processed')
TRAIN_DATA_DIR = Path('data/train')
# Root folder that holds one sub-folder per season.
directory = Path('data/raw')

# Names of all season folders inside the raw-data directory.
# Path.iterdir() yields entries in arbitrary, filesystem-dependent order, so
# the names are sorted to keep season order (and therefore the column and row
# order of every derived table) reproducible between runs and machines.
folders = sorted(item.name for item in directory.iterdir() if item.is_dir())

season_paths = [directory / folder for folder in folders]

# Compact season label: the part before the first '-' with its first two
# characters removed, followed by the part after it
# (e.g. a folder named "2023-24" becomes "2324").
season_names = [
    f"{parts[0][2:]}{parts[1]}"
    for parts in (folder.split('-') for folder in folders)
]

def get_teams(season_paths: List[str], season_names: List[str]) -> pd.DataFrame:
    """
    Build one table with every team that appears in any of the given seasons.

    For each season, ``teams.csv`` is read from the season folder and its
    ``id``, ``strength_overall_home`` and ``strength_overall_away`` columns are
    renamed to ``id_<season>``, ``strength_h_<season>`` and
    ``strength_a_<season>``. The per-season tables are outer-merged on
    ``name`` and ``code``; a team missing from a season gets 0 in that
    season's columns.

    Args:
        season_paths: Season folders (str or Path), each containing ``teams.csv``.
        season_names: Season labels, aligned one-to-one with ``season_paths``.

    Returns:
        The merged teams table. It is also written to
        ``PROCESSED_DATA_DIR/teams.csv``.
    """
    teams = None
    for season_path, season_name in zip(season_paths, season_names):
        # Maps the source column names to their season-specific names.
        season_columns = {
            'id': f"id_{season_name}",
            'strength_overall_home': f"strength_h_{season_name}",
            'strength_overall_away': f"strength_a_{season_name}",
        }
        season_teams = pd.read_csv(Path(season_path) / "teams.csv")
        season_teams = season_teams[['name', 'code', *season_columns]].rename(columns=season_columns)

        if teams is None:
            # The first season seeds the table.
            teams = season_teams
        else:
            # An outer merge keeps teams that appear in only one of the two sides.
            teams = pd.merge(teams, season_teams, on=['name', 'code'], how='outer')

    if teams is None:
        teams = pd.DataFrame()

    # The outer merges leave NaN (and float dtype) where a team did not play
    # in a season. Normalising once after the loop gives the same result as
    # doing it after every merge.
    season_value_cols = [col for col in teams.columns if col.startswith(('id_', 'strength_'))]
    for col in season_value_cols:
        teams[col] = teams[col].fillna(0).astype(int)

    # to_csv does not create missing folders, so make sure the target exists.
    output_dir = Path(PROCESSED_DATA_DIR)
    output_dir.mkdir(parents=True, exist_ok=True)
    teams.to_csv(output_dir / "teams.csv", index=False)
    return teams

def get_players(season_paths: List[str], season_names: List[str], max_gameweek: int = 38) -> pd.DataFrame:
    """
    Load per-gameweek player stats for every season into a single DataFrame.

    For each season folder the files ``gws/gw1.csv`` .. ``gws/gw<max_gameweek>.csv``
    are read and stacked, and every row is tagged with the season label in a
    ``season`` column. A gameweek file that does not exist is skipped
    silently; a file that exists but cannot be read is skipped with a warning.
    Seasons with no readable gameweek file contribute nothing.

    Args:
        season_paths: Season folders (str or Path), each containing a ``gws`` folder.
        season_names: Season labels, aligned one-to-one with ``season_paths``.
        max_gameweek: Highest gameweek number to look for (inclusive).

    Returns:
        All player-gameweek rows with ``element``, ``round``,
        ``expected_assists``, ``expected_goals`` and ``expected_goals_conceded``
        renamed to ``id``, ``gw``, ``XA``, ``XG`` and ``XGC``, and the columns
        not used for model training removed. Empty if nothing could be read.
    """
    season_frames = []
    for season_path, season_name in zip(season_paths, season_names):
        gameweek_frames = []
        for gw in range(1, max_gameweek + 1):
            gw_file = Path(season_path) / "gws" / f"gw{gw}.csv"
            try:
                gw_df = pd.read_csv(gw_file)
            except FileNotFoundError:
                # No file for this gameweek: nothing to load.
                continue
            except Exception as exc:
                # Stay best-effort (the loader has always skipped unreadable
                # files) but surface the problem instead of hiding it, and let
                # KeyboardInterrupt / SystemExit propagate.
                warnings.warn(f"Skipping unreadable gameweek file {gw_file}: {exc!r}")
                continue
            gameweek_frames.append(gw_df)

        if not gameweek_frames:
            continue
        # One concat per season instead of re-copying the frame for every file.
        season_df = pd.concat(gameweek_frames, ignore_index=True)
        season_df["season"] = season_name
        season_frames.append(season_df)

    players = pd.concat(season_frames, ignore_index=True) if season_frames else pd.DataFrame()

    # Rename and filter out columns that are not needed for model training.
    players = players.rename(columns={
        'element': 'id',
        'expected_assists': "XA",
        'round': 'gw',
        'expected_goals': 'XG',
        'expected_goals_conceded': 'XGC',
    })
    # errors='ignore' because not every listed column is present in every input.
    players = players.drop(
        columns=[
            'expected_goal_involvements', 'creativity', 'influence', 'kickoff_time',
            'transfers_balance', 'transfers_in', 'transfers_out', 'value', 'modified',
            'mng_clean_sheets', 'mng_draw', 'mng_goals_scored', 'mng_loss',
            'mng_underdog_draw', 'mng_underdog_win', 'mng_win', 'selected', 'starts',
            'team_a_score', 'team_h_score', 'threat',
        ],
        errors='ignore',
    )
    return players
# Mapping Team Names to Ids    
def get_details_by_season(row):
    """
    Pick the team id and home/away strength that belong to the row's season.

    Reads ``row['season']`` and looks up ``id_<season>``,
    ``strength_h_<season>`` and ``strength_a_<season>`` on the same row. No
    existence check is done: a missing column raises ``KeyError``.

    Returns:
        A Series indexed ``team``, ``strength_h``, ``strength_a``.
    """
    season = row['season']
    source_columns = (f"id_{season}", f"strength_h_{season}", f"strength_a_{season}")
    picked = [row[column] for column in source_columns]
    return pd.Series(picked, index=["team", "strength_h", "strength_a"])

def map_team_to_details(teams: pd.DataFrame, players: pd.DataFrame) -> pd.DataFrame:
    """
    Replace each player's team name with that season's team id and strengths.

    ``players`` is left-joined to ``teams`` on the team name
    (``players['team']`` == ``teams['name']``). For every row the values of
    ``id_<season>``, ``strength_h_<season>`` and ``strength_a_<season>``
    matching the row's ``season`` are copied into ``team``, ``strength_h`` and
    ``strength_a``, after which the per-season helper columns are removed.

    Args:
        teams: Table with ``name``, ``code`` and the per-season
            ``id_*`` / ``strength_h_*`` / ``strength_a_*`` columns.
        players: Player rows with at least ``team`` and ``season`` columns.

    Returns:
        A new DataFrame (``players`` is not modified). It is also written to
        ``PROCESSED_DATA_DIR/players.csv``.
    """
    # merge() returns a new frame, so the input does not need to be copied first.
    players_df = pd.merge(
        players,
        teams,
        left_on='team',
        right_on='name',
        how='left',
        suffixes=('', '_map'),  # a clashing teams column (e.g. 'name') arrives as '<col>_map'
    )
    players_df[["team", "strength_h", "strength_a"]] = players_df.apply(get_details_by_season, axis=1)
    # Nullable integer dtype: rows whose team name found no match carry a missing id.
    players_df['team'] = players_df['team'].astype('Int64')

    # Derive the helper columns from the teams table that was actually merged
    # in, rather than from module-level state, so the cleanup always matches
    # the input.
    season_helper_columns = [
        column
        for column in teams.columns
        if column.startswith(("id_", "strength_h_", "strength_a_"))
    ]
    players_df = players_df.drop(columns=['code', 'name_map'] + season_helper_columns, errors='ignore')

    # to_csv does not create missing folders, so make sure the target exists.
    output_dir = Path(PROCESSED_DATA_DIR)
    output_dir.mkdir(parents=True, exist_ok=True)
    players_df.to_csv(output_dir / "players.csv", index=False)

    return players_df

def assign_gw_difficulty(row):
    """
    Order a fixture's two difficulty values from the row's own point of view.

    When ``row['was_home']`` is truthy the result is
    (``team_h_difficulty``, ``team_a_difficulty``); otherwise the two values
    are swapped.

    Returns:
        A Series indexed ``team_gw_diff``, ``opponnent_gw_diff``.
    """
    played_at_home = row['was_home']
    home_value = row["team_h_difficulty"]
    away_value = row["team_a_difficulty"]
    ordered = [home_value, away_value] if played_at_home else [away_value, home_value]
    return pd.Series(ordered, index=['team_gw_diff', 'opponnent_gw_diff'])
    
# Adding Team And GW Contextual data 
def add_contextual_data(season_paths: List[str], season_names: List[str], players: pd.DataFrame, teams: pd.DataFrame) -> pd.DataFrame:
    """
    Create the dataset used to train the model.

    For each season, that season's player rows are left-joined to
    ``fixtures.csv`` from the season folder on (``gw``, ``fixture``) ==
    (``event``, ``id``). This adds the fixture's ``kickoff_time`` and two
    difficulty columns seen from the player's side:

    - ``team_gw_diff``: ``team_h_difficulty`` when ``was_home`` is truthy,
      otherwise ``team_a_difficulty``.
    - ``opponnent_gw_diff``: the other of the two values.

    Args:
        season_paths: Season folders (str or Path), each containing ``fixtures.csv``.
        season_names: Season labels, aligned one-to-one with ``season_paths``.
        players: Player rows with ``season``, ``gw``, ``fixture``, ``id`` and
            ``was_home`` columns.
        teams: Not used by this function; kept so existing callers keep working.

    Returns:
        All seasons stacked into one DataFrame with a fresh index; an empty
        DataFrame when no seasons are given. ``players`` is not modified.
    """
    fixture_columns = ['event', 'id', 'team_h_difficulty', 'team_a_difficulty', 'kickoff_time']
    season_frames = []

    for path, current_season in zip(season_paths, season_names):
        player_season = players[players['season'] == current_season]
        season_fixtures_df = pd.read_csv(Path(path) / "fixtures.csv")[fixture_columns]
        player_season = pd.merge(
            player_season,
            season_fixtures_df,
            left_on=["gw", "fixture"],
            right_on=["event", "id"],
            how='left',
            suffixes=('', '_map'),  # the fixture 'id' clashes with the player 'id' -> 'id_map'
        )

        # Column-wise selection instead of a row-wise apply. It applies the
        # same truthiness rule to was_home, and also works for a season with
        # no player rows, where the multi-column assignment from an empty
        # apply() would raise.
        is_home = player_season['was_home'].astype(bool)
        home_difficulty = player_season['team_h_difficulty']
        away_difficulty = player_season['team_a_difficulty']
        player_season['team_gw_diff'] = home_difficulty.where(is_home, away_difficulty)
        player_season['opponnent_gw_diff'] = away_difficulty.where(is_home, home_difficulty)

        player_season = player_season.drop(columns=['event', 'id_map', "team_h_difficulty", "team_a_difficulty"])
        season_frames.append(player_season)

    if not season_frames:
        # pd.concat rejects an empty list.
        return pd.DataFrame()
    # Concatenate once rather than re-copying the growing frame every season.
    return pd.concat(season_frames, ignore_index=True)
    

def prepare_data():
    """Run the full data-preparation pipeline and write the training CSV.

    Steps, in order:
      1. Build the teams table for all discovered seasons.
      2. Load the per-gameweek player rows and drop those whose position is "AM".
      3. Map each player's team name to that season's team id and strengths.
      4. Attach fixture context and write the result to
         ``TRAIN_DATA_DIR/train_v1.csv``.
    """
    # to_csv does not create missing folders. Create the output folders up
    # front so a fresh checkout fails neither midway nor after all the work.
    for output_dir in (PROCESSED_DATA_DIR, TRAIN_DATA_DIR):
        Path(output_dir).mkdir(parents=True, exist_ok=True)

    all_teams = get_teams(season_paths, season_names)
    all_players = get_players(season_paths, season_names)
    # NOTE(review): "AM" presumably marks assistant-manager entries - confirm.
    all_players = all_players[all_players["position"] != "AM"]

    all_players = map_team_to_details(all_teams, all_players)
    model_train_df = add_contextual_data(season_paths, season_names, all_players, all_teams)
    model_train_df.to_csv(f"{TRAIN_DATA_DIR}/train_v1.csv", index=False)
    print("Done")

# Run the preparation pipeline only when this file is executed as a script.
if __name__ == "__main__":
    prepare_data()