o
    >h                  
   @   s  d dl mZ d dlZd dlmZ edZedZedZdd e	 D Z
d	d e
D Zd
d e
D Zdee dee dejfddZdee dee dejfddZdd ZdejdejdejfddZdd Zdee dee dejdejdejf
ddZdd Zedkre  dS dS )    )PathN)Listzdata/processedz
data/trainzdata/rawc                 C   s   g | ]	}|  r|jqS  )is_dirname).0itemr   r   ;/var/www/html/fantasy/fantasy_model/scripts/prepare_data.py
<listcomp>
   s    r
   c                 C   s   g | ]}t | qS r   )	directoryr   folderr   r   r	   r
      s    c                 C   s4   g | ]}| d d dd  | d d  qS )-r      N   )splitr   r   r   r	   r
      s   4 season_pathsseason_namesreturnc                 C   s   t  }tt| D ]\}d||  }d||  }d||  }t | |  d}|g d j|||dd}|dkrB| }nt j||d	d
gdd}dd |jD }	|	D ]}
||
 	d
t||
< qWq
|jt ddd |S )zU
    Return each team that played in the league in the Following seasons season 
    id_strength_h_strength_a_z
/teams.csv)r   codeidstrength_overall_homestrength_overall_away)r   r   r   columnsr   r   r   outer)onhowc                 S   s$   g | ]}| d s| dr|qS )r   Z	strength_)
startswith)r   colr   r   r	   r
   '   s   $ zget_teams.<locals>.<listcomp>Findex)pd	DataFramerangelenread_csvrenamecopymerger   fillnaastypeintto_csvPROCESSED_DATA_DIR)r   r   teamsiZteam_id_season_featureZteam_str_h_featureZteam_str_a_featureZteams_seasons_dfZcurrent_season_dataid_colsr"   r   r   r	   	get_teams   s&   
r5   c                 C   s   t  }tt| D ]:}t  }tddD ]}zt | |  d| d}t j||gdd}W q   Y q|| |d< t j||gdd}q
|jdd	d
dddd}|jg ddd}|S )z<
    Return players stats per gameweek for each season 
    r   '   z/gws/gwz.csvTignore_indexseasonr   XAgwXGXGC)elementexpected_assistsroundexpected_goalsexpected_goals_concededr   )expected_goal_involvements
creativity	influencekickoff_timetransfers_balancetransfers_intransfers_outvaluemodifiedZmng_clean_sheetsZmng_drawZmng_goals_scoredZmng_lossZmng_underdog_drawZmng_underdog_winZmng_winselectedstartsteam_a_scoreteam_h_scorethreatignorer   errors)r%   r&   r'   r(   r)   concatr*   drop)r   r   playersidxZplayers_season_dfr;   players_gw_dfr   r   r	   get_players.   s&   


rY   c                 C   sZ   d| d  }d| d  }d| d  }| | }| | }| | }t j|||gg ddS )Nr   r9   r   r   team
strength_h
strength_ar#   r%   Series)rowZseason_col_nameZseason_str_h_nameZseason_str_a_nameZteam_idZ
team_str_hZ
team_str_ar   r   r	   get_details_by_seasonG   s   ra   r2   rV   c                 C   s   |  }tj|| ddddd}|jtdd|g d< |d d	|d< d
d tD }|jddg| dd}|jt	 ddd |S )Nr[   r   left _mapleft_onright_onr    suffixesr   axisrZ   Int64c                 S   s2   g | ]}d | d| d| fD ]}|qqS )r   r   r   r   )r   r9   featurer   r   r	   r
   _   s    z'map_team_to_details.<locals>.<listcomp>r   Zname_maprQ   rR   z/players.csvFr#   )
r+   r%   r,   applyra   r.   r   rU   r0   r1   )r2   rV   
players_dfZfeatures_to_be_droppedr   r   r	   map_team_to_detailsS   s"   rp   c                 C   sH   | d }| d }| d }|rt j||gddgdS t j||gddgdS )Nwas_hometeam_h_difficultyteam_a_difficultyteam_gw_diffopponnent_gw_diffr#   r^   )r`   Zis_homeZh_diffZa_diffr   r   r	   assign_gw_difficultyi   s   

rv   c              	   C   s   t  }| }t| D ]G\}}|| }||d |k }	t | dg d }
t j|	|
ddgddgdd	d
}	|	jtdd|	ddg< |	jg dd}	t j	||	gdd}q|S )z
    Create The DataSet to train the model
    - takes each player record and add Contextual Data regarding team / opponent strenght and GW FDR for each gameweek per season
    r9   z/fixtures.csv)eventr   rr   rs   rF   r;   fixturerw   r   rb   rc   rf   r   rj   rt   ru   )rw   id_maprr   rs   r   Tr7   )
r%   r&   r+   	enumerater)   r,   rn   rv   rU   rT   )r   r   rV   r2   Zmodel_dataset_dfro   rW   pathZcurrent_seasonZplayer_seasonZseason_fixtures_dfr   r   r	   add_contextual_datav   s$   r|   c                  C   s\   t tt} ttt}||d dk }t| |}ttt|| }|jt ddd td dS )zPrepare the data for model training by extracting team and player information, mapping teams to their details, and adding contextual data.
    positionAMz/train_v1.csvFr#   DoneN)	r5   r   r   rY   rp   r|   r0   TRAIN_DATA_DIRprint)Z	all_teamsZall_playersZmodel_train_dfr   r   r	   prepare_data   s   


r   __main__)pathlibr   pandasr%   typingr   r1   r   r   iterdirZfoldersr   r   strr&   r5   rY   ra   rp   rv   r|   r   __name__r   r   r   r	   <module>   s&      ,
