
    -
h              
          S SK Jr  S SKrS SKJr  \" S5      r\" S5      r\" S5      r\R                  5        V s/ s H%  o R                  5       (       d  M  U R                  PM'     sn r\ Vs/ s H  n\U-  PM
     snr\ Vs/ s H/  oR                  S5      S    SS  UR                  S5      S	    3PM1     snrS
\\   S\\   S\R"                  4S jrS
\\   S\\   S\R"                  4S jrS rS\R"                  S\R"                  S\R"                  4S jrS rS
\\   S\\   S\R"                  S\R"                  S\R"                  4
S jrS r\S:X  a  \" 5         ggs  sn f s  snf s  snf )    )PathN)Listzdata/processedz
data/trainzdata/raw-      season_pathsseason_namesreturnc                 ~   [         R                  " 5       n[        [        U 5      5       H  nSX    3nSX    3nSX    3n[         R                  " X    S35      nU/ SQ   R                  XEUS.S9nUS:X  a  UR                  5       nO[         R                  " X(S	S
/SS9nUR                   V	s/ s H2  oR                  S5      (       d  U	R                  S5      (       d  M0  U	PM4     n
n	U
 H+  n	X)   R                  S5      R                  [        5      X)'   M-     M     UR                  [         S3SS9  U$ s  sn	f )zM
Return each team that played in the league in the Following seasons season 
id_strength_h_strength_a_z
/teams.csv)namecodeidstrength_overall_homestrength_overall_away)r   r   r   columnsr   r   r   outer)onhow	strength_Findex)pd	DataFramerangelenread_csvrenamecopymerger   
startswithfillnaastypeintto_csvPROCESSED_DATA_DIR)r   r	   teamsiteam_id_season_featureteam_str_h_featureteam_str_a_featureteams_seasons_dfcurrent_season_datacolid_colss              =g:\Projects\Python Projects\fpl_model\scripts\prepare_data.py	get_teamsr4      sO    LLNE3|$%#&|&7!8*<?*;<*<?*;<;;,/):*'EF./uv}}  Nd  j|  G}}  ~6',,.E HHU!' 0!(*E
 #(--i-3>>%3H3HS^^\gMh3-iC**1-44S9EJ ' &, 
LL&'z2%L@L js   1/D:$D:c                    [         R                  " 5       n[        [        U 5      5       H}  n[         R                  " 5       n[        SS5       H8  n [         R                  " X    SU S35      n[         R
                  " XF/SS9nM:     X   US'   [         R
                  " X$/SS9nM     UR                  SS	S
SSS.S9nUR                  / SQSS9nU$ !    M  = f)z4
Return players stats per gameweek for each season 
r   '   z/gws/gwz.csvTignore_indexseasonr   XAgwXGXGC)elementexpected_assistsroundexpected_goalsexpected_goals_concededr   )expected_goal_involvements
creativity	influencekickoff_timetransfers_balancetransfers_intransfers_outvaluemodifiedmng_clean_sheetsmng_drawmng_goals_scoredmng_lossmng_underdog_drawmng_underdog_winmng_winselectedstartsteam_a_scoreteam_h_scorethreatignorer   errors)r   r   r   r   r    concatr!   drop)r   r	   playersidxplayers_season_dfr;   players_gw_dfs          r3   get_playersra   .   s    
 llnGS&(LLN2,B!#0A/B'"T-R!S%'YY0A/P^b%c"  '3&7(#))W7TJ ) nnTZ^!&&(n )G || %F NV  WG Ns   4CCc                 |    SU S    3nSU S    3nSU S    3nX   nX   nX   n[         R                  " XEU// SQS9$ )Nr   r9   r   r   team
strength_h
strength_ar   r   Series)rowseason_col_nameseason_str_h_nameseason_str_a_nameteam_id
team_str_h
team_str_as          r3   get_details_by_seasonrp   G   sg    CM?+O%c(m_5%c(m_5"G'J'J99g:6=? ?    r*   r]   c           	      x   UR                  5       n[        R                  " UU SSSSS9nUR                  [        SS9U/ SQ'   US   R                  S	5      US'   [         VVs/ s H  nS
U 3SU 3SU 34  H  nUPM     M     nnnUR                  SS/U-   SS9nUR                  [         S3SS9  U$ s  snnf )Nrd   r   left _mapleft_onright_onr   suffixesr   axisrc   Int64r   r   r   r   name_maprX   rY   z/players.csvFr   )
r"   r   r#   applyrp   r&   r	   r\   r(   r)   )r*   r]   
players_dfr9   featurefeatures_to_be_droppeds         r3   map_team_to_detailsr   S   s    J	J 6@5E5EF[bc5E5dJ12#F+227;Jv &Nk&$:k&<RS S   
 //6:*>@V*V_g/hJ+,L9Gs   #"B6c                     U S   nU S   nU S   nU(       a  [         R                  " X#/SS/S9$ [         R                  " X2/SS/S9$ )Nwas_hometeam_h_difficultyteam_a_difficultyteam_gw_diffopponnent_gw_diffr   rg   )ri   is_homeh_diffa_diffs       r3   assign_gw_difficultyr   i   sh    *oG$%F%&Fyy&)*,?@B 	B yy&)*,?@B 	Brq   c           
         [         R                  " 5       nUR                  5       n[        U 5       H  u  pgX   nXUS   U:H     n	[         R                  " U S35      / SQ   n
[         R
                  " U	U
SS/SS/SS	S
9n	U	R                  [        SS9U	SS/'   U	R                  / SQS9n	[         R                  " XI/SS9nM     U$ )z
Create The DataSet to train the model
- takes each player record and add Contextual Data regarding team / opponent strenght and GW FDR for each gameweek per season
r9   z/fixtures.csv)eventr   r   r   rF   r;   fixturer   r   rs   rt   rw   r   r{   r   r   )r   id_mapr   r   r   Tr7   )
r   r   r"   	enumerater    r#   r   r   r\   r[   )r   r	   r]   r*   model_dataset_dfr   r^   pathcurrent_seasonplayer_seasonseason_fixtures_dfs              r3   add_contextual_datar   v   s    
 ||~J|,	%*"h#7>#IJ[[D6)?@  CI  J)$d^! AN@S@SThop@S@q~&9:;',,5o,pYY(8'GUYZ -" rq   c                      [        [        [        5      n [        [        [        5      nXS   S:g     n[	        X5      n[        [        [        X5      nUR                  [         S3SS9  [        S5        g)zPrepare the data for model training by extracting team and player information, mapping teams to their details, and adding contextual data.
    positionAMz/train_v1.csvFr   DoneN)	r4   r   r	   ra   r   r   r(   TRAIN_DATA_DIRprint)	all_teamsall_playersmodel_train_dfs      r3   prepare_datar      sj     ,5IlL9K*5=>K &i<K(l;YN^,M:%H	&Mrq   __main__)pathlibr   pandasr   typingr   r)   r   	directoryiterdiris_dirr   foldersr   splitr	   strr   r4   ra   rp   r   r   r   r   __name__)itemfolders   00r3   <module>r      s     *+ l#	 "+!2!2!4
F!49499!4
F29;'V#';T[]T[&LL%a(!"-.v||C/@/C.DET[]T#Y T#Y 2<< <d3i d3i BLL 2
?bll BLL R\\ ,
Bd3i d3i SUS_S_ ikiuiu z|  {G  {G 8 zN k G;]s   E'E'.E,6E1