#Load the data and display the head.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#Read the 2021-2022 player statistics and preview the first rows.
df = pd.read_csv('/content/2021-2022 Football Player Stats.csv')
df.head()

#Keep only the players whose position is exactly 'MF' (midfielder);
#combined position codes are not matched by this filter.
is_midfielder = df['Pos'].isin(['MF'])
midfielder_df = df[is_midfielder]
#Display results
midfielder_df

#Identification columns followed by the goal, passing and creation stats
#that the rest of the analysis works with.
midfielder_columns = [
    'Rk', 'Player', 'Nation', 'Pos', 'Squad', 'Comp', 'MP',
    '90s', 'Goals', 'PasTotCmp', 'PasTotAtt', 'PasTotCmp%',
    'PasTotDist', 'PasTotPrgDist', 'PasShoCmp', 'PasShoAtt', 'PasShoCmp%',
    'PasMedCmp', 'PasMedAtt', 'PasMedCmp%', 'PasLonCmp', 'PasLonAtt', 'PasLonCmp%',
    'Assists', 'PasAss', 'Pas3rd', 'PPA', 'CrsPA', 'PasProg', 'PasAtt',
    'PasLive', 'PasDead', 'PasFK', 'TB', 'Sw', 'PasCrs', 'TI', 'CK',
    'CkIn', 'CkOut', 'CkStr', 'PasCmp', 'SCA',
]
midfielder_df = midfielder_df.loc[:, midfielder_columns]
#Display results
midfielder_df

#Average every numeric stat per league ('Comp') so the leagues can be
#compared with each other and ranked from highest to lowest later on.
league_groups = midfielder_df.groupby('Comp')
averages_by_league = league_groups.mean(numeric_only=True)
print(averages_by_league)

                     Rk    MP   90s  Goals  PasTotCmp  PasTotAtt  PasTotCmp%  \
Comp                                                                           
Bundesliga     1,564.01 18.15 12.94   0.07      33.29      42.28       74.89   
La Liga        1,556.12 20.81 13.96   0.07      38.26      46.73       80.02   
Ligue 1        1,416.02 20.05 14.20   0.06      44.04      52.51       82.92   
Premier League 1,379.84 19.64 15.36   0.10      38.40      46.21       80.86   
Serie A        1,467.47 19.54 13.28   0.07      39.85      48.02       81.12   

                PasTotDist  PasTotPrgDist  PasShoCmp  ...   TB   Sw  PasCrs  \
Comp                                                  ...                     
Bundesliga          599.35         163.18      15.34  ... 0.07 1.23    0.65   
La Liga             725.50         185.33      16.38  ... 0.15 1.86    0.85   
Ligue 1             821.23         194.16      19.09  ... 0.09 1.75    0.76   
Premier League      707.00         171.67      17.10  ... 0.11 1.45    0.76   
Serie A             738.26         189.97      16.89  ... 0.07 1.46    0.79   

                 TI   CK  CkIn  CkOut  CkStr  PasCmp  SCA  
Comp                                                       
Bundesliga     0.22 0.49  0.24   0.18   0.01   33.29 2.27  
La Liga        0.22 0.65  0.23   0.25   0.05   38.26 2.09  
Ligue 1        0.26 0.59  0.24   0.22   0.03   44.04 2.20  
Premier League 0.29 0.57  0.28   0.18   0.03   38.40 2.00  
Serie A        0.23 0.60  0.24   0.25   0.02   39.85 2.01  

[5 rows x 38 columns]

# Loading the market-value data from the CSV file into a DataFrame
Price_df = pd.read_csv('/content/combined_file.csv')

# League spellings used by this file mapped to the spellings used in the stats data
league_name_fixes = {
    '1 Bundesliga': 'Bundesliga',
    'Laliga': 'La Liga'
}

# Selecting the relevant columns from the DataFrame
filtered_price_df = Price_df[['name', 'league', 'nationality', 'short_pos', 'market_value']]

#Filtering the data to only include rows where 'short_pos' is one of 'CM', 'DM', or 'AM'
#This narrows down the data to players in central (CM), defensive (DM), and attacking (AM) midfield positions
filtered_price_df = filtered_price_df[filtered_price_df['short_pos'].isin(['CM','DM','AM'])]

# Keeping one row per player name (the first occurrence)
filtered_price_df = filtered_price_df.drop_duplicates(subset='name', keep='first')

# Cleaning up the league names on the row-level data, not only on the aggregated table,
# so every later use of filtered_price_df shows the same league labels as the averages
# computed below ('Bundesliga' instead of '1 Bundesliga', 'La Liga' instead of 'Laliga')
filtered_price_df = filtered_price_df.assign(
    league=filtered_price_df['league'].replace(league_name_fixes)
)

# Displaying the first few rows of the filtered data
filtered_price_df.head()

# Grouping the filtered data by 'league' and calculate the average 'market_value' for each league
mean_market_values = filtered_price_df.groupby('league')['market_value'].mean().reset_index()

# Renaming columns for clarity: 'league' to 'League' and the mean 'market_value' to 'Average Market Value'
mean_market_values.columns = ['League', 'Average Market Value']

# Setting the display format for floats to show two decimal places with commas for thousands
# (this is a global pandas option and affects every DataFrame printed afterwards)
pd.options.display.float_format = '{:,.2f}'.format

# Sorting the mean market values in ascending order and reset the index for a tidy display
mean_market_values = mean_market_values.sort_values(by='Average Market Value', ascending=True).reset_index(drop=True)

# Showing the final DataFrame
mean_market_values

#Rank the leagues on each averaged stat (rank 1 = highest league average).
#'Rk' only numbers the rows of the source table; it is not a performance stat,
#so it is removed first to keep it out of the composite score.
stat_averages = averages_by_league.drop(columns='Rk', errors='ignore')
ranked_leagues = stat_averages.rank(ascending=False)
print(ranked_leagues)
#Composite Score is the mean of a league's per-stat ranks, so a lower score is better.
#Overall Rank orders the leagues by that score (1 = best) and the table is sorted by it.
ranked_leagues['Composite Score'] = ranked_leagues.mean(axis=1)
ranked_leagues['Overall Rank'] = ranked_leagues['Composite Score'].rank(ascending=True)
ranked_leagues_sorted = ranked_leagues.sort_values(by='Overall Rank')
print(ranked_leagues_sorted[['Composite Score', 'Overall Rank']])

                 Rk   MP  90s  Goals  PasTotCmp  PasTotAtt  PasTotCmp%  \
Comp                                                                     
Bundesliga     1.00 5.00 5.00   3.00       5.00       5.00        5.00   
La Liga        2.00 1.00 3.00   2.00       4.00       3.00        4.00   
Ligue 1        4.00 2.00 2.00   5.00       1.00       1.00        1.00   
Premier League 5.00 3.00 1.00   1.00       3.00       4.00        3.00   
Serie A        3.00 4.00 4.00   4.00       2.00       2.00        2.00   

                PasTotDist  PasTotPrgDist  PasShoCmp  ...   TB   Sw  PasCrs  \
Comp                                                  ...                     
Bundesliga            5.00           5.00       5.00  ... 5.00 5.00    5.00   
La Liga               3.00           3.00       4.00  ... 1.00 1.00    1.00   
Ligue 1               1.00           1.00       1.00  ... 3.00 2.00    3.00   
Premier League        4.00           4.00       2.00  ... 2.00 4.00    4.00   
Serie A               2.00           2.00       3.00  ... 4.00 3.00    2.00   

                 TI   CK  CkIn  CkOut  CkStr  PasCmp  SCA  
Comp                                                       
Bundesliga     5.00 5.00  3.00   5.00   5.00    5.00 1.00  
La Liga        4.00 1.00  5.00   2.00   1.00    4.00 3.00  
Ligue 1        2.00 3.00  4.00   3.00   2.00    1.00 2.00  
Premier League 1.00 4.00  1.00   4.00   3.00    3.00 5.00  
Serie A        3.00 2.00  2.00   1.00   4.00    2.00 4.00  

[5 rows x 38 columns]
                Composite Score  Overall Rank
Comp                                         
Ligue 1                    1.97          1.00
Serie A                    2.66          2.00
La Liga                    2.74          3.00
Premier League             3.26          4.00
Bundesliga                 4.37          5.00

#Stats drawn on the radar chart, one spoke per stat
graphed_stats = ['90s', 'PasCrs', 'PasTotCmp%', 'PasMedCmp%', 'PasLonCmp%', 'Assists']
num_vars = len(graphed_stats)

#One row per league in the ranking table, so this is also the worst possible rank
num_leagues = len(ranked_leagues)

#Premier League rank on each selected stat
premier_league_ranks = ranked_leagues.loc['Premier League', graphed_stats]

#Flip the ranks so the best rank (1) becomes the largest radius (num_leagues)
#and the worst rank becomes radius 1, then repeat the first value to close the outline
flipped_ranks = (num_leagues + 1 - premier_league_ranks).tolist()
normalized_ranks = flipped_ranks + flipped_ranks[:1]

#Evenly spaced spoke angles, with the first angle repeated to close the outline
spoke_angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()
angles = spoke_angles + spoke_angles[:1]

#Polar axes for the radar chart
fig, ax = plt.subplots(figsize=(6, 6), subplot_kw={'polar': True})

#Shaded area plus its outline
ax.fill(angles, normalized_ranks, color='blue', alpha=0.25)
ax.plot(angles, normalized_ranks, color='blue', linewidth=2)

#One tick and label per stat (the repeated closing angle gets no label)
ax.set_xticks(spoke_angles)
ax.set_xticklabels(graphed_stats)

ax.set_title('Premier League Ranking in Midfielder Stats', size=16, color='blue', y=1.1)

#Radial ticks sit at 1..num_leagues but are labelled in reverse, so the label
#on the outer ring reads rank 1 and the label at the centre reads the worst rank
radial_ticks = list(range(1, num_leagues + 1))
ax.set_yticks(radial_ticks)
ax.set_yticklabels([str(rank) for rank in reversed(radial_ticks)])
ax.set_ylim(1, num_leagues)

#Show the plot
plt.show()

import seaborn as sns
import matplotlib.pyplot as plt

#Join the league market values to the league rankings: the 'League' column of
#`mean_market_values` is matched against the index of `ranked_leagues_sorted`
comparison_df = mean_market_values.merge(ranked_leagues_sorted, left_on='League', right_index=True)

#Figure sized for readability
plt.figure(figsize=(10, 6))

#Light blue bars: average market value per league (left y-axis)
ax1 = sns.barplot(x='League', y='Average Market Value', data=comparison_df, color='skyblue')
ax1.set_ylabel('Average Market Value (in Millions)')

#Second y-axis sharing the same x-axis, used for the composite score
score_ax = plt.twinx()

#Red points joined by a line: composite score per league (right y-axis)
sns.pointplot(
    data=comparison_df,
    x='League',
    y='Composite Score',
    color='red',
    markers='o',
    linestyles='-',
    linewidth=1.5,
    label='Composite Score Averaged',
    ax=score_ax
)

#Title, axis labels and legend are all set on the second axis
score_ax.set_title('Comparison of League Composite Scores and Average Market Values')
score_ax.set_ylabel('Composite Score (Red Points)')
score_ax.set_xlabel('League')
score_ax.legend(loc='upper left')

#Display the combined bar and point plot
plt.show()

#Wide figure so the league names on the x-axis stay readable
plt.figure(figsize=(10, 6))

#One box per league showing how 'market_value' is distributed;
#`hue` repeats the x variable so each league takes its own palette colour
box_ax = sns.boxplot(
    x='league',
    y='market_value',
    hue='league',
    data=filtered_price_df,
    palette='Set2',
    dodge=False
)

#Title and axis labels
box_ax.set_title('Distribution of Market Values for Midfielders by League')
box_ax.set_xlabel('League')
box_ax.set_ylabel('Market Value (in Millions)')

#Tilt the league names by 45 degrees
plt.setp(box_ax.get_xticklabels(), rotation=45)

#Replace the legend with an empty, frameless one (it would only repeat the x-axis)
box_ax.legend([], [], frameon=False)

#Displaying the plot
plt.show()

#Average market value per nationality, reduced to the ten highest averages
#and ordered from highest to lowest
mean_value_by_nationality = filtered_price_df.groupby('nationality')['market_value'].mean()
ten_highest = mean_value_by_nationality.nlargest(10)
avg_market_value_by_nationality = ten_highest.sort_values(ascending=False)

#Bar chart of those ten nationalities
nationality_ax = avg_market_value_by_nationality.plot(kind='bar', color='skyblue')
nationality_ax.set_title("Top 10 Nationalities by Average Market Value")
nationality_ax.set_xlabel("Nationality")
nationality_ax.set_ylabel("Average Market Value In Euros (In 10s of Millions)")
plt.show()

import matplotlib.pyplot as plt
import seaborn as sns

#Build the plotting table from the real per-league averages.
#The 'PasTotCmp%' column of comparison_df holds league ranks (1-5), not
#percentages, so plotting it would show rank positions under a percentage label.
pass_value_df = mean_market_values.merge(
    averages_by_league[['PasTotCmp%']],
    left_on='League',
    right_index=True
)

#Plotting the values
fig, ax1 = plt.subplots(figsize=(10, 6))

#Bar plot for the average PasTotCmp% of each league (left y-axis)
sns.barplot(x='League', y='PasTotCmp%', data=pass_value_df, color='skyblue', ax=ax1)
ax1.set_ylabel('PasTotCmp%')

#Overlaying a point plot for Average Market Value on a second y-axis
ax2 = ax1.twinx()
sns.pointplot(
    x='League',
    y='Average Market Value',
    data=pass_value_df,
    color='red',
    markers='o',  # Marker style
    ax=ax2
)

#Customizing marker size and line width on every line seaborn drew on the second axis
for line in ax2.lines:
    line.set_markersize(8)  # Set marker size
    line.set_linewidth(1.5)  # Set line width

#Resizing any collection artists seaborn added to the second axis.
#NOTE(review): whether the points are drawn as collections depends on the seaborn
#version; when there are none this loop does nothing.
for cap in ax2.collections:
    cap.set_sizes([40])

ax2.set_ylabel('Average Market Value (in millions)')

# Adding title and showing plot
plt.title("Comparison of PasTotCmp% and Average Market Value by League")
plt.show()

#Rename the market-value table's 'name' column to 'Player' so both tables share a merge key
Price_df = Price_df.rename(columns={'name': 'Player'})

#Keep the identification columns and the passing/creation stats from the stats table
df_filtered = df[
    ['Rk', 'Player', 'Nation', 'Pos', 'Squad', 'Comp', 'MP',
     '90s', 'Goals', 'PasTotCmp', 'PasTotAtt', 'PasTotCmp%',
     'PasTotDist', 'PasTotPrgDist', 'PasShoCmp', 'PasShoAtt', 'PasShoCmp%',
     'PasMedCmp', 'PasMedAtt', 'PasMedCmp%', 'PasLonCmp', 'PasLonAtt', 'PasLonCmp%',
     'Assists', 'PasAss', 'Pas3rd', 'PPA', 'CrsPA', 'PasProg', 'PasAtt',
     'PasLive', 'PasDead', 'PasFK', 'TB', 'Sw', 'PasCrs', 'TI', 'CK',
     'CkIn', 'CkOut', 'CkStr', 'PasCmp', 'SCA']
]

#Take the market_value column from Price_df, keeping one row per player.
#A player listed more than once there would otherwise duplicate that player's
#stats row in the merge and be counted several times by the models.
Price_df_filtered = Price_df[['Player', 'market_value']].drop_duplicates(subset='Player', keep='first')

#Merge the two dataframes on 'Player'; the inner join keeps only players found in both
final_df = pd.merge(df_filtered, Price_df_filtered, on='Player', how='inner')

#Display the first few rows of the resulting dataframe
print(final_df.head())

   Rk         Player Nation   Pos          Squad        Comp  MP   90s  Goals  \
0   5    Charles Abi    FRA    FW  Saint-Étienne     Ligue 1   1  0.50   0.00   
1   8  Tammy Abraham    ENG    FW           Roma     Serie A  37 34.30   0.50   
2   8  Tammy Abraham    ENG    FW           Roma     Serie A  37 34.30   0.50   
3   9     Luis Abram    PER    DF        Granada     La Liga   8  6.20   0.00   
4  18     Amine Adli    FRA  FWMF     Leverkusen  Bundesliga  25 14.00   0.21   

   PasTotCmp  ...   Sw  PasCrs   TI   CK  CkIn  CkOut  CkStr  PasCmp  SCA  \
0       4.00  ... 0.00    2.00 0.00 0.00  0.00   0.00   0.00    4.00 0.00   
1      14.60  ... 0.32    0.70 0.03 0.00  0.00   0.00   0.00   14.60 2.33   
2      14.60  ... 0.32    0.70 0.03 0.00  0.00   0.00   0.00   14.60 2.33   
3      31.30  ... 1.13    0.00 0.32 0.00  0.00   0.00   0.00   31.30 0.48   
4      28.00  ... 0.36    1.86 0.71 0.14  0.00   0.00   0.00   28.00 3.36   

   market_value  
0  2,400,000.00  
1 38,000,000.00  
2 38,000,000.00  
3  7,000,000.00  
4  9,000,000.00  

[5 rows x 44 columns]

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler

#Model inputs and target. 'Comp' (the league) must stay LAST in the list: it is
#categorical, is never given to the model, and is only used to group the results.
features = [
    'PasTotCmp', 'PasTotAtt', 'PasShoCmp', 'PasShoAtt', 'PasAss',
    'PasTotCmp%', 'PasMedCmp%', 'PasShoCmp%', 'PasTotPrgDist',
    'PasProg', 'PPA', 'PasCrs', 'CrsPA', 'MP',
    'Comp',
]
target = 'Assists'

#Everything except the trailing 'Comp' column is numeric model input
numeric_features = features[:-1]

#Remove rows that miss any selected feature or the target
final_df = final_df.dropna(subset=features + [target])

#Input features and target
X = final_df[features]
y = final_df[target]

#Hold out 20% of the rows for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Standardize the numeric columns: the scaler is fitted on the training rows
#only and then applied unchanged to the test rows
scaler = StandardScaler()
X_train_scaled = X_train.copy()
X_test_scaled = X_test.copy()
X_train_scaled[numeric_features] = scaler.fit_transform(X_train[numeric_features])
X_test_scaled[numeric_features] = scaler.transform(X_test[numeric_features])

#Fit a linear regression on the scaled numeric features
model = LinearRegression()
model.fit(X_train_scaled[numeric_features], y_train)

#Predicted assists for the training and the test rows
y_train_pred = model.predict(X_train_scaled[numeric_features])
y_test_pred = model.predict(X_test_scaled[numeric_features])

#Actual and predicted assists of the test rows next to their league
test_results = pd.DataFrame({
    'Actual': y_test,
    'Predicted': y_test_pred,
    'League': X_test['Comp']
})

#Average actual and predicted assists per league
league_avg = test_results.groupby('League')[['Actual', 'Predicted']].mean()

#Grouped bar chart: actual next to predicted for each league
plt.figure(figsize=(12, 6))

bar_width = 0.35
index = np.arange(len(league_avg))

for offset, column, bar_color in ((0, 'Actual', 'blue'), (bar_width, 'Predicted', 'orange')):
    plt.bar(index + offset, league_avg[column], bar_width, label=column, color=bar_color)

plt.xlabel('League')
plt.ylabel('Average Number of Assists')
plt.title('Average Predicted vs Actual Assists by League')
plt.xticks(index + bar_width / 2, league_avg.index, rotation=45)
plt.legend()
plt.tight_layout()
plt.show()

#Leagues to model, in the order they are reported and plotted
leagues = ['Premier League', 'La Liga', 'Serie A', 'Bundesliga', 'Ligue 1']

#Predictors and target of the market-value model (the same for every league)
features = ['Assists', 'PasTotCmp', 'PasTotPrgDist', 'PasLonCmp', 'PPA', 'SCA', 'CrsPA']
target = 'market_value'

#Store average actual and predicted values for each league
average_actual_values = []
average_predicted_values = []

#Test-set mean absolute error per league, reused for the percentage error below
league_mae = {}

#Create a figure for the bar chart
plt.figure(figsize=(12, 6))

#Map league names to their pre-calculated average market values
league_average_market_values = dict(zip(mean_market_values['League'], mean_market_values['Average Market Value']))

#Train and evaluate one model per league
for league in leagues:
    #Rows of the current league whose position code contains 'M';
    #na=False treats a missing position as "not a midfielder"
    is_league_midfielder = (final_df['Comp'] == league) & final_df['Pos'].str.contains('M', na=False)

    #Set Player column as the index for reference
    league_df = final_df[is_league_midfielder].set_index('Player')

    #Drop rows with missing data
    league_df = league_df.dropna(subset=features + [target])

    #Separate input features (X) and target variable (y)
    X = league_df[features]
    y = league_df[target]

    #Split BEFORE scaling so the test rows cannot influence the scaler
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    #Fit the scaler on the training rows only, then apply it to both parts
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    #Train the regression model
    model = LinearRegression()
    model.fit(X_train, y_train)

    #Predict market values
    y_pred = model.predict(X_test)

    #Calculate the average predicted market value
    avg_predicted = np.mean(y_pred)

    #Use the pre-calculated average market value for this league as the "actual" value.
    #NOTE(review): that average is computed from the market-value table alone, not from
    #the test rows predicted here, so the two bars describe different sets of players.
    avg_actual = league_average_market_values[league]

    #Store the average values for each league
    average_actual_values.append(avg_actual)
    average_predicted_values.append(avg_predicted)

    #Print model evaluation for the league
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    league_mae[league] = mae
    print(f"{league} - Mean Absolute Error: {mae:.2f}, R² Score: {r2:.2f}")

#Bar chart comparing average actual and predicted market values for all leagues
index = np.arange(len(leagues))
bar_width = 0.35

plt.bar(index, average_actual_values, bar_width, label='Average Actual', color='blue')
plt.bar(index + bar_width, average_predicted_values, bar_width, label='Average Predicted', color='orange')

plt.xlabel('Leagues')
plt.ylabel('Market Value')
plt.title('Average Actual vs Predicted Market Values for Midfielders')
plt.xticks(index + bar_width / 2, leagues)
plt.legend()
plt.tight_layout()
plt.show()

#Percentage error per league: the test-set MAE relative to the league's average
#market value. The MAE from the loop above is reused instead of retraining each model.
for league in leagues:
    mae = league_mae[league]
    average_market_value = league_average_market_values[league]
    percentage_error = (mae / average_market_value) * 100
    print(f"{league} - Percentage Error: {percentage_error:.2f}%")

Premier League - Mean Absolute Error: 10177437.19, R² Score: 0.51
La Liga - Mean Absolute Error: 3777996.06, R² Score: 0.27
Serie A - Mean Absolute Error: 5468569.43, R² Score: 0.19
Bundesliga - Mean Absolute Error: 4639366.79, R² Score: -0.39
Ligue 1 - Mean Absolute Error: 8697361.18, R² Score: -0.21

Premier League - Percentage Error: 149.40%
La Liga - Percentage Error: 47.56%
Serie A - Percentage Error: 150.80%
Bundesliga - Percentage Error: 125.84%
Ligue 1 - Percentage Error: 255.21%

#Mount Google Drive in the Colab runtime at /content/drive so the files stored
#in Drive can be reached through the local filesystem.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

#List the contents of the "Colab Notebooks" folder on the mounted Drive.
!ls /content/drive/My\ Drive/Colab\ Notebooks/

'Copy of Demo-01.ipynb'  'Copy of Lab08.ipynb'			 ProjectMilestone1
'Copy of Demo-02.ipynb'  'Copy of Lab09.ipynb'			 ProjectMilestone1Submissio.html
'Copy of Demo-05.ipynb'  'Copy of Lab10.ipynb'			 ProjectMilestone1Submission
'Copy of Demo-08.ipynb'  'Copy of Lab11.ipynb'			 ProjectMilestone2
'Copy of Demo-10.ipynb'  'Copy of Lab12.ipynb'			 Projectmilestone.html
'Copy of Lab01.ipynb'	  CopyofProjectMilestone1		 ProjectMilestone.html
'Copy of Lab02.ipynb'	  CopyofProjectMilestone.html		 Untitled
'Copy of Lab03.ipynb'	 'Copy of Untitled0.ipynb'		 Untitled0.ipynb
'Copy of Lab04.ipynb'	 'Final Copy of Project Milestone 1'	'Untitled (1)'
'Copy of Lab05.ipynb'	 'Final Milestone Draft'		'Untitled (2)'
'Copy of Lab06.ipynb'	 'Joshua Allison Data Science Project'
'Copy of Lab07.ipynb'	  Projectmilestone1

%%shell
# Convert the notebook to HTML. The notebook is stored in Drive as
# "Final Milestone Draft" (with spaces), so the whole path is quoted; unquoted
# spaces are split into separate arguments and nbconvert then matches no files.
jupyter nbconvert --to html "/content/drive/My Drive/Colab Notebooks/Final Milestone Draft"

[NbConvertApp] WARNING | pattern '/content/drive/My Drive/Colab Notebooks/Final' matched no files
[NbConvertApp] WARNING | pattern 'Milestone' matched no files
[NbConvertApp] WARNING | pattern 'Draft' matched no files
This application is used to convert notebook files (*.ipynb)
        to various other formats.

        WARNING: THE COMMANDLINE INTERFACE MAY CHANGE IN FUTURE RELEASES.

Options
=======
The options below are convenience aliases to configurable class-options,
as listed in the "Equivalent to" description-line of the aliases.
To see all configurable class-options for some <cmd>, use:
    <cmd> --help-all

--debug
    set log level to logging.DEBUG (maximize logging output)
    Equivalent to: [--Application.log_level=10]
--show-config
    Show the application's configuration (human-readable format)
    Equivalent to: [--Application.show_config=True]
--show-config-json
    Show the application's configuration (json format)
    Equivalent to: [--Application.show_config_json=True]
--generate-config
    generate default config file
    Equivalent to: [--JupyterApp.generate_config=True]
-y
    Answer yes to any questions instead of prompting.
    Equivalent to: [--JupyterApp.answer_yes=True]
--execute
    Execute the notebook prior to export.
    Equivalent to: [--ExecutePreprocessor.enabled=True]
--allow-errors
    Continue notebook execution even if one of the cells throws an error and include the error message in the cell output (the default behaviour is to abort conversion). This flag is only relevant if '--execute' was specified, too.
    Equivalent to: [--ExecutePreprocessor.allow_errors=True]
--stdin
    read a single notebook file from stdin. Write the resulting notebook with default basename 'notebook.*'
    Equivalent to: [--NbConvertApp.from_stdin=True]
--stdout
    Write notebook output to stdout instead of files.
    Equivalent to: [--NbConvertApp.writer_class=StdoutWriter]
--inplace
    Run nbconvert in place, overwriting the existing notebook (only
            relevant when converting to notebook format)
    Equivalent to: [--NbConvertApp.use_output_suffix=False --NbConvertApp.export_format=notebook --FilesWriter.build_directory=]
--clear-output
    Clear output of current file and save in place,
            overwriting the existing notebook.
    Equivalent to: [--NbConvertApp.use_output_suffix=False --NbConvertApp.export_format=notebook --FilesWriter.build_directory= --ClearOutputPreprocessor.enabled=True]
--coalesce-streams
    Coalesce consecutive stdout and stderr outputs into one stream (within each cell).
    Equivalent to: [--NbConvertApp.use_output_suffix=False --NbConvertApp.export_format=notebook --FilesWriter.build_directory= --CoalesceStreamsPreprocessor.enabled=True]
--no-prompt
    Exclude input and output prompts from converted document.
    Equivalent to: [--TemplateExporter.exclude_input_prompt=True --TemplateExporter.exclude_output_prompt=True]
--no-input
    Exclude input cells and output prompts from converted document.
            This mode is ideal for generating code-free reports.
    Equivalent to: [--TemplateExporter.exclude_output_prompt=True --TemplateExporter.exclude_input=True --TemplateExporter.exclude_input_prompt=True]
--allow-chromium-download
    Whether to allow downloading chromium if no suitable version is found on the system.
    Equivalent to: [--WebPDFExporter.allow_chromium_download=True]
--disable-chromium-sandbox
    Disable chromium security sandbox when converting to PDF..
    Equivalent to: [--WebPDFExporter.disable_sandbox=True]
--show-input
    Shows code input. This flag is only useful for dejavu users.
    Equivalent to: [--TemplateExporter.exclude_input=False]
--embed-images
    Embed the images as base64 dataurls in the output. This flag is only useful for the HTML/WebPDF/Slides exports.
    Equivalent to: [--HTMLExporter.embed_images=True]
--sanitize-html
    Whether the HTML in Markdown cells and cell outputs should be sanitized..
    Equivalent to: [--HTMLExporter.sanitize_html=True]
--log-level=<Enum>
    Set the log level by value or name.
    Choices: any of [0, 10, 20, 30, 40, 50, 'DEBUG', 'INFO', 'WARN', 'ERROR', 'CRITICAL']
    Default: 30
    Equivalent to: [--Application.log_level]
--config=<Unicode>
    Full path of a config file.
    Default: ''
    Equivalent to: [--JupyterApp.config_file]
--to=<Unicode>
    The export format to be used, either one of the built-in formats
            ['asciidoc', 'custom', 'html', 'latex', 'markdown', 'notebook', 'pdf', 'python', 'qtpdf', 'qtpng', 'rst', 'script', 'slides', 'webpdf']
            or a dotted object name that represents the import path for an
            ``Exporter`` class
    Default: ''
    Equivalent to: [--NbConvertApp.export_format]
--template=<Unicode>
    Name of the template to use
    Default: ''
    Equivalent to: [--TemplateExporter.template_name]
--template-file=<Unicode>
    Name of the template file to use
    Default: None
    Equivalent to: [--TemplateExporter.template_file]
--theme=<Unicode>
    Template specific theme(e.g. the name of a JupyterLab CSS theme distributed
    as prebuilt extension for the lab template)
    Default: 'light'
    Equivalent to: [--HTMLExporter.theme]
--sanitize_html=<Bool>
    Whether the HTML in Markdown cells and cell outputs should be sanitized.This
    should be set to True by nbviewer or similar tools.
    Default: False
    Equivalent to: [--HTMLExporter.sanitize_html]
--writer=<DottedObjectName>
    Writer class used to write the
                                        results of the conversion
    Default: 'FilesWriter'
    Equivalent to: [--NbConvertApp.writer_class]
--post=<DottedOrNone>
    PostProcessor class used to write the
                                        results of the conversion
    Default: ''
    Equivalent to: [--NbConvertApp.postprocessor_class]
--output=<Unicode>
    Overwrite base name use for output files.
                Supports pattern replacements '{notebook_name}'.
    Default: '{notebook_name}'
    Equivalent to: [--NbConvertApp.output_base]
--output-dir=<Unicode>
    Directory to write output(s) to. Defaults
                                  to output to the directory of each notebook. To recover
                                  previous default behaviour (outputting to the current
                                  working directory) use . as the flag value.
    Default: ''
    Equivalent to: [--FilesWriter.build_directory]
--reveal-prefix=<Unicode>
    The URL prefix for reveal.js (version 3.x).
            This defaults to the reveal CDN, but can be any url pointing to a copy
            of reveal.js.
            For speaker notes to work, this must be a relative path to a local
            copy of reveal.js: e.g., "reveal.js".
            If a relative path is given, it must be a subdirectory of the
            current directory (from which the server is run).
            See the usage documentation
            (https://nbconvert.readthedocs.io/en/latest/usage.html#reveal-js-html-slideshow)
            for more details.
    Default: ''
    Equivalent to: [--SlidesExporter.reveal_url_prefix]
--nbformat=<Enum>
    The nbformat version to write.
            Use this to downgrade notebooks.
    Choices: any of [1, 2, 3, 4]
    Default: 4
    Equivalent to: [--NotebookExporter.nbformat_version]

Examples
--------

    The simplest way to use nbconvert is

            > jupyter nbconvert mynotebook.ipynb --to html

            Options include ['asciidoc', 'custom', 'html', 'latex', 'markdown', 'notebook', 'pdf', 'python', 'qtpdf', 'qtpng', 'rst', 'script', 'slides', 'webpdf'].

            > jupyter nbconvert --to latex mynotebook.ipynb

            Both HTML and LaTeX support multiple output templates. LaTeX includes
            'base', 'article' and 'report'.  HTML includes 'basic', 'lab' and
            'classic'. You can specify the flavor of the format used.

            > jupyter nbconvert --to html --template lab mynotebook.ipynb

            You can also pipe the output to stdout, rather than a file

            > jupyter nbconvert mynotebook.ipynb --stdout

            PDF is generated via latex

            > jupyter nbconvert mynotebook.ipynb --to pdf

            You can get (and serve) a Reveal.js-powered slideshow

            > jupyter nbconvert myslides.ipynb --to slides --post serve

            Multiple notebooks can be given at the command line in a couple of
            different ways:

            > jupyter nbconvert notebook*.ipynb
            > jupyter nbconvert notebook1.ipynb notebook2.ipynb

            or you can specify the notebooks list in a config file, containing::

                c.NbConvertApp.notebooks = ["my_notebook.ipynb"]

            > jupyter nbconvert --config mycfg.py

To see all available configurables, use `--help-all`.

---------------------------------------------------------------------------
CalledProcessError                        Traceback (most recent call last)
<ipython-input-5-05722dd1f946> in <cell line: 1>()
----> 1 get_ipython().run_cell_magic('shell', '', 'jupyter nbconvert --to html /content/drive/My\\ Drive/Colab\\ Notebooks/Final Milestone Draft\n')

/usr/local/lib/python3.10/dist-packages/google/colab/_shell.py in run_cell_magic(self, magic_name, line, cell)
    332     if line and not cell:
    333       cell = ' '
--> 334     return super().run_cell_magic(magic_name, line, cell)
    335 
    336 

/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell)
   2471             with self.builtin_trap:
   2472                 args = (magic_arg_s, cell)
-> 2473                 result = fn(*args, **kwargs)
   2474             return result
   2475 

/usr/local/lib/python3.10/dist-packages/google/colab/_system_commands.py in _shell_cell_magic(args, cmd)
    110   result = _run_command(cmd, clear_streamed_output=False)
    111   if not parsed_args.ignore_errors:
--> 112     result.check_returncode()
    113   return result
    114 

/usr/local/lib/python3.10/dist-packages/google/colab/_system_commands.py in check_returncode(self)
    135   def check_returncode(self):
    136     if self.returncode:
--> 137       raise subprocess.CalledProcessError(
    138           returncode=self.returncode, cmd=self.args, output=self.output
    139       )

CalledProcessError: Command 'jupyter nbconvert --to html /content/drive/My\ Drive/Colab\ Notebooks/Final Milestone Draft
' returned non-zero exit status 255.

Comparing Stats for Soccer Players Among Top 5 Leagues¶

Introduction¶

Motivation¶

Data Sources¶

ETL (Extraction, Transform, and Load)¶

First Dataset¶

Load the Dataset¶

Filter By Position¶

Filter By Stats¶

Filter by League and Take Average¶

Second Dataset¶

Mean Market Value Per League¶

EDA (Exploratory Data Analysis)¶

Ranked Average¶

Graphing Stats from the Premier League¶

Average Market Value vs Composite Scores Graph¶

Distribution of Market Values for Midfielders by League¶

Top 10 Nationalities by Average Market Value¶

Comparison of PasTotCmp% and Average Market Value by League¶

Model Development¶

Unified Dataset¶

Predicted Assists¶

Predicted Market Values¶

Model Purposes Reiterated¶

Model 1: Predicting Assists from Performance Metrics¶

Model 2: Predicting Market Value from Performance Metrics¶

Conclusion¶

	Rk	Player	Nation	Pos	Squad	Comp	Age	Born	MP	Starts	...	Off	Crs	TklW	PKcon	OG	Recov	AerWon	AerLost	AerWon%
0	1	Max Aarons	ENG	DF	Norwich City	Premier League	22.00	2000	34	32	...	0.03	1.41	1.16	0.06	0.03	5.53	0.47	1.59	22.70
1	2	Yunis Abdelhamid	MAR	DF	Reims	Ligue 1	34.00	1987	34	34	...	0.00	0.06	1.39	0.03	0.00	6.77	2.02	1.36	59.80
2	3	Salis Abdul Samed	GHA	MF	Clermont Foot	Ligue 1	22.00	2000	31	29	...	0.00	0.36	1.24	0.00	0.00	8.76	0.88	0.88	50.00
3	4	Laurent Abergel	FRA	MF	Lorient	Ligue 1	29.00	1993	34	34	...	0.03	0.79	2.23	0.00	0.00	8.87	0.43	0.43	50.00
4	5	Charles Abi	FRA	FW	Saint-Étienne	Ligue 1	22.00	2000	1	1	...	0.00	2.00	0.00	0.00	0.00	4.00	2.00	0.00	100.00

	League	Average Market Value
0	Ligue 1	3,407,936.51
1	Serie A	3,626,284.72
2	Bundesliga	3,686,842.11
3	Premier League	6,812,295.08
4	La Liga	7,944,444.44