Would You Survive?

yn = input("Would you like to install package dependencies? (y/n)")

if yn == "y":
    print("Installing dependencies")
    !pip3 install pandas
    !pip3 install numpy
    !pip3 install scikit-learn
else:
    print("Moving on")

Moving on

print("Importing packages")
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import pdb
import pickle  # Import pickle for model saving

Importing packages


/usr/lib/python3/dist-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.17.3 and <1.25.0 is required for this version of SciPy (detected version 1.25.2
  warnings.warn(f"A NumPy version >={np_minversion} and <{np_maxversion}"

print("Loading, pre-processing, and splitting data")

# Load the training data from the current working directory
data_train = pd.read_csv('./train.csv')

# Fill missing values for Age with the column mean.
# The result is assigned back rather than calling fillna(inplace=True) on the
# selected column: the in-place form operates on an intermediate object, which
# pandas 2.x warns about and which leaves the DataFrame unchanged once
# copy-on-write is active.
data_train['Age'] = data_train['Age'].fillna(data_train['Age'].mean())

# Encode 'Sex' numerically: male -> 0, female -> 1
data_train['Sex'] = data_train['Sex'].map({'male': 0, 'female': 1})

# Select features and target variable. The column order here is the order
# the fitted model expects at prediction time.
X = data_train[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']]
y = data_train['Survived']

# Split the data into training and testing sets (20% held out); the fixed
# random_state keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Loading, pre-processing, and splitting data

print("Training model")

# Build a logistic regression classifier with the liblinear solver and fit it
# on the training split. fit() returns the estimator itself, so construction
# and training are chained into a single assignment.
log_reg = LogisticRegression(solver='liblinear').fit(X_train, y_train)

print("Training completed")

# Persist the fitted model with pickle, then read it straight back so the
# following code works with the deserialized copy.
save_path = "logistic_regression_model.pkl"
print(f"Saving model to: {save_path}")

# Serialize the model to bytes and write them out in one call
with open(save_path, 'wb') as model_file:
    model_file.write(pickle.dumps(log_reg))

# Round trip: read the bytes back and rebuild the model object
with open(save_path, 'rb') as model_file:
    loaded_model = pickle.loads(model_file.read())

print("Finished saving trained model")

# pdb.set_trace()

# Interactive prediction: build a single passenger from user input and
# classify it with the model that was reloaded from disk.

repeat = input("Would you like to create a character and see whether you would have survived the Titanic?")

if repeat.strip().lower() == "y":
    # Values are collected in the same order as the training features:
    # Pclass, Sex, Age, SibSp, Parch, Fare.
    userInput = [int(input("What class would your character have been when they lived? (1, 2, 3),")),
                 # Training encoded male as 0 and female as 1, so the prompt
                 # has to ask for the same coding or the sex is inverted.
                 int(input("Is your character male (0) or female (1)? ")),
                 # Age and fare are continuous values; accept decimals such as 7.25
                 float(input("How old is your character?")),
                 int(input("How many siblings does your character have aboard? ")),
                 int(input("How many parents or children does your character have aboard? ")),
                 float(input("How expensive was your characters ship ticket? "))]

    # Label the row with the feature names the model was fitted with so that
    # scikit-learn does not warn about missing feature names.
    features = pd.DataFrame([userInput], columns=loaded_model.feature_names_in_)
    y_pred = loaded_model.predict(features)

    # predict() returns an array with one label per input row; only one row here
    print(y_pred[0])
    if y_pred[0] == 1:
        print("Your character would have survived!")
    else:
        print("Your character would not have survived :(")

Training model
Training completed
Saving model to: logistic_regression_model.pkl
Finished saving trained model


0
Your character would not have survived :(


/home/dudeamabobby/.local/lib/python3.10/site-packages/sklearn/base.py:464: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names
  warnings.warn(