# Let's use MLFlow to deploy and test a model for the classic Iris dataset. We will use the k-nearest neighbors (KNN) algorithm
# This dataset is the "Hello World" of ML

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import metrics
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn import datasets

from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)

# Load the Iris dataset bundled with scikit-learn
iris = datasets.load_iris()

# Start MLFlow Server in Conda environment
# mlflow server --backend-store-uri file:///Users/coool/Documents/MLFlow_Git/mlflow-database


## Build a dataframe: one column per feature plus the species name

iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
# Translate each numeric class label straight into its species name;
# the numeric label itself is not kept as a column.
iris_df['species'] = iris.target_names[iris.target]


# Check data

iris_df.head()


## EDA - Exploratory Data Analysis

sns.pairplot(data=iris_df, hue='species', diag_kind='kde')
plt.show()


## Train, test Split and fit the model

# Features are every measurement column; the label is the species name.
X = iris_df.drop(['species'], axis=1)
y = iris_df['species']

# Hold out 20% of the rows; random_state pins the split so the same rows
# land in the test set on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)

# Fit on the training split only, so the X_test rows used for the prediction
# checks in this notebook are genuinely unseen by the model.
knn = KNeighborsClassifier(n_neighbors=9)
knn.fit(X_train, y_train)

KNeighborsClassifier(n_neighbors=9)

KNeighborsClassifier(n_neighbors=9)


# Hyper parameter tuning. Let's keep it simple

# Fit one classifier per candidate k on the training split and record its
# accuracy on the held-out split.
#k_range = [3, 5, 10,50]
k_range = list(range(1, 10))
scores = [
    metrics.accuracy_score(
        y_test,
        KNeighborsClassifier(n_neighbors=n).fit(X_train, y_train).predict(X_test),
    )
    for n in k_range
]

# Accuracy as a function of k
plt.plot(k_range, scores)
plt.title('Accuracy Scores for Values of k of k-Nearest-Neighbors')
plt.xlabel('Value of k for KNN')
plt.ylabel('Accuracy Score')
plt.show()


## Sample data to pick for Testing

# Show the held-out features side by side with their labels, matched on the row index
pd.merge(X_test, y_test, left_index=True, right_index=True)


# Index labels of the rows picked for spot checks. They are looked up in
# X_test / y_test with .loc throughout this notebook, so any index value
# shown in the table above can be used here.

test_samples = [1, 134, 84, 21, 128, 48]


# Simple test of predictions to actual values. - No MLFlow So far

# Predict straight from the in-memory classifier and line the result up
# against the true labels of the same rows.
results_df = pd.DataFrame().assign(
    Predictions=knn.predict(X_test.loc[test_samples]),     ## Prediction
    Actuals=y_test.loc[test_samples].values,
)
results_df


import pickle
from urllib.parse import urlparse

import mlflow
from mlflow.models import infer_signature
# Read the configured tracking URI once, keep its scheme, and report the URI
tracking_uri = mlflow.get_tracking_uri()
tracking_url_type_store = urlparse(tracking_uri).scheme

print("MLFlow Tracking DB:", tracking_uri)

MLFlow Tracking DB: http://localhost:5000


# Select the experiment by name; the mlflow.doctor() report below lists it as the active experiment
mlflow.set_experiment(experiment_name="Sreenivas-KNN-Iris")

<Experiment: artifact_location='mlflow-artifacts:/627570918674180859', creation_time=1694915262298, experiment_id='627570918674180859', last_update_time=1694915262298, lifecycle_stage='active', name='Sreenivas-KNN-Iris', tags={}>


# Print MLflow diagnostics from inside a run, so the report includes the
# active run ID and its artifact URI.
with mlflow.start_run():
    mlflow.doctor()

System information: Windows 10.0.22621
Python version: 3.10.11
MLflow version: 2.7.0
MLflow module location: C:\Users\coool\anaconda3\envs\mflow27_1\lib\site-packages\mlflow\__init__.py
Tracking URI: http://localhost:5000
Registry URI: http://localhost:5000
Active experiment ID: 627570918674180859
Active run ID: 3c6ec1fe03a946098a7c3fe1fcc0b4e4
Active run artifact URI: mlflow-artifacts:/627570918674180859/3c6ec1fe03a946098a7c3fe1fcc0b4e4/artifacts
MLflow environment variables: 
  MLFLOW_TRACKING_URI: http://localhost:5000
MLflow dependencies: 
  Flask: 2.3.3
  Jinja2: 3.1.2
  alembic: 1.12.0
  click: 8.1.7
  cloudpickle: 2.2.1
  databricks-cli: 0.17.7
  docker: 6.1.3
  entrypoints: 0.4
  gitpython: 3.1.36
  importlib-metadata: 6.8.0
  markdown: 3.4.4
  matplotlib: 3.8.0
  numpy: 1.24.3
  packaging: 23.1
  pandas: 2.1.0
  protobuf: 4.24.3
  psutil: 5.9.0
  pyarrow: 13.0.0
  pytz: 2023.3.post1
  pyyaml: 6.0.1
  querystring-parser: 1.2.4
  requests: 2.31.0
  scikit-learn: 1.3.0
  scipy: 1.11.2
  sqlalchemy: 2.0.20
  sqlparse: 0.4.4
  virtualenv: 20.24.5
  waitress: 2.1.2


# Register the model in ML Flow. We are NOT building the model. We are using the model built in this notebook 

with mlflow.start_run() as knn_iris_run:

    # Accuracy of the already-fitted classifier over the whole dataset,
    # recorded as the run's "score" metric.
    score = knn.score(X, y)
    print(f"Score: {score}")

    mlflow.log_metric("score", score)

    # Infer the signature from an input frame and the predictions made for
    # that very frame, so both sides of the signature describe the same rows.
    predictions = knn.predict(X)
    signature = infer_signature(X, predictions)

    model_info = mlflow.sklearn.log_model(
                knn, "model", registered_model_name="KNNIrisModel", signature=signature)  ## Log model in MLFlow

    # run_id is the supported attribute; run_uuid is its deprecated alias.
    print(f"Model saved in run: {knn_iris_run.info.run_id}, run_name: {mlflow.active_run().info.run_name}")

Score: 0.98

Registered model 'KNNIrisModel' already exists. Creating a new version of this model...
2023/09/17 17:05:28 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: KNNIrisModel, version 25

Model saved in run: f846fd2633324456a73f53aef1fd6103, run_name: funny-sow-770

Created version '25' of model 'KNNIrisModel'.


## The model should now be visible in the MLFlow UI

model_info.model_uri

'runs:/f846fd2633324456a73f53aef1fd6103/model'


## Make predictions on Model using runId

# Load the logged model back through its run URI as a generic PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(model_info.model_uri)

## Predict and display
results_logged_df = pd.DataFrame().assign(
    Predictions=loaded_model.predict(X_test.loc[test_samples]),   ## Prediction
    Actuals=y_test.loc[test_samples].values,
)
results_logged_df


## Capture the version number using model_info, so that we can move the newly created version into Staging
## Get Curr(ent) version on the model

from mlflow import MlflowClient

client = MlflowClient()

# Search the registry for versions created by this run and take the first hit.
filter_string = "run_id='{}'".format(model_info.run_id)
results = client.search_model_versions(filter_string)
curr_version = results[0].version

print(f"Model URI: {model_info.model_uri}, Model version: {curr_version}")

Model URI: runs:/f846fd2633324456a73f53aef1fd6103/model, Model version: 25


# Move the version captured above into the registry's Staging stage

client.transition_model_version_stage(name="KNNIrisModel", version=curr_version, stage="Staging")

<ModelVersion: aliases=[], creation_timestamp=1694988328701, current_stage='Staging', description='', last_updated_timestamp=1694988334019, name='KNNIrisModel', run_id='f846fd2633324456a73f53aef1fd6103', run_link='', source='mlflow-artifacts:/627570918674180859/f846fd2633324456a73f53aef1fd6103/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='25'>


# Test the model in Staging

# Resolve whichever version currently sits in the Staging stage
staged_model = mlflow.pyfunc.load_model(model_uri="models:/KNNIrisModel/Staging")  ## Get Staged Model

## Predict and display
results_staging_df = pd.DataFrame().assign(
    Predictions=staged_model.predict(X_test.loc[test_samples]),   ## Prediction
    Actuals=y_test.loc[test_samples].values,
)
results_staging_df


# Test the model in Production (a version must already be in the Production stage)

prod_model = mlflow.pyfunc.load_model(model_uri="models:/KNNIrisModel/Production")  ## Get Prod Model

## Predict and display
results_prod_df = pd.DataFrame().assign(
    Predictions=prod_model.predict(X_test.loc[test_samples]),  ## Prediction
    Actuals=y_test.loc[test_samples].values,
)
results_prod_df


## Serve the model: run this command in the conda environment. Once the model is being served, run this cell
#    mlflow models serve -m "models:/KNNIrisModel/Production" --port 5002
## The model is available on port 5002. It looks like we will need a separate port for each model

import requests
import json

# Request body in the "dataframe_split" layout: the feature column names
# plus one list of feature values per sampled row.
message_body = {
    "dataframe_split": {
        "columns": list(X_test),
        "data": X_test.loc[test_samples].values.tolist(),
    }
}

json_object = json.dumps(message_body)

headers = {'Content-Type': 'application/json'}
# The timeout keeps this cell from blocking forever when the model server
# is not running or stops responding.
r = requests.post('http://localhost:5002/invocations',
                  headers=headers,
                  data=json_object,
                  timeout=30)                                                 ## Prediction

print(f"Status code: {r.status_code},  Response: {r.text}")

Status code: 200,  Response: {"predictions": ["setosa", "virginica", "versicolor", "setosa", "virginica", "setosa"]}


## Display predictions from REST API calls

# Take the "predictions" list directly from the decoded JSON response.
# Passing a literal JSON string to pd.read_json() is deprecated in pandas 2.1,
# and it returned a one-column frame where a plain list of labels is wanted.
results_rest_df = pd.DataFrame()
results_rest_df["Predictions"] = r.json()["predictions"]
results_rest_df["Actuals"] = y_test.loc[test_samples].values
results_rest_df


# Two-level column header: one group per stage, each holding the same pair of columns
columns = pd.MultiIndex.from_product(
    [['Dev', 'Staging', 'Prod', 'REST API'], ['Predictions', 'Actuals']]
)

results_final = pd.DataFrame(columns=columns)

# Drop each stage's result frame into its column group, in header order
stage_frames = {
    'Dev': results_logged_df,
    'Staging': results_staging_df,
    'Prod': results_prod_df,
    'REST API': results_rest_df,
}
for stage_name, stage_frame in stage_frames.items():
    results_final[stage_name] = stage_frame

print("\nPredictions vs Actuals at each stage. Note: Model score is 0.98")
print("-"*80)
results_final

Predictions vs Actuals at each stage. Note: Model score is 0.98
--------------------------------------------------------------------------------


import sys
# Show the exact interpreter build this notebook was run with
print(sys.version)

3.10.11 | packaged by Anaconda, Inc. | (main, May 16 2023, 00:55:32) [MSC v.1916 64 bit (AMD64)]

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)	species
0	5.1	3.5	1.4	0.2	setosa
1	4.9	3.0	1.4	0.2	setosa
2	4.7	3.2	1.3	0.2	setosa
3	4.6	3.1	1.5	0.2	setosa
4	5.0	3.6	1.4	0.2	setosa

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)	species
82	5.8	2.7	3.9	1.2	versicolor
134	6.1	2.6	5.6	1.4	virginica
114	5.8	2.8	5.1	2.4	virginica
42	4.4	3.2	1.3	0.2	setosa
109	7.2	3.6	6.1	2.5	virginica
57	4.9	2.4	3.3	1.0	versicolor
1	4.9	3.0	1.4	0.2	setosa
70	5.9	3.2	4.8	1.8	versicolor
25	5.0	3.0	1.6	0.2	setosa
84	5.4	3.0	4.5	1.5	versicolor
66	5.6	3.0	4.5	1.5	versicolor
133	6.3	2.8	5.1	1.5	virginica
102	7.1	3.0	5.9	2.1	virginica
107	7.3	2.9	6.3	1.8	virginica
26	5.0	3.4	1.6	0.4	setosa
23	5.1	3.3	1.7	0.5	setosa
123	6.3	2.7	4.9	1.8	virginica
130	7.4	2.8	6.1	1.9	virginica
21	5.1	3.7	1.5	0.4	setosa
12	4.8	3.0	1.4	0.1	setosa
71	6.1	2.8	4.0	1.3	versicolor
128	6.4	2.8	5.6	2.1	virginica
48	5.3	3.7	1.5	0.2	setosa
72	6.3	2.5	4.9	1.5	versicolor
88	5.6	3.0	4.1	1.3	versicolor
148	6.2	3.4	5.4	2.3	virginica
74	6.4	2.9	4.3	1.3	versicolor
96	5.7	2.9	4.2	1.3	versicolor
63	6.1	2.9	4.7	1.4	versicolor
132	6.4	2.8	5.6	2.2	virginica

MLFlow starts from here¶

Model is tested and logged. Let's move it to Staging and Test¶

Use UI to promote the model into Production and then run the next cell¶

UI can be accessed http://localhost:5000 ¶

Finally let's use REST Service to invoke the model and get prediction¶

Finally consolidate all predictions for display¶

	Dev		Staging		Prod		REST API
	Predictions	Actuals	Predictions	Actuals	Predictions	Actuals	Predictions	Actuals
0	setosa	setosa	setosa	setosa	setosa	setosa	setosa	setosa
1	virginica	virginica	virginica	virginica	virginica	virginica	virginica	virginica
2	versicolor	versicolor	versicolor	versicolor	versicolor	versicolor	versicolor	versicolor
3	setosa	setosa	setosa	setosa	setosa	setosa	setosa	setosa
4	virginica	virginica	virginica	virginica	virginica	virginica	virginica	virginica
5	setosa	setosa	setosa	setosa	setosa	setosa	setosa	setosa

MLFlow starts from here¶

Model is tested and logged. Let's move it to Staging and Test¶

Use UI to promote the model into Production and then run the next cell¶

UI can be accessed http://localhost:5000¶

Finally let's use REST Service to invoke the model and get prediction¶

Finally consolidate all predictions for display¶

UI can be accessed http://localhost:5000 ¶