{
 "nbformat": 4,
 "nbformat_minor": 0,
 "metadata": {
  "colab": {
   "name": "PS-Imbalanced-learning.ipynb",
   "provenance": [],
   "authorship_tag": "ABX9TyP519V+EEAWyn0Bsi+yqm94"
  },
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Imbalanced learning on a data stream with river\n",
    "\n",
    "Online logistic regression on river's `CreditCard` fraud dataset (492 frauds out of 284,807 transactions). A plain baseline is compared against resampling wrappers, a weighted log loss and the focal loss. Every model is scored with `evaluate.progressive_val_score` on F1, ROC AUC, precision and recall.\n",
    "\n",
    "**Summary of the recorded run:** the unweighted baseline and the focal loss reach the best F1 (about 79%). The resampling wrappers push recall to 81-85% and ROC AUC up to about 96%, but precision falls to 20-33%. The full table is in the last section.\n",
    "\n",
    "Run the notebook top to bottom (Restart & Run All): every section rebuilds `model` and `metric` from scratch."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "B3u1UXj6Cq-s"
   },
   "outputs": [],
   "source": [
    "# TODO: pin the river version once confirmed. This notebook was last executed in May 2022\n",
    "# and relies on `metrics.Metrics` and on the `imblearn` samplers taking `classifier=...`.\n",
    "%pip install river"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "I_2I_U_ODIkQ"
   },
   "outputs": [],
   "source": [
    "import collections\n",
    "from river import datasets\n",
    "from river import linear_model\n",
    "from river import metrics\n",
    "from river import evaluate\n",
    "from river import preprocessing\n",
    "from river import optim\n",
    "from river import imblearn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Settings shared by every experiment below.\n",
    "SEED = 42  # makes the random samplers reproducible (they previously ran with seed=None)\n",
    "DESIRED_DIST = {0: .8, 1: .2}  # class distribution requested from the samplers\n",
    "SAMPLING_RATE = .1  # passed to imblearn.RandomSampler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_metric():\n",
    "    \"\"\"Return a fresh F1 / ROC AUC / precision / recall collection.\n",
    "\n",
    "    A new instance is built for every evaluation so that the scores of one\n",
    "    model are never accumulated into the next one.\n",
    "    \"\"\"\n",
    "    return metrics.Metrics(\n",
    "        metrics=(\n",
    "            metrics.F1(),\n",
    "            metrics.ROCAUC(),\n",
    "            metrics.Precision(),\n",
    "            metrics.Recall(),\n",
    "        )\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Data and class distribution\n",
    "\n",
    "The recorded run printed `0: 284315 (99.82725%)` and `1: 492 (0.17275%)`: fewer than 2 transactions in 1,000 are frauds."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "frYl54-wC2rT"
   },
   "outputs": [],
   "source": [
    "X_y = datasets.CreditCard()\n",
    "\n",
    "# One pass over the stream to count how often each label occurs.\n",
    "counts = collections.Counter(y for _, y in X_y)\n",
    "total = sum(counts.values())  # computed once instead of on every loop iteration\n",
    "\n",
    "for c, count in counts.items():\n",
    "    print(f'{c}: {count} ({count / total:.5%})')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "sMgeGbHyI0AU"
   },
   "outputs": [],
   "source": [
    "X_y"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "xb4rm5i0H0Q_"
   },
   "source": [
    "## Baseline model\n",
    "\n",
    "Standard scaling followed by a logistic regression with its default settings."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "9VR8v4VkC6CQ"
   },
   "outputs": [],
   "source": [
    "model = (\n",
    "    preprocessing.StandardScaler() |\n",
    "    linear_model.LogisticRegression()\n",
    ")\n",
    "\n",
    "metric = make_metric()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "QAqm-yUSJ1EL"
   },
   "outputs": [],
   "source": [
    "model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "-x6AbDVlG85P"
   },
   "outputs": [],
   "source": [
    "evaluate.progressive_val_score(X_y, model, metric)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "pqnn2QebH7cL"
   },
   "source": [
    "## Resampling Methods\n",
    "\n",
    "Each sampler wraps the classifier and is asked for the class distribution in `DESIRED_DIST`. All samplers receive `SEED` so that their scores can be reproduced."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "isZCAFFWIBkd"
   },
   "outputs": [],
   "source": [
    "# Undersampling\n",
    "\n",
    "model = (\n",
    "    preprocessing.StandardScaler() |\n",
    "    imblearn.RandomUnderSampler(\n",
    "        classifier=linear_model.LogisticRegression(),\n",
    "        desired_dist=DESIRED_DIST,\n",
    "        seed=SEED,\n",
    "    )\n",
    ")\n",
    "\n",
    "metric = make_metric()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "c50L1CinJH8z"
   },
   "outputs": [],
   "source": [
    "model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "7qomydTuIBW_"
   },
   "outputs": [],
   "source": [
    "evaluate.progressive_val_score(X_y, model, metric)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "VzG0pHYkIBHZ"
   },
   "outputs": [],
   "source": [
    "# Oversampling\n",
    "\n",
    "model = (\n",
    "    preprocessing.StandardScaler() |\n",
    "    imblearn.RandomOverSampler(\n",
    "        classifier=linear_model.LogisticRegression(),\n",
    "        desired_dist=DESIRED_DIST,\n",
    "        seed=SEED,\n",
    "    )\n",
    ")\n",
    "\n",
    "metric = make_metric()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "5wM130OKJwx4"
   },
   "outputs": [],
   "source": [
    "model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "cw-NU1TGJMV_"
   },
   "outputs": [],
   "source": [
    "evaluate.progressive_val_score(X_y, model, metric)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "3CJwYDMlHBca"
   },
   "outputs": [],
   "source": [
    "# Under- and oversampling combined\n",
    "\n",
    "model = (\n",
    "    preprocessing.StandardScaler() |\n",
    "    imblearn.RandomSampler(\n",
    "        classifier=linear_model.LogisticRegression(),\n",
    "        desired_dist=DESIRED_DIST,\n",
    "        sampling_rate=SAMPLING_RATE,\n",
    "        seed=SEED,\n",
    "    )\n",
    ")\n",
    "\n",
    "metric = make_metric()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "3OGKRaTXKc0r"
   },
   "outputs": [],
   "source": [
    "evaluate.progressive_val_score(X_y, model, metric)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "WyCGbGbiKq81"
   },
   "source": [
    "## Loss weighting and imbalance-aware losses\n",
    "\n",
    "Instead of resampling the stream, these models change the loss that the logistic regression optimises."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "vPC2tnIrDQJf"
   },
   "outputs": [],
   "source": [
    "# Weight the positive class 5x more than the negative class\n",
    "\n",
    "model = (\n",
    "    preprocessing.StandardScaler() |\n",
    "    linear_model.LogisticRegression(\n",
    "        loss=optim.losses.Log(weight_pos=5)\n",
    "    )\n",
    ")\n",
    "\n",
    "metric = make_metric()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "hrU-S1NLDv1t"
   },
   "outputs": [],
   "source": [
    "evaluate.progressive_val_score(X_y, model, metric)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "9AiEVtrhKz-z"
   },
   "outputs": [],
   "source": [
    "# Focal loss for imbalanced classification\n",
    "\n",
    "model = (\n",
    "    preprocessing.StandardScaler() |\n",
    "    linear_model.LogisticRegression(loss=optim.losses.BinaryFocalLoss(2, 1))\n",
    ")\n",
    "\n",
    "metric = make_metric()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "5lWMKZQELSbL"
   },
   "outputs": [],
   "source": [
    "evaluate.progressive_val_score(X_y, model, metric)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "RDK-WJR-LZaU"
   },
   "source": [
    "## Mixing it all together\n",
    "\n",
    "The combined sampler wrapped around a logistic regression trained with the focal loss."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "qpLT4lgFLS6U"
   },
   "outputs": [],
   "source": [
    "model = (\n",
    "    preprocessing.StandardScaler() |\n",
    "    imblearn.RandomSampler(\n",
    "        classifier=linear_model.LogisticRegression(loss=optim.losses.BinaryFocalLoss(2, 1)),\n",
    "        desired_dist=DESIRED_DIST,\n",
    "        sampling_rate=SAMPLING_RATE,\n",
    "        seed=SEED,\n",
    "    )\n",
    ")\n",
    "\n",
    "metric = make_metric()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "8saQ3cXqMAeV"
   },
   "outputs": [],
   "source": [
    "evaluate.progressive_val_score(X_y, model, metric)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Results of the recorded run\n",
    "\n",
    "Scores from the run that was originally saved with this notebook. The samplers were unseeded in that run (`seed=None`), so the sampler rows will differ slightly when the notebook is re-run with `SEED`.\n",
    "\n",
    "| Approach | F1 | ROC AUC | Precision | Recall |\n",
    "|---|---|---|---|---|\n",
    "| Baseline | 79.51% | 89.11% | 88.53% | 72.15% |\n",
    "| `RandomUnderSampler` | 38.55% | 95.08% | 24.97% | 84.55% |\n",
    "| `RandomOverSampler` | 47.27% | 91.81% | 33.36% | 81.10% |\n",
    "| `RandomSampler` | 36.32% | 94.23% | 23.15% | 84.15% |\n",
    "| Log loss, `weight_pos=5` | 77.33% | 91.43% | 74.76% | 80.08% |\n",
    "| Focal loss | 79.12% | 91.31% | 86.68% | 72.76% |\n",
    "| `RandomSampler` + focal loss | 32.50% | 95.97% | 20.13% | 84.35% |\n",
    "\n",
    "In this run the resampling wrappers trade precision for recall: recall and ROC AUC go up while precision and F1 drop sharply. Weighting the positive class raises recall to about 80% at a much smaller cost in precision, and the focal loss stays close to the baseline on F1 while improving ROC AUC."
   ]
  }
 ]
}