# "utiml" and "mldr" packages for multi-label classification in R
# https://journal.r-project.org/archive/2018/RJ-2018-041/RJ-2018-041.pdf
# https://github.com/rivolli/utiml
# https://cran.r-project.org/web/packages/mldr/vignettes/mldr.pdf
# https://github.com/fcharte/mldr
library(utiml)
Loading required package: mldr
Enter mldrGUI() to launch mldr's web-based GUI
Loading required package: parallel
Loading required package: ROCR
library(mldr)
# following package offers benchmarks for multi-label classification
library(mldr.datasets)
Attaching package: ‘mldr.datasets’
The following object is masked _by_ ‘.GlobalEnv’:
ng20
The following object is masked from ‘package:stats’:
density
set.seed(123)
ng20 Dataset
summary(ng20)
# ng20, a corpus with 19300 documents, 1006 words and 20 multi-labels
# Ken Lang, "Newsweeder: Learning to filter netnews", 12th ICML Conference
ng20$labels
# not practical, but part of its corpus can be viewed doing
ng20corpus <- ng20$dataset
# ng20's bag of words
dim(ng20corpus)
[1] 19300 1028
# colnames(ng20corpus)
# consult the help of the following function
ng20 <- remove_skewness_labels(ng20, 10)
# label bar plot
plot(ng20, type = "LB")
# visual relations among labels
plot(ng20, type = "LC")
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
# create a holdout partition: train and predict to evaluate
# I created a small test partition as the prediction step takes a long time
ds <- create_holdout_partition(ng20, c(train = 0.90, test = 0.10))
# an external GUI interface to explore the "ng20" dataset
# mldrGUI() # press "escape" to exit GUI
Binary Relevance Naive Bayes
# Binary relevance ML strategy with naive Bayes base classifier
model_BR_NB <- utiml::br(ds$train, "NB")
predictionsBR <- predict(model_BR_NB, ds$test)
resultsBRPerExamples <- multilabel_evaluate(
ds$test, predictionsBR,
c("example-based")
)
resultsBRPerExamples
accuracy F1 hamming-loss precision recall
0.13449404 0.21632577 0.52443005 0.13462829 0.97694301
subset-accuracy
0.01398964
resultsBRPerLabel <- multilabel_evaluate(
ds$test, predictionsBR,
c("label-based")
)
resultsBRPerLabel
macro-AUC macro-F1 macro-precision macro-recall micro-AUC
0.72405297 0.16150400 0.08811967 0.97653916 0.72425348
micro-F1 micro-precision micro-recall
0.16091192 0.08767730 0.97684952
Classifier Chain Naive Bayes
# Classifier Chain ML strategy with naive Bayes base classifier
# Define the chain-order between labels: sample a random order
mychain <- sample(rownames(ng20$labels))
model_CC_NB <- cc(ds$train, "NB", mychain)
predictionsCC <- predict(model_CC_NB, ds$test)
resultsCCPerExamples <- multilabel_evaluate(
ds$test, predictionsCC,
c("example-based")
)
resultsCCPerExamples
accuracy F1 hamming-loss precision recall
0.13706745 0.22035917 0.50878238 0.13720169 0.97590674
subset-accuracy
0.01398964
resultsCCPerLabel <- multilabel_evaluate(
ds$test, predictionsCC,
c("label-based")
)
resultsCCPerLabel
macro-AUC macro-F1 macro-precision macro-recall micro-AUC
0.73162938 0.16577219 0.09068529 0.97540831 0.73184878
micro-F1 micro-precision micro-recall
0.16490199 0.09006038 0.97584298
birds Dataset
# "birds" dataset to predict the set of birds species
# that are present, given a ten-second audio clip:
# https://doi.org/10.1109/MLSP.2013.6661934
summary(birds)
birds$labels
birds <- remove_skewness_labels(birds, 10)
# Type "birds$" for its list of attributes
plot(birds, type = "LB")
# visual relations among labels
plot(birds, type = "LC")
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
# create a holdout partition: train and predict to evaluate
ds <- create_holdout_partition(birds, c(train = 0.66, test = 0.34))
# an external GUI interface to explore the "birds" dataset
# mldrGUI() # press "escape" to exit GUI
Binary Relevance Naive Bayes
# Binary relevance ML strategy with naive Bayes base classifier
model_BR_NB <- utiml::br(ds$train, "NB")
predictionsBR <- predict(model_BR_NB, ds$test)
resultsBRPerExamples <- multilabel_evaluate(
ds$test, predictionsBR,
c("example-based")
)
resultsBRPerExamples
accuracy F1 hamming-loss precision recall
0.07904289 0.12978222 0.69041096 0.08127791 0.44406393
subset-accuracy
0.00000000
resultsBRPerLabel <- multilabel_evaluate(
ds$test, predictionsBR,
c("label-based")
)
resultsBRPerLabel
macro-AUC macro-F1 macro-precision macro-recall micro-AUC
0.57931277 0.13520211 0.07721916 0.85064474 0.56851048
micro-F1 micro-precision micro-recall
0.13236419 0.07172471 0.85643564
Classifier Chain Naive Bayes
# Classifier Chain ML strategy with naive Bayes base classifier
# Define the chain-order between labels: sample a random order
mychain <- sample(rownames(birds$labels))
model_CC_NB <- cc(ds$train, "NB", mychain)
predictionsCC <- predict(model_CC_NB, ds$test)
resultsCCPerExamples <- multilabel_evaluate(
ds$test, predictionsCC,
c("example-based")
)
resultsCCPerExamples
accuracy F1 hamming-loss precision recall
0.07944093 0.13031361 0.69406393 0.08168517 0.44406393
subset-accuracy
0.00000000
resultsCCPerLabel <- multilabel_evaluate(
ds$test, predictionsCC,
c("label-based")
)
resultsCCPerLabel
macro-AUC macro-F1 macro-precision macro-recall micro-AUC
0.58031688 0.13302412 0.07543980 0.85064474 0.57556851
micro-F1 micro-precision micro-recall
0.13175933 0.07136964 0.85643564
enron Dataset
# PROPOSED EXERCISE
# Complete a similar work, for the popular Enron-Corpus of e-mails
# To create the corpus and the associated dataFrame
# A description of its labels appears in the following lists:
# https://bailando.berkeley.edu/enron/enron_categories.txt
# https://data.world/brianray/enron-email-dataset
# where for example "label 2.13" in the lists is "label B.B13" for enron-labels
enron = enron()
Looking for datasetenronin the download directory
Looking for datasetenrononline...
Downloading datasetenron
trying URL 'https://cometa.ujaen.es/public/full/enron.rds'
Content type 'text/plain' length 227057 bytes (221 KB)
downloaded 221 KB
summary(enron)
enron$labels
enron <- remove_skewness_labels(enron, 10)
# Type "enron$" for its list of attributes
plot(enron, type = "LB")
# visual relations among labels
plot(enron, type = "LC")
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
# create a holdout partition: train and predict to evaluate
ds <- create_holdout_partition(enron, c(train = 0.66, test = 0.34))
# an external GUI interface to explore the "enron" dataset
# mldrGUI() # press "escape" to exit GUI
Binary Relevance Naive Bayes
# Binary relevance ML strategy with naive Bayes base classifier
model_BR_NB <- utiml::br(ds$train, "NB")
predictionsBR <- predict(model_BR_NB, ds$test)
resultsBRPerExamples <- multilabel_evaluate(
ds$test, predictionsBR,
c("example-based")
)
resultsBRPerExamples
accuracy F1 hamming-loss precision recall
0.226812312 0.342346750 0.242906489 0.281762099 0.693615840
subset-accuracy
0.001727116
resultsBRPerLabel <- multilabel_evaluate(
ds$test, predictionsBR,
c("label-based")
)
resultsBRPerLabel
macro-AUC macro-F1 macro-precision macro-recall micro-AUC
0.6431262 0.1991138 0.2018842 0.4658723 0.7537622
micro-F1 micro-precision micro-recall
0.3043222 0.1980077 0.6571719
Classifier Chain Naive Bayes
# Classifier Chain ML strategy with naive Bayes base classifier
# Define the chain-order between labels: sample a random order
mychain <- sample(rownames(enron$labels))
model_CC_NB <- cc(ds$train, "NB", mychain)
predictionsCC <- predict(model_CC_NB, ds$test)
resultsCCPerExamples <- multilabel_evaluate(
ds$test, predictionsCC,
c("example-based")
)
resultsCCPerExamples
accuracy F1 hamming-loss precision recall
0.232044302 0.348937174 0.228678345 0.283524350 0.719364257
subset-accuracy
0.003454231
resultsCCPerLabel <- multilabel_evaluate(
ds$test, predictionsCC,
c("label-based")
)
resultsCCPerLabel
macro-AUC macro-F1 macro-precision macro-recall micro-AUC
0.6350099 0.2054387 0.2075607 0.4349603 0.7707133
micro-F1 micro-precision micro-recall
0.3150634 0.2051323 0.6788747
emotions Dataset
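# "emotions" dataset to predict the set of emotions associated with a
# piece of music (see ?emotions to confirm the details).
# NOTE: https://doi.org/10.1109/MLSP.2013.6661934 is the reference given above for "birds"; it is likely a copy-paste leftover here.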
summary(emotions)
emotions$labels
emotions <- remove_skewness_labels(emotions, 10)
# Type "emotions$" for its list of attributes
plot(emotions, type = "LB")
# visual relations among labels
plot(emotions, type = "LC")
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
# create a holdout partition: train and predict to evaluate
ds <- create_holdout_partition(emotions, c(train = 0.66, test = 0.34))
# an external GUI interface to explore the "emotions" dataset
# mldrGUI() # press "escape" to exit GUI
Binary Relevance Naive Bayes
# Binary relevance ML strategy with naive Bayes base classifier
model_BR_NB <- utiml::br(ds$train, "NB")
predictionsBR <- predict(model_BR_NB, ds$test)
resultsBRPerExamples <- multilabel_evaluate(
ds$test, predictionsBR,
c("example-based")
)
resultsBRPerExamples
accuracy F1 hamming-loss precision recall
0.5045380 0.6050684 0.2739274 0.5594059 0.7351485
subset-accuracy
0.1732673
resultsBRPerLabel <- multilabel_evaluate(
ds$test, predictionsBR,
c("label-based")
)
resultsBRPerLabel
macro-AUC macro-F1 macro-precision macro-recall micro-AUC
0.7765587 0.6232502 0.5503731 0.7237345 0.7864504
micro-F1 micro-precision micro-recall
0.6261261 0.5504950 0.7258486
Classifier Chain Naive Bayes
# Classifier Chain ML strategy with naive Bayes base classifier
# Define the chain-order between labels: sample a random order
mychain <- sample(rownames(emotions$labels))
model_CC_NB <- cc(ds$train, "NB", mychain)
predictionsCC <- predict(model_CC_NB, ds$test)
resultsCCPerExamples <- multilabel_evaluate(
ds$test, predictionsCC,
c("example-based")
)
resultsCCPerExamples
accuracy F1 hamming-loss precision recall
0.5153465 0.6146393 0.2656766 0.5730198 0.7400990
subset-accuracy
0.1782178
resultsCCPerLabel <- multilabel_evaluate(
ds$test, predictionsCC,
c("label-based")
)
resultsCCPerLabel
macro-AUC macro-F1 macro-precision macro-recall micro-AUC
0.7794148 0.6292523 0.5617664 0.7229584 0.7890771
micro-F1 micro-precision micro-recall
0.6332574 0.5616162 0.7258486
genbase Dataset
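# "genbase" dataset to predict the set of protein families that a
# protein profile belongs to (see ?genbase to confirm the details).
# NOTE: https://doi.org/10.1109/MLSP.2013.6661934 is the reference given above for "birds"; it is likely a copy-paste leftover here.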
summary(genbase)
genbase$labels
genbase <- remove_skewness_labels(genbase, 10)
# Type "genbase$" for its list of attributes
plot(genbase, type = "LB")
# visual relations among labels
plot(genbase, type = "LC")
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
`major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
# create a holdout partition: train and predict to evaluate
ds <- create_holdout_partition(genbase, c(train = 0.66, test = 0.34))
# an external GUI interface to explore the "genbase" dataset
# mldrGUI() # press "escape" to exit GUI
Binary Relevance Naive Bayes
# Binary relevance ML strategy with naive Bayes base classifier
model_BR_NB <- utiml::br(ds$train, "NB")
predictionsBR <- predict(model_BR_NB, ds$test)
resultsBRPerExamples <- multilabel_evaluate(
ds$test, predictionsBR,
c("example-based")
)
resultsBRPerExamples
accuracy F1 hamming-loss precision recall
0.9622222 0.9751111 0.0050000 0.9622222 1.0000000
subset-accuracy
0.9200000
resultsBRPerLabel <- multilabel_evaluate(
ds$test, predictionsBR,
c("label-based")
)
resultsBRPerLabel
macro-AUC macro-F1 macro-precision macro-recall micro-AUC
1.0000000 0.9322917 0.9007353 1.0000000 0.9991798
micro-F1 micro-precision micro-recall
0.9683099 0.9385666 1.0000000
Classifier Chain Naive Bayes
# Classifier Chain ML strategy with naive Bayes base classifier
# Define the chain-order between labels: sample a random order
mychain <- sample(rownames(genbase$labels))
model_CC_NB <- cc(ds$train, "NB", mychain)
predictionsCC <- predict(model_CC_NB, ds$test)
resultsCCPerExamples <- multilabel_evaluate(
ds$test, predictionsCC,
c("example-based")
)
resultsCCPerExamples
accuracy F1 hamming-loss precision recall
0.954074074 0.969777778 0.006111111 0.954074074 1.000000000
subset-accuracy
0.902222222
resultsCCPerLabel <- multilabel_evaluate(
ds$test, predictionsCC,
c("label-based")
)
resultsCCPerLabel
macro-AUC macro-F1 macro-precision macro-recall micro-AUC
0.9997185 0.9211806 0.8890595 1.0000000 0.9986942
micro-F1 micro-precision micro-recall
0.9615385 0.9259259 1.0000000
---
title: "Multi-Label Classification in R"
output:
  html_notebook: 
    toc: yes
    toc_float: yes
    number_sections: yes
---

```{r}
# "utiml" and "mldr" packages for multi-label classification in R
# https://journal.r-project.org/archive/2018/RJ-2018-041/RJ-2018-041.pdf
# https://github.com/rivolli/utiml
# https://cran.r-project.org/web/packages/mldr/vignettes/mldr.pdf
# https://github.com/fcharte/mldr
# utiml provides the multi-label learners and evaluation helpers called in
# the chunks below (br, cc, multilabel_evaluate, create_holdout_partition).
library(utiml)
# mldr is attached explicitly as well; utiml already loads it as a dependency.
library(mldr)
# following package offers benchmarks for multi-label classification
library(mldr.datasets)
# Seed the RNG once so that the sampled chain orders (and any other random
# step, e.g. the holdout partitions) repeat when the notebook is run from
# top to bottom.
set.seed(123)
```

# ng20 Dataset

```{r}
# ng20, a corpus with 19300 documents, 1006 words and 20 multi-labels
# Ken Lang, "Newsweeder: Learning to filter netnews", 12th ICML Conference
summary(ng20)
ng20$labels

# Printing the whole corpus is not practical, but the underlying
# bag-of-words table can be extracted and inspected, e.g. its dimensions.
# (column names can be listed with: colnames(ng20corpus))
ng20corpus <- ng20$dataset
dim(ng20corpus)

# Filter labels by skewness with threshold t = 10; consult
# ?remove_skewness_labels for the exact meaning of the threshold.
ng20 <- remove_skewness_labels(ng20, t = 10)

# label bar plot
plot(ng20, type = "LB")
# visual relations among labels
plot(ng20, type = "LC")

# Holdout partition: train on one part, predict the other to evaluate.
# The test share is kept small because the prediction step takes a long time.
ds <- create_holdout_partition(
  ng20,
  partitions = c(train = 0.90, test = 0.10)
)
```

```{r}
# an external GUI interface to explore the "ng20" dataset
# mldrGUI() # press "escape" to exit GUI
```

![NG20 labels](../images/ng20_labels.png)

![NG20 labelsets](../images/ng20_labelsets.png)

![NG20 concurrence](../images/ng20_concurrence.png)

## Binary Relevance Naive Bayes

```{r}
# Binary Relevance (BR) strategy with a naive Bayes ("NB") base classifier:
# fit on the training partition, then predict the held-out test partition.
model_BR_NB <- utiml::br(ds$train, base.algorithm = "NB")
predictionsBR <- predict(model_BR_NB, ds$test)

# Example-based measures of the BR predictions against the test labels.
resultsBRPerExamples <- multilabel_evaluate(
  ds$test,
  predictionsBR,
  measures = "example-based"
)
resultsBRPerExamples

# Label-based (macro/micro averaged) measures of the same predictions.
resultsBRPerLabel <- multilabel_evaluate(
  ds$test,
  predictionsBR,
  measures = "label-based"
)
resultsBRPerLabel
```

## Classifier Chain Naive Bayes

```{r}
# Classifier Chain (CC) strategy with a naive Bayes ("NB") base classifier.
# The chain order between labels is a random permutation of the ng20 label
# names.
mychain <- sample(rownames(ng20$labels))
model_CC_NB <- cc(ds$train, base.algorithm = "NB", chain = mychain)
predictionsCC <- predict(model_CC_NB, ds$test)

# Example-based measures of the CC predictions against the test labels.
resultsCCPerExamples <- multilabel_evaluate(
  ds$test,
  predictionsCC,
  measures = "example-based"
)
resultsCCPerExamples

# Label-based (macro/micro averaged) measures of the same predictions.
resultsCCPerLabel <- multilabel_evaluate(
  ds$test,
  predictionsCC,
  measures = "label-based"
)
resultsCCPerLabel
```

# birds Dataset

```{r}
# "birds" dataset to predict the set of birds species
# that are present, given a ten-second audio clip:
# https://doi.org/10.1109/MLSP.2013.6661934
# Overview of the dataset and of its label table.
# (Type "birds$" for its list of attributes.)
summary(birds)
birds$labels

# Filter labels by skewness with threshold t = 10; consult
# ?remove_skewness_labels for the exact meaning of the threshold.
birds <- remove_skewness_labels(birds, t = 10)

# label bar plot
plot(birds, type = "LB")
# visual relations among labels
plot(birds, type = "LC")

# Holdout partition: train on one part, predict the other to evaluate.
ds <- create_holdout_partition(
  birds,
  partitions = c(train = 0.66, test = 0.34)
)
```

```{r}
# an external GUI interface to explore the "birds" dataset
# mldrGUI() # press "escape" to exit GUI
```

![Birds labels](../images/birds_labels.png)

![Birds labelsets](../images/birds_labelsets.png)

![Birds concurrence](../images/birds_concurrence.png)

## Binary Relevance Naive Bayes

```{r}
# Binary Relevance (BR) strategy with a naive Bayes ("NB") base classifier:
# fit on the training partition, then predict the held-out test partition.
model_BR_NB <- utiml::br(ds$train, base.algorithm = "NB")
predictionsBR <- predict(model_BR_NB, ds$test)

# Example-based measures of the BR predictions against the test labels.
resultsBRPerExamples <- multilabel_evaluate(
  ds$test,
  predictionsBR,
  measures = "example-based"
)
resultsBRPerExamples

# Label-based (macro/micro averaged) measures of the same predictions.
resultsBRPerLabel <- multilabel_evaluate(
  ds$test,
  predictionsBR,
  measures = "label-based"
)
resultsBRPerLabel
```

## Classifier Chain Naive Bayes

```{r}	
# Classifier Chain (CC) strategy with a naive Bayes ("NB") base classifier.
# The chain order between labels is a random permutation of the birds label
# names.
mychain <- sample(rownames(birds$labels))
model_CC_NB <- cc(ds$train, base.algorithm = "NB", chain = mychain)
predictionsCC <- predict(model_CC_NB, ds$test)

# Example-based measures of the CC predictions against the test labels.
resultsCCPerExamples <- multilabel_evaluate(
  ds$test,
  predictionsCC,
  measures = "example-based"
)
resultsCCPerExamples

# Label-based (macro/micro averaged) measures of the same predictions.
resultsCCPerLabel <- multilabel_evaluate(
  ds$test,
  predictionsCC,
  measures = "label-based"
)
resultsCCPerLabel
```

# enron Dataset

```{r}
# PROPOSED EXERCISE
# Complete a similar piece of work for the popular Enron corpus of e-mails.
# A description of its labels appears in the following lists:
# https://bailando.berkeley.edu/enron/enron_categories.txt
# https://data.world/brianray/enron-email-dataset
# where, for example, "label 2.13" in the lists is "label B.B13" in the
# enron labels.

# Create the corpus and its associated data frame. enron() is the
# mldr.datasets loader (it fetches the dataset if it is not available
# locally). The call is namespace-qualified because the result is stored
# under the same name as the loader, and `<-` is used for assignment like
# everywhere else in this notebook.
enron <- mldr.datasets::enron()

# Overview of the dataset and of its label table.
# (Type "enron$" for its list of attributes.)
summary(enron)
enron$labels

# Filter labels by skewness with threshold 10; consult
# ?remove_skewness_labels for the exact meaning of the threshold.
enron <- remove_skewness_labels(enron, 10)

# label bar plot
plot(enron, type = "LB")
# visual relations among labels
plot(enron, type = "LC")

# create a holdout partition: train and predict to evaluate
ds <- create_holdout_partition(enron, c(train = 0.66, test = 0.34))
```

```{r}
# an external GUI interface to explore the "enron" dataset
# mldrGUI() # press "escape" to exit GUI
```

![Enron labels](../images/enron_labels.png)

![Enron labelsets](../images/enron_labelsets.png)

![Enron concurrence](../images/enron_concurrence.png)

## Binary Relevance Naive Bayes

```{r}
# Binary Relevance (BR) strategy with a naive Bayes ("NB") base classifier:
# fit on the training partition, then predict the held-out test partition.
model_BR_NB <- utiml::br(ds$train, base.algorithm = "NB")
predictionsBR <- predict(model_BR_NB, ds$test)

# Example-based measures of the BR predictions against the test labels.
resultsBRPerExamples <- multilabel_evaluate(
  ds$test,
  predictionsBR,
  measures = "example-based"
)
resultsBRPerExamples

# Label-based (macro/micro averaged) measures of the same predictions.
resultsBRPerLabel <- multilabel_evaluate(
  ds$test,
  predictionsBR,
  measures = "label-based"
)
resultsBRPerLabel
```

## Classifier Chain Naive Bayes

```{r}	
# Classifier Chain (CC) strategy with a naive Bayes ("NB") base classifier.
# The chain order between labels is a random permutation of the enron label
# names.
mychain <- sample(rownames(enron$labels))
model_CC_NB <- cc(ds$train, base.algorithm = "NB", chain = mychain)
predictionsCC <- predict(model_CC_NB, ds$test)

# Example-based measures of the CC predictions against the test labels.
resultsCCPerExamples <- multilabel_evaluate(
  ds$test,
  predictionsCC,
  measures = "example-based"
)
resultsCCPerExamples

# Label-based (macro/micro averaged) measures of the same predictions.
resultsCCPerLabel <- multilabel_evaluate(
  ds$test,
  predictionsCC,
  measures = "label-based"
)
resultsCCPerLabel
```

# emotions Dataset

```{r}
# "emotions" dataset: predict the set of emotions associated with a piece
# of music (per the mldr documentation; see ?emotions to confirm details).
# NOTE(review): the reference previously cited here,
# https://doi.org/10.1109/MLSP.2013.6661934, is the one this notebook gives
# for the "birds" audio-clip data, so it is most likely a copy-paste
# leftover rather than the source of "emotions".
summary(emotions)
emotions$labels

# Filter labels by skewness with threshold t = 10; consult
# ?remove_skewness_labels for the exact meaning of the threshold.
emotions <- remove_skewness_labels(emotions, t = 10)

# Type "emotions$" for its list of attributes
# label bar plot
plot(emotions, type = "LB")
# visual relations among labels
plot(emotions, type = "LC")

# Holdout partition: train on one part, predict the other to evaluate.
ds <- create_holdout_partition(
  emotions,
  partitions = c(train = 0.66, test = 0.34)
)
```

```{r}
# an external GUI interface to explore the "emotions" dataset
# mldrGUI() # press "escape" to exit GUI
```

![Emotions labels](../images/emotions_labels.png)

![Emotions labelsets](../images/emotions_labelsets.png)

![Emotions concurrence](../images/emotions_concurrence.png)

## Binary Relevance Naive Bayes

```{r}
# Binary Relevance (BR) strategy with a naive Bayes ("NB") base classifier:
# fit on the training partition, then predict the held-out test partition.
model_BR_NB <- utiml::br(ds$train, base.algorithm = "NB")
predictionsBR <- predict(model_BR_NB, ds$test)

# Example-based measures of the BR predictions against the test labels.
resultsBRPerExamples <- multilabel_evaluate(
  ds$test,
  predictionsBR,
  measures = "example-based"
)
resultsBRPerExamples

# Label-based (macro/micro averaged) measures of the same predictions.
resultsBRPerLabel <- multilabel_evaluate(
  ds$test,
  predictionsBR,
  measures = "label-based"
)
resultsBRPerLabel
```

## Classifier Chain Naive Bayes

```{r}	
# Classifier Chain (CC) strategy with a naive Bayes ("NB") base classifier.
# The chain order between labels is a random permutation of the emotions
# label names.
mychain <- sample(rownames(emotions$labels))
model_CC_NB <- cc(ds$train, base.algorithm = "NB", chain = mychain)
predictionsCC <- predict(model_CC_NB, ds$test)

# Example-based measures of the CC predictions against the test labels.
resultsCCPerExamples <- multilabel_evaluate(
  ds$test,
  predictionsCC,
  measures = "example-based"
)
resultsCCPerExamples

# Label-based (macro/micro averaged) measures of the same predictions.
resultsCCPerLabel <- multilabel_evaluate(
  ds$test,
  predictionsCC,
  measures = "label-based"
)
resultsCCPerLabel
```

# genbase Dataset

```{r}
# "genbase" dataset: predict the set of protein families that a protein
# profile belongs to (per the mldr documentation; see ?genbase to confirm).
# NOTE(review): the reference previously cited here,
# https://doi.org/10.1109/MLSP.2013.6661934, is the one this notebook gives
# for the "birds" audio-clip data, so it is most likely a copy-paste
# leftover rather than the source of "genbase".
summary(genbase)
genbase$labels

# Filter labels by skewness with threshold t = 10; consult
# ?remove_skewness_labels for the exact meaning of the threshold.
genbase <- remove_skewness_labels(genbase, t = 10)

# Type "genbase$" for its list of attributes
# label bar plot
plot(genbase, type = "LB")
# visual relations among labels
plot(genbase, type = "LC")

# Holdout partition: train on one part, predict the other to evaluate.
ds <- create_holdout_partition(
  genbase,
  partitions = c(train = 0.66, test = 0.34)
)
```

```{r}
# an external GUI interface to explore the "genbase" dataset
# mldrGUI() # press "escape" to exit GUI
```

![Genbase labels](../images/genbase_labels.png)

![Genbase labelsets](../images/genbase_labelsets.png)

![Genbase concurrence](../images/genbase_concurrence.png)

## Binary Relevance Naive Bayes

```{r}
# Binary Relevance (BR) strategy with a naive Bayes ("NB") base classifier:
# fit on the training partition, then predict the held-out test partition.
model_BR_NB <- utiml::br(ds$train, base.algorithm = "NB")
predictionsBR <- predict(model_BR_NB, ds$test)

# Example-based measures of the BR predictions against the test labels.
resultsBRPerExamples <- multilabel_evaluate(
  ds$test,
  predictionsBR,
  measures = "example-based"
)
resultsBRPerExamples

# Label-based (macro/micro averaged) measures of the same predictions.
resultsBRPerLabel <- multilabel_evaluate(
  ds$test,
  predictionsBR,
  measures = "label-based"
)
resultsBRPerLabel
```

## Classifier Chain Naive Bayes

```{r}	
# Classifier Chain (CC) strategy with a naive Bayes ("NB") base classifier.
# The chain order between labels is a random permutation of the genbase
# label names.
mychain <- sample(rownames(genbase$labels))
model_CC_NB <- cc(ds$train, base.algorithm = "NB", chain = mychain)
predictionsCC <- predict(model_CC_NB, ds$test)

# Example-based measures of the CC predictions against the test labels.
resultsCCPerExamples <- multilabel_evaluate(
  ds$test,
  predictionsCC,
  measures = "example-based"
)
resultsCCPerExamples

# Label-based (macro/micro averaged) measures of the same predictions.
resultsCCPerLabel <- multilabel_evaluate(
  ds$test,
  predictionsCC,
  measures = "label-based"
)
resultsCCPerLabel
```