# "utiml" and "mldr" packages for multi-label classification in R
# https://journal.r-project.org/archive/2018/RJ-2018-041/RJ-2018-041.pdf
# https://github.com/rivolli/utiml
# https://cran.r-project.org/web/packages/mldr/vignettes/mldr.pdf
# https://github.com/fcharte/mldr
library(utiml)
Loading required package: mldr
Enter mldrGUI() to launch mldr's web-based GUI
Loading required package: parallel
Loading required package: ROCR
library(mldr)
# the following package provides benchmark datasets for multi-label classification
library(mldr.datasets)

Attaching package: ‘mldr.datasets’

The following object is masked _by_ ‘.GlobalEnv’:

    ng20

The following object is masked from ‘package:stats’:

    density
set.seed(123)

1 ng20 Dataset

summary(ng20)
# ng20: a corpus of 19300 documents described by 1006 bag-of-words features and 20 labels
# Ken Lang, "NewsWeeder: Learning to Filter Netnews", Proc. 12th International Conference on Machine Learning (ICML 1995)
ng20$labels
# printing the full corpus is not practical, but part of it can be inspected via
ng20corpus <- ng20$dataset
# ng20's bag-of-words representation (features plus label columns)
dim(ng20corpus)
[1] 19300  1028
# colnames(ng20corpus)
# see ?remove_skewness_labels: it drops highly skewed labels (threshold of 10 examples)
ng20 <- remove_skewness_labels(ng20, 10)
# label bar plot
plot(ng20, type = "LB")

# visual relations among labels
plot(ng20, type = "LC")

# create a holdout partition: train and predict to evaluate
# I created a small test partition as the prediction step takes a long time
ds <- create_holdout_partition(ng20, c(train = 0.90, test = 0.10))
# an external GUI interface to explore the "ng20" dataset
# mldrGUI() # press "escape" to exit GUI

NG20 labels

NG20 labelsets

NG20 concurrence

1.1 Binary Relevance Naive Bayes

# Binary relevance ML strategy with naive Bayes base classifier
model_BR_NB <- utiml::br(ds$train, "NB")
predictionsBR <- predict(model_BR_NB, ds$test)
resultsBRPerExamples <- multilabel_evaluate(
    ds$test, predictionsBR,
    c("example-based")
)
resultsBRPerExamples
       accuracy              F1    hamming-loss       precision          recall 
     0.13449404      0.21632577      0.52443005      0.13462829      0.97694301 
subset-accuracy 
     0.01398964 
resultsBRPerLabel <- multilabel_evaluate(
    ds$test, predictionsBR,
    c("label-based")
)
resultsBRPerLabel
      macro-AUC        macro-F1 macro-precision    macro-recall       micro-AUC 
     0.72405297      0.16150400      0.08811967      0.97653916      0.72425348 
       micro-F1 micro-precision    micro-recall 
     0.16091192      0.08767730      0.97684952 

1.2 Classifier Chain Naive Bayes

# Classifier Chain ML strategy with naive Bayes base classifier
# Define the chain-order between labels: sample a random order
mychain <- sample(rownames(ng20$labels))
model_CC_NB <- cc(ds$train, "NB", mychain)
predictionsCC <- predict(model_CC_NB, ds$test)
resultsCCPerExamples <- multilabel_evaluate(
    ds$test, predictionsCC,
    c("example-based")
)
resultsCCPerExamples
       accuracy              F1    hamming-loss       precision          recall 
     0.13706745      0.22035917      0.50878238      0.13720169      0.97590674 
subset-accuracy 
     0.01398964 
resultsCCPerLabel <- multilabel_evaluate(
    ds$test, predictionsCC,
    c("label-based")
)
resultsCCPerLabel
      macro-AUC        macro-F1 macro-precision    macro-recall       micro-AUC 
     0.73162938      0.16577219      0.09068529      0.97540831      0.73184878 
       micro-F1 micro-precision    micro-recall 
     0.16490199      0.09006038      0.97584298 

2 birds Dataset

# "birds" dataset to predict the set of birds species
# that are present, given a ten-second audio clip:
# https://doi.org/10.1109/MLSP.2013.6661934
summary(birds)
birds$labels
birds <- remove_skewness_labels(birds, 10)
# Type "birds$" for its list of attributes
plot(birds, type = "LB")

# visual relations among labels
plot(birds, type = "LC")

# create a holdout partition: train and predict to evaluate
ds <- create_holdout_partition(birds, c(train = 0.66, test = 0.34))
# an external GUI interface to explore the "birds" dataset
# mldrGUI() # press "escape" to exit GUI

Birds labels

Birds labelsets

Birds concurrence

2.1 Binary Relevance Naive Bayes

# Binary relevance ML strategy with naive Bayes base classifier
model_BR_NB <- utiml::br(ds$train, "NB")
predictionsBR <- predict(model_BR_NB, ds$test)
resultsBRPerExamples <- multilabel_evaluate(
    ds$test, predictionsBR,
    c("example-based")
)
resultsBRPerExamples
       accuracy              F1    hamming-loss       precision          recall 
     0.07904289      0.12978222      0.69041096      0.08127791      0.44406393 
subset-accuracy 
     0.00000000 
resultsBRPerLabel <- multilabel_evaluate(
    ds$test, predictionsBR,
    c("label-based")
)
resultsBRPerLabel
      macro-AUC        macro-F1 macro-precision    macro-recall       micro-AUC 
     0.57931277      0.13520211      0.07721916      0.85064474      0.56851048 
       micro-F1 micro-precision    micro-recall 
     0.13236419      0.07172471      0.85643564 

2.2 Classifier Chain Naive Bayes

# Classifier Chain ML strategy with naive Bayes base classifier
# Define the chain-order between labels: sample a random order
mychain <- sample(rownames(birds$labels))
model_CC_NB <- cc(ds$train, "NB", mychain)
predictionsCC <- predict(model_CC_NB, ds$test)
resultsCCPerExamples <- multilabel_evaluate(
    ds$test, predictionsCC,
    c("example-based")
)
resultsCCPerExamples
       accuracy              F1    hamming-loss       precision          recall 
     0.07944093      0.13031361      0.69406393      0.08168517      0.44406393 
subset-accuracy 
     0.00000000 
resultsCCPerLabel <- multilabel_evaluate(
    ds$test, predictionsCC,
    c("label-based")
)
resultsCCPerLabel
      macro-AUC        macro-F1 macro-precision    macro-recall       micro-AUC 
     0.58031688      0.13302412      0.07543980      0.85064474      0.57556851 
       micro-F1 micro-precision    micro-recall 
     0.13175933      0.07136964      0.85643564 

3 enron Dataset

# PROPOSED EXERCISE
# Carry out a similar analysis for the popular Enron corpus of e-mails.
# enron() downloads the corpus and builds the associated mldr object.
# A description of its labels appears in the following lists:
# https://bailando.berkeley.edu/enron/enron_categories.txt
# https://data.world/brianray/enron-email-dataset
# For example, "label 2.13" in those lists corresponds to label "B.B13" in the enron dataset
enron <- enron()
Looking for dataset enron in the download directory

Looking for dataset enron online...

Downloading dataset enron

trying URL 'https://cometa.ujaen.es/public/full/enron.rds'
Content type 'text/plain' length 227057 bytes (221 KB)
downloaded 221 KB
summary(enron)
enron$labels
enron <- remove_skewness_labels(enron, 10)
# Type "enron$" for its list of attributes
plot(enron, type = "LB")

# visual relations among labels
plot(enron, type = "LC")

# create a holdout partition: train and predict to evaluate
ds <- create_holdout_partition(enron, c(train = 0.66, test = 0.34))
# an external GUI interface to explore the "enron" dataset
# mldrGUI() # press "escape" to exit GUI

Enron labels

Enron labelsets

Enron concurrence

3.1 Binary Relevance Naive Bayes

# Binary relevance ML strategy with naive Bayes base classifier
model_BR_NB <- utiml::br(ds$train, "NB")
predictionsBR <- predict(model_BR_NB, ds$test)
resultsBRPerExamples <- multilabel_evaluate(
    ds$test, predictionsBR,
    c("example-based")
)
resultsBRPerExamples
       accuracy              F1    hamming-loss       precision          recall 
    0.226812312     0.342346750     0.242906489     0.281762099     0.693615840 
subset-accuracy 
    0.001727116 
resultsBRPerLabel <- multilabel_evaluate(
    ds$test, predictionsBR,
    c("label-based")
)
resultsBRPerLabel
      macro-AUC        macro-F1 macro-precision    macro-recall       micro-AUC 
      0.6431262       0.1991138       0.2018842       0.4658723       0.7537622 
       micro-F1 micro-precision    micro-recall 
      0.3043222       0.1980077       0.6571719 

3.2 Classifier Chain Naive Bayes

# Classifier Chain ML strategy with naive Bayes base classifier
# Define the chain-order between labels: sample a random order
mychain <- sample(rownames(enron$labels))
model_CC_NB <- cc(ds$train, "NB", mychain)
predictionsCC <- predict(model_CC_NB, ds$test)
resultsCCPerExamples <- multilabel_evaluate(
    ds$test, predictionsCC,
    c("example-based")
)
resultsCCPerExamples
       accuracy              F1    hamming-loss       precision          recall 
    0.232044302     0.348937174     0.228678345     0.283524350     0.719364257 
subset-accuracy 
    0.003454231 
resultsCCPerLabel <- multilabel_evaluate(
    ds$test, predictionsCC,
    c("label-based")
)
resultsCCPerLabel
      macro-AUC        macro-F1 macro-precision    macro-recall       micro-AUC 
      0.6350099       0.2054387       0.2075607       0.4349603       0.7707133 
       micro-F1 micro-precision    micro-recall 
      0.3150634       0.2051323       0.6788747 

4 emotions Dataset

# "emotions" dataset to predict the set of emotions species
# that are present, given a ten-second audio clip:
# https://doi.org/10.1109/MLSP.2013.6661934
summary(emotions)
emotions$labels
emotions <- remove_skewness_labels(emotions, 10)
# Type "emotions$" for its list of attributes
plot(emotions, type = "LB")

# visual relations among labels
plot(emotions, type = "LC")

# create a holdout partition: train and predict to evaluate
ds <- create_holdout_partition(emotions, c(train = 0.66, test = 0.34))
# an external GUI interface to explore the "emotions" dataset
# mldrGUI() # press "escape" to exit GUI

Emotions labels

Emotions labelsets

Emotions concurrence

4.1 Binary Relevance Naive Bayes

# Binary relevance ML strategy with naive Bayes base classifier
model_BR_NB <- utiml::br(ds$train, "NB")
predictionsBR <- predict(model_BR_NB, ds$test)
resultsBRPerExamples <- multilabel_evaluate(
    ds$test, predictionsBR,
    c("example-based")
)
resultsBRPerExamples
       accuracy              F1    hamming-loss       precision          recall 
      0.5045380       0.6050684       0.2739274       0.5594059       0.7351485 
subset-accuracy 
      0.1732673 
resultsBRPerLabel <- multilabel_evaluate(
    ds$test, predictionsBR,
    c("label-based")
)
resultsBRPerLabel
      macro-AUC        macro-F1 macro-precision    macro-recall       micro-AUC 
      0.7765587       0.6232502       0.5503731       0.7237345       0.7864504 
       micro-F1 micro-precision    micro-recall 
      0.6261261       0.5504950       0.7258486 

4.2 Classifier Chain Naive Bayes

# Classifier Chain ML strategy with naive Bayes base classifier
# Define the chain-order between labels: sample a random order
mychain <- sample(rownames(emotions$labels))
model_CC_NB <- cc(ds$train, "NB", mychain)
predictionsCC <- predict(model_CC_NB, ds$test)
resultsCCPerExamples <- multilabel_evaluate(
    ds$test, predictionsCC,
    c("example-based")
)
resultsCCPerExamples
       accuracy              F1    hamming-loss       precision          recall 
      0.5153465       0.6146393       0.2656766       0.5730198       0.7400990 
subset-accuracy 
      0.1782178 
resultsCCPerLabel <- multilabel_evaluate(
    ds$test, predictionsCC,
    c("label-based")
)
resultsCCPerLabel
      macro-AUC        macro-F1 macro-precision    macro-recall       micro-AUC 
      0.7794148       0.6292523       0.5617664       0.7229584       0.7890771 
       micro-F1 micro-precision    micro-recall 
      0.6332574       0.5616162       0.7258486 

5 genbase Dataset

# "genbase" dataset to predict the set of genbase species
# that are present, given a ten-second audio clip:
# https://doi.org/10.1109/MLSP.2013.6661934
summary(genbase)
genbase$labels
genbase <- remove_skewness_labels(genbase, 10)
# Type "genbase$" for its list of attributes
plot(genbase, type = "LB")

# visual relations among labels
plot(genbase, type = "LC")

# create a holdout partition: train and predict to evaluate
ds <- create_holdout_partition(genbase, c(train = 0.66, test = 0.34))
# an external GUI interface to explore the "genbase" dataset
# mldrGUI() # press "escape" to exit GUI

Genbase labels

Genbase labelsets

Genbase concurrence

5.1 Binary Relevance Naive Bayes

# Binary relevance ML strategy with naive Bayes base classifier
model_BR_NB <- utiml::br(ds$train, "NB")
predictionsBR <- predict(model_BR_NB, ds$test)
resultsBRPerExamples <- multilabel_evaluate(
    ds$test, predictionsBR,
    c("example-based")
)
resultsBRPerExamples
       accuracy              F1    hamming-loss       precision          recall 
      0.9622222       0.9751111       0.0050000       0.9622222       1.0000000 
subset-accuracy 
      0.9200000 
resultsBRPerLabel <- multilabel_evaluate(
    ds$test, predictionsBR,
    c("label-based")
)
resultsBRPerLabel
      macro-AUC        macro-F1 macro-precision    macro-recall       micro-AUC 
      1.0000000       0.9322917       0.9007353       1.0000000       0.9991798 
       micro-F1 micro-precision    micro-recall 
      0.9683099       0.9385666       1.0000000 

5.2 Classifier Chain Naive Bayes

# Classifier Chain ML strategy with naive Bayes base classifier
# Define the chain-order between labels: sample a random order
mychain <- sample(rownames(genbase$labels))
model_CC_NB <- cc(ds$train, "NB", mychain)
predictionsCC <- predict(model_CC_NB, ds$test)
resultsCCPerExamples <- multilabel_evaluate(
    ds$test, predictionsCC,
    c("example-based")
)
resultsCCPerExamples
       accuracy              F1    hamming-loss       precision          recall 
    0.954074074     0.969777778     0.006111111     0.954074074     1.000000000 
subset-accuracy 
    0.902222222 
resultsCCPerLabel <- multilabel_evaluate(
    ds$test, predictionsCC,
    c("label-based")
)
resultsCCPerLabel
      macro-AUC        macro-F1 macro-precision    macro-recall       micro-AUC 
      0.9997185       0.9211806       0.8890595       1.0000000       0.9986942 
       micro-F1 micro-precision    micro-recall 
      0.9615385       0.9259259       1.0000000 
---
title: "Multi-Label Classification in R"
output:
  html_notebook: 
    toc: yes
    toc_float: yes
    number_sections: yes
---

```{r}
# "utiml" and "mldr" packages for multi-label classification in R
# https://journal.r-project.org/archive/2018/RJ-2018-041/RJ-2018-041.pdf
# https://github.com/rivolli/utiml
# https://cran.r-project.org/web/packages/mldr/vignettes/mldr.pdf
# https://github.com/fcharte/mldr
library(utiml)
library(mldr)
# the following package provides benchmark datasets for multi-label classification
library(mldr.datasets)
set.seed(123)
```
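
utiml can train the per-label models in parallel and fix its own random seed; the R Journal article describes global options for this. The sketch below is left commented out, and the option names (`utiml.cores`, `utiml.seed`) are taken from that description, so verify them against your installed version.

```{r}
# Optional parallelism/reproducibility settings (option names assumed from the utiml paper)
# options(utiml.cores = max(1, parallel::detectCores() - 1), utiml.seed = 123)
```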

# ng20 Dataset

```{r}
summary(ng20)
# ng20: a corpus of 19300 documents described by 1006 bag-of-words features and 20 labels
# Ken Lang, "NewsWeeder: Learning to Filter Netnews", Proc. 12th International Conference on Machine Learning (ICML 1995)
ng20$labels
# printing the full corpus is not practical, but part of it can be inspected via
ng20corpus <- ng20$dataset
# ng20's bag-of-words representation (features plus label columns)
dim(ng20corpus)
# colnames(ng20corpus)
# see ?remove_skewness_labels: it drops highly skewed labels (threshold of 10 examples)
ng20 <- remove_skewness_labels(ng20, 10)
# label bar plot
plot(ng20, type = "LB")
# visual relations among labels
plot(ng20, type = "LC")
# create a holdout partition: train and predict to evaluate
# I created a small test partition as the prediction step takes a long time
ds <- create_holdout_partition(ng20, c(train = 0.90, test = 0.10))
```
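
Besides `summary()`, the mldr object stores its characterization measures in the `$measures` component (documented in the mldr vignette); a quick look at them before modelling:

```{r}
# multi-label characterization measures kept inside the mldr object
# (number of labels, label cardinality, label density, imbalance ratios, ...)
unlist(ng20$measures)
```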

```{r}
# an external GUI interface to explore the "ng20" dataset
# mldrGUI() # press "escape" to exit GUI
```

![NG20 labels](../images/ng20_labels.png)

![NG20 labelsets](../images/ng20_labelsets.png)

![NG20 concurrence](../images/ng20_concurrence.png)

## Binary Relevance Naive Bayes

```{r}
# Binary relevance ML strategy with naive Bayes base classifier
model_BR_NB <- utiml::br(ds$train, "NB")
predictionsBR <- predict(model_BR_NB, ds$test)
resultsBRPerExamples <- multilabel_evaluate(
    ds$test, predictionsBR,
    c("example-based")
)
resultsBRPerExamples
resultsBRPerLabel <- multilabel_evaluate(
    ds$test, predictionsBR,
    c("label-based")
)
resultsBRPerLabel
```
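
The object returned by `predict()` can be read either as per-label scores or as a 0/1 bipartition, and the decision threshold can be changed after the fact. A small sketch assuming utiml's `as.probability()`, `as.bipartition()` and `fixed_threshold()` helpers:

```{r}
# per-label scores and crisp assignments for a few test documents
head(as.probability(predictionsBR)[, 1:5])
head(as.bipartition(predictionsBR)[, 1:5])
# a stricter cut-off than the default 0.5 usually trades recall for precision
multilabel_evaluate(ds$test, fixed_threshold(predictionsBR, 0.7), c("example-based"))
```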

## Classifier Chain Naive Bayes

```{r}
# Classifier Chain ML strategy with naive Bayes base classifier
# Define the chain-order between labels: sample a random order
mychain <- sample(rownames(ng20$labels))
model_CC_NB <- cc(ds$train, "NB", mychain)
predictionsCC <- predict(model_CC_NB, ds$test)
resultsCCPerExamples <- multilabel_evaluate(
    ds$test, predictionsCC,
    c("example-based")
)
resultsCCPerExamples
resultsCCPerLabel <- multilabel_evaluate(
    ds$test, predictionsCC,
    c("label-based")
)
resultsCCPerLabel
```
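
To compare the two strategies at a glance, the example-based vectors computed above can be bound into one table with base R:

```{r}
# BR vs. CC on the same ng20 holdout split
round(cbind(BR = resultsBRPerExamples, CC = resultsCCPerExamples), 4)
```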

# birds Dataset

```{r}
# "birds" dataset to predict the set of birds species
# that are present, given a ten-second audio clip:
# https://doi.org/10.1109/MLSP.2013.6661934
summary(birds)
birds$labels
birds <- remove_skewness_labels(birds, 10)
# Type "birds$" for its list of attributes
plot(birds, type = "LB")
# visual relations among labels
plot(birds, type = "LC")
# create a holdout partition: train and predict to evaluate
ds <- create_holdout_partition(birds, c(train = 0.66, test = 0.34))
```
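
A single random holdout on a dataset as small as birds can be noisy. utiml also offers a `cv()` helper for k-fold cross-validation; the sketch below is left commented out (it retrains one model per fold) and the argument names are assumptions to check against `?cv`:

```{r}
# 5-fold cross-validation of binary relevance + naive Bayes (argument names assumed)
# cv_br <- cv(birds, method = "br", base.algorithm = "NB",
#             cv.folds = 5, cv.measures = "example-based")
# round(cv_br, 4)
```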

```{r}
# an external GUI interface to explore the "birds" dataset
# mldrGUI() # press "escape" to exit GUI
```

![Birds labels](../images/birds_labels.png)

![Birds labelsets](../images/birds_labelsets.png)

![Birds concurrence](../images/birds_concurrence.png)

## Binary Relevance Naive Bayes

```{r}
# Binary relevance ML strategy with naive Bayes base classifier
model_BR_NB <- utiml::br(ds$train, "NB")
predictionsBR <- predict(model_BR_NB, ds$test)
resultsBRPerExamples <- multilabel_evaluate(
    ds$test, predictionsBR,
    c("example-based")
)
resultsBRPerExamples
resultsBRPerLabel <- multilabel_evaluate(
    ds$test, predictionsBR,
    c("label-based")
)
resultsBRPerLabel
```
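
The aggregate scores do not show which species drive the errors; utiml can expose the per-label confusion counts behind them. A sketch assuming the `multilabel_confusion_matrix()` helper (its print layout depends on the installed version):

```{r}
# per-label TP/FP/TN/FN counts underlying the label-based measures
confmatBR <- multilabel_confusion_matrix(ds$test, predictionsBR)
confmatBR
```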

## Classifier Chain Naive Bayes

```{r}
# Classifier Chain ML strategy with naive Bayes base classifier
# Define the chain-order between labels: sample a random order
mychain <- sample(rownames(birds$labels))
model_CC_NB <- cc(ds$train, "NB", mychain)
predictionsCC <- predict(model_CC_NB, ds$test)
resultsCCPerExamples <- multilabel_evaluate(
    ds$test, predictionsCC,
    c("example-based")
)
resultsCCPerExamples
resultsCCPerLabel <- multilabel_evaluate(
    ds$test, predictionsCC,
    c("label-based")
)
resultsCCPerLabel
```
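
The "NB" string selects the naive Bayes base learner; utiml accepts other base algorithms (for example "SVM" or "RF"), provided the corresponding package (e1071, randomForest) is installed. A sketch, left commented out because those learners are noticeably slower here:

```{r}
# same BR / CC pipeline with different base learners (requires e1071 / randomForest)
# model_BR_SVM <- br(ds$train, "SVM")
# model_CC_RF  <- cc(ds$train, "RF", mychain)
# multilabel_evaluate(ds$test, predict(model_BR_SVM, ds$test), c("example-based"))
```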

# enron Dataset

```{r}
# PROPOSED EXERCISE
# Carry out a similar analysis for the popular Enron corpus of e-mails.
# enron() downloads the corpus and builds the associated mldr object.
# A description of its labels appears in the following lists:
# https://bailando.berkeley.edu/enron/enron_categories.txt
# https://data.world/brianray/enron-email-dataset
# For example, "label 2.13" in those lists corresponds to label "B.B13" in the enron dataset
enron <- enron()
summary(enron)
enron$labels
enron <- remove_skewness_labels(enron, 10)
# Type "enron$" for its list of attributes
plot(enron, type = "LB")
# visual relations among labels
plot(enron, type = "LC")
# create a holdout partition: train and predict to evaluate
ds <- create_holdout_partition(enron, c(train = 0.66, test = 0.34))
```
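
Besides BR and CC, utiml implements strategies that model whole labelsets, such as the label powerset transformation `lp()`. The sketch below is commented out because enron has many distinct labelsets and the resulting multi-class problem is slow to fit:

```{r}
# Label Powerset: one multi-class problem whose classes are the observed labelsets
# model_LP_NB <- lp(ds$train, "NB")
# predictionsLP <- predict(model_LP_NB, ds$test)
# multilabel_evaluate(ds$test, predictionsLP, c("example-based"))
```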

```{r}
# an external GUI interface to explore the "enron" dataset
# mldrGUI() # press "escape" to exit GUI
```

![Enron labels](../images/enron_labels.png)

![Enron labelsets](../images/enron_labelsets.png)

![Enron concurrence](../images/enron_concurrence.png)

## Binary Relevance Naive Bayes

```{r}
# Binary relevance ML strategy with naive Bayes base classifier
model_BR_NB <- utiml::br(ds$train, "NB")
predictionsBR <- predict(model_BR_NB, ds$test)
resultsBRPerExamples <- multilabel_evaluate(
    ds$test, predictionsBR,
    c("example-based")
)
resultsBRPerExamples
resultsBRPerLabel <- multilabel_evaluate(
    ds$test, predictionsBR,
    c("label-based")
)
resultsBRPerLabel
```
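
`multilabel_evaluate()` is not limited to the two measure groups used above; `multilabel_measures()` lists every measure name the installed version knows, and "ranking" selects the ranking-based family (one-error, ranking-loss, average-precision, ...). A short sketch:

```{r}
# measure names available in this utiml installation
multilabel_measures()
# ranking-based view of the BR predictions
multilabel_evaluate(ds$test, predictionsBR, c("ranking"))
```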

## Classifier Chain Naive Bayes

```{r}
# Classifier Chain ML strategy with naive Bayes base classifier
# Define the chain-order between labels: sample a random order
mychain <- sample(rownames(enron$labels))
model_CC_NB <- cc(ds$train, "NB", mychain)
predictionsCC <- predict(model_CC_NB, ds$test)
resultsCCPerExamples <- multilabel_evaluate(
    ds$test, predictionsCC,
    c("example-based")
)
resultsCCPerExamples
resultsCCPerLabel <- multilabel_evaluate(
    ds$test, predictionsCC,
    c("label-based")
)
resultsCCPerLabel
```
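
A classifier chain depends on the sampled label order; an ensemble of chains averages several random orders and is usually more stable. A minimal sketch assuming utiml's `ecc()` with `m` member chains, left commented out because it multiplies the training cost:

```{r}
# Ensemble of Classifier Chains: m chains, each with its own random label order
# model_ECC_NB <- ecc(ds$train, "NB", m = 3)
# predictionsECC <- predict(model_ECC_NB, ds$test)
# multilabel_evaluate(ds$test, predictionsECC, c("example-based"))
```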

# emotions Dataset

```{r}
# "emotions" dataset to predict the set of emotions species
# that are present, given a ten-second audio clip:
# https://doi.org/10.1109/MLSP.2013.6661934
summary(emotions)
emotions$labels
emotions <- remove_skewness_labels(emotions, 10)
# Type "emotions$" for its list of attributes
plot(emotions, type = "LB")
# visual relations among labels
plot(emotions, type = "LC")
# create a holdout partition: train and predict to evaluate
ds <- create_holdout_partition(emotions, c(train = 0.66, test = 0.34))
```
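
emotions has continuous audio features on rather different scales. utiml ships pre-processing helpers, and the sketch below assumes `normalize_mldata()` rescales the numeric attributes; if used, it should be applied before the holdout split:

```{r}
# optional feature rescaling (helper name assumed from utiml's pre-processing family)
# emotions_norm <- normalize_mldata(emotions)
# ds <- create_holdout_partition(emotions_norm, c(train = 0.66, test = 0.34))
```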

```{r}
# an external GUI interface to explore the "emotions" dataset
# mldrGUI() # press "escape" to exit GUI
```

![Emotions labels](../images/emotions_labels.png)

![Emotions labelsets](../images/emotions_labelsets.png)

![Emotions concurrence](../images/emotions_concurrence.png)

## Binary Relevance Naive Bayes

```{r}
# Binary relevance ML strategy with naive Bayes base classifier
model_BR_NB <- utiml::br(ds$train, "NB")
predictionsBR <- predict(model_BR_NB, ds$test)
resultsBRPerExamples <- multilabel_evaluate(
    ds$test, predictionsBR,
    c("example-based")
)
resultsBRPerExamples
resultsBRPerLabel <- multilabel_evaluate(
    ds$test, predictionsBR,
    c("label-based")
)
resultsBRPerLabel
```
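
Naive Bayes scores are often poorly calibrated, so the implicit 0.5 cut-off is not the only reasonable choice; utiml provides score-based thresholding strategies such as `mcut_threshold()`, sketched here on the BR predictions:

```{r}
# MCut places the cut-off, per instance, at the largest gap between sorted label scores
predictionsBRmcut <- mcut_threshold(predictionsBR)
multilabel_evaluate(ds$test, predictionsBRmcut, c("example-based"))
```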

## Classifier Chain Naive Bayes

```{r}
# Classifier Chain ML strategy with naive Bayes base classifier
# Define the chain-order between labels: sample a random order
mychain <- sample(rownames(emotions$labels))
model_CC_NB <- cc(ds$train, "NB", mychain)
predictionsCC <- predict(model_CC_NB, ds$test)
resultsCCPerExamples <- multilabel_evaluate(
    ds$test, predictionsCC,
    c("example-based")
)
resultsCCPerExamples
resultsCCPerLabel <- multilabel_evaluate(
    ds$test, predictionsCC,
    c("label-based")
)
resultsCCPerLabel
```

# genbase Dataset

```{r}
# "genbase" dataset to predict the set of genbase species
# that are present, given a ten-second audio clip:
# https://doi.org/10.1109/MLSP.2013.6661934
summary(genbase)
genbase$labels
genbase <- remove_skewness_labels(genbase, 10)
# Type "genbase$" for its list of attributes
plot(genbase, type = "LB")
# visual relations among labels
plot(genbase, type = "LC")
# create a holdout partition: train and predict to evaluate
ds <- create_holdout_partition(genbase, c(train = 0.66, test = 0.34))
```
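
genbase describes each protein by binary motif features, and ID-like columns carry no information for learning. utiml's pre-processing family includes a helper for this; `remove_unique_attributes()` (name and behaviour assumed, see its help page) should drop attributes that take a distinct value on every instance:

```{r}
# drop uninformative ID-like columns before splitting (helper behaviour assumed; see help)
# genbase_clean <- remove_unique_attributes(genbase)
# ds <- create_holdout_partition(genbase_clean, c(train = 0.66, test = 0.34))
```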

```{r}
# an external GUI interface to explore the "genbase" dataset
# mldrGUI() # press "escape" to exit GUI
```

![Genbase labels](../images/genbase_labels.png)

![Genbase labelsets](../images/genbase_labelsets.png)

![Genbase concurrence](../images/genbase_concurrence.png)

## Binary Relevance Naive Bayes

```{r}
# Binary relevance ML strategy with naive Bayes base classifier
model_BR_NB <- utiml::br(ds$train, "NB")
predictionsBR <- predict(model_BR_NB, ds$test)
resultsBRPerExamples <- multilabel_evaluate(
    ds$test, predictionsBR,
    c("example-based")
)
resultsBRPerExamples
resultsBRPerLabel <- multilabel_evaluate(
    ds$test, predictionsBR,
    c("label-based")
)
resultsBRPerLabel
```

## Classifier Chain Naive Bayes

```{r}
# Classifier Chain ML strategy with naive Bayes base classifier
# Define the chain-order between labels: sample a random order
mychain <- sample(rownames(genbase$labels))
model_CC_NB <- cc(ds$train, "NB", mychain)
predictionsCC <- predict(model_CC_NB, ds$test)
resultsCCPerExamples <- multilabel_evaluate(
    ds$test, predictionsCC,
    c("example-based")
)
resultsCCPerExamples
resultsCCPerLabel <- multilabel_evaluate(
    ds$test, predictionsCC,
    c("label-based")
)
resultsCCPerLabel
```