Chapter 9 Further Classification Models
9.1 Multilabel classification
In some datasets an instance can carry several labels at the same time. For example, user reviews in mobile application repositories such as the App Store or Google Play can simultaneously be a bug report, a feature request, etc.
9.2 Semi-supervised Learning
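Self-training fits a model on partially labelled (semi-supervised) data. The example below uses the `SelfTrain()` function, originally documented for the DMwR package at http://www.inside-r.org/packages/cran/dmwr/docs/SelfTrain and used here from its successor package, DMwR2.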
library(DMwR2)
## Small example with the Iris classification data set
data(iris)
## Dividing the data set into train and test sets:
## 100 of the 150 row indices are sampled for training (tr),
## the remaining 50 rows form the test set (ts)
idx <- sample(150, 100)
tr <- iris[idx, ]
ts <- iris[-idx, ]
## Learn a tree with the full train set and test it
## (the table cross-tabulates predicted classes, in rows, against the
## true classes of the test set, in columns)
stdTree <- rpartXse(Species ~ ., tr, se = 0.5)
table(predict(stdTree, ts, type = "class"), ts$Species)
## Now let us create another training set with most of the target
## variable values unknown: the Species value of 70 of the 100
## training rows is replaced by NA
trSelfT <- tr
nas <- sample(100, 70)
trSelfT[nas, "Species"] <- NA
## Learn a tree using only the labelled cases and test it
## (rows listed in `nas` have no Species value, so they are dropped)
baseTree <- rpartXse(Species ~ ., trSelfT[-nas, ], se = 0.5)
table(predict(baseTree, ts, type = "class"), ts$Species)
## The user-defined function that will be used in the self-training process.
##
## Arguments:
##   m  a fitted model for which predict() methods are available
##   d  the data to obtain predictions for
## Value:
##   a data frame with one row per case and two columns:
##     cl  the predicted class, from predict(m, d, type = "class")
##     p   the largest value in the corresponding row of predict(m, d)
##         (assumes predict(m, d) returns a matrix of class probabilities,
##         one column per class -- confirm for the learner in use)
f <- function(m, d) {
  l <- predict(m, d, type = "class")
  # Row-wise maximum, i.e. the value associated with the winning class
  conf <- apply(predict(m, d), 1, max)
  data.frame(cl = l, p = conf)
}
## Self train the same model using the semi-supervised data and test the
## resulting model.
## In DMwR2 the learner is given as a function name plus a separate list of
## its parameters (the learner() wrapper belongs to the older DMwR package);
## 'f' is the name of the user-defined prediction function.
treeSelfT <- SelfTrain(Species ~ ., trSelfT, "rpartXse", list(se = 0.5), "f")
table(predict(treeSelfT, ts, type = "class"), ts$Species)