% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/word_imp.R
\name{word_imp}
\alias{word_imp}
\title{Importance of words (terms) embedded
in a text document}
\usage{
word_imp(textdoc, metric= "tf",
words_to_filter=NULL)
}
\arguments{
\item{textdoc}{An \code{n} x \code{1} list (dataframe) of
individual text records, where \code{n} is the total
number of individual records. An \code{n} x \code{2} dataframe can
also be supplied, in which the second column represents the
labels of the pre-defined groupings of the text records,
e.g. labels of geographical areas where each text record
originates.
For an \code{n} x \code{1} dataframe, an arbitrary grouping is
automatically imposed.}

\item{metric}{(character) The measure for determining the level of
importance of each word within the text document. Options include \code{'tf'}
representing \verb{term frequency} and \code{'tf-idf'}
representing \verb{term frequency inverse document frequency}
(Silge & Robinson, 2016).}

\item{words_to_filter}{A pre-defined vector of words (terms) to
filter out from the text document prior to highlighting word importance.
Default: \code{NULL}. This parameter helps to eliminate
unnecessary words that may be too dominant in the results.}
}
\value{
Graphical representation of word importance
according to a specified metric. A wordcloud is used
to represent word importance if \code{tf} is specified, while
facet-wrapped histograms are used if \code{tf-idf} is specified.
A wordcloud represents each word with a size corresponding
to its level of importance. In the facet-wrapped histograms,
words are ranked in each group (histogram) in their order
of importance.
}
\description{
Produces a graphical representation (a wordcloud or
facet-wrapped histograms) of the level of importance of each
word (across different text groups)
within a text document, according to a specified measure.
}
\details{
The function determines the most important words
across various groupings of a text document. The measure
options include \code{tf} and \code{tf-idf}. The idea of \code{tf}
is to rank words in the order of their number of occurrences
across the text document, whereas \code{tf-idf} finds words that
are frequent within a given group but are not used very much
across the other groups in the document.
}
\examples{
#words to filter out
wf <- c("police","policing")
output <- word_imp(textdoc = policing_dtd, metric= "tf",
words_to_filter= wf)
}
\references{
Silge, J. and Robinson, D. (2016) tidytext:
Text mining and analysis using tidy data principles in R.
Journal of Open Source Software, 1, 37.
}
