% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/NonProbEst.R
\name{matching}
\alias{matching}
\title{Predicts unknown responses by matching}
\usage{
matching(
  convenience_sample,
  reference_sample,
  covariates,
  estimated_var,
  positive_label = NULL,
  algorithm = "glm",
  proc = NULL,
  ...
)
}
\arguments{
\item{convenience_sample}{Data frame containing the non-probabilistic sample.}

\item{reference_sample}{Data frame containing the probabilistic sample.}

\item{covariates}{String vector specifying the common variables to use for training.}

\item{estimated_var}{String specifying the variable to estimate.}

\item{positive_label}{String specifying the label to be considered positive if the estimated variable is categorical. Leave it as the default NULL otherwise.}

\item{algorithm}{A string specifying which classification or regression model to use (same as the \code{method} argument of the \link[caret]{train} function from the \pkg{caret} package).}

\item{proc}{A string or vector of strings specifying whether any of the data preprocessing techniques available in the \link[caret]{train} function from the \pkg{caret} package should be applied to the data prior to model training. By default, its value is NULL and no preprocessing is applied.}

\item{...}{Further parameters to be passed to the \link[caret]{train} function.}
}
\value{
A vector containing the estimated responses for the reference sample.
}
\description{
It uses the matching method introduced by Rivers (2007). The idea is to model the relationship between \eqn{y_k} and \eqn{x_k} using the convenience sample in order to predict \eqn{y_k} for the reference sample. You can then estimate the total using the \code{total_estimation} function.
}
\details{
Training of the models is done via the \pkg{caret} package. The algorithm specified in \code{algorithm} must match one of the names in the list of algorithms supported by \pkg{caret}. If the estimated variable is categorical, probabilities are returned.
}
\examples{
# Basic usage: every optional argument keeps its default value
N <- 50000
covariates <- c("education_primaria", "education_secundaria")
# Recode a numeric 0/1 outcome as an F/T factor, since T is passed below as the positive label
if (is.numeric(sampleNP$vote_gen)) {
   sampleNP$vote_gen <- factor(sampleNP$vote_gen, levels = c(0, 1), labels = c('F', 'T'))
}
# Predict the outcome for the reference sample and store it as a one-column data frame
estimated_votes <- data.frame(
   vote_gen = matching(sampleNP, sampleP, covariates, "vote_gen", 'T')
)
# Estimate the total, giving each reference unit an equal weight of N / n
total_estimation(estimated_votes, N / nrow(estimated_votes), c("vote_gen"), N)
}
\references{
Rivers, D. (2007). \emph{Sampling for Web Surveys}. Presented at the Joint Statistical Meetings, Salt Lake City, UT.
}
