#! /bin/sh
# This is a shell archive. Remove anything before this line, then unpack
# it by saving it into a file and typing "sh file". To overwrite existing
# files, type "sh file -c". You can also feed this as standard input via
# unshar, or by typing "sh <file".
#
# Contents: summary README gendat gendat.d
#
# Each member is extracted only if it does not already exist, unless "-c"
# is given as the first argument. sed strips the protective leading "X"
# from every archived line, and the size of the extracted file is then
# compared with the byte count recorded in the archive; a mismatch only
# prints a warning.
if test -f 'summary' -a "${1}" != "-c" ; then
echo shar: Will not clobber existing file \"'summary'\"
else
echo shar: Extracting \"'summary'\" \(96 characters\)
sed "s/^X//" >'summary' <<'END_OF_FILE'
Xgendat -- generates a new sample of data statistically similar to a provided
X          data set
END_OF_FILE
if test 96 -ne `wc -c <'summary'`; then
echo shar: \"'summary'\" unpacked with wrong size!
fi
# end of 'summary'
fi
# README member: refuse to overwrite an existing file unless the first
# argument is "-c".
if test -f 'README' -a "${1}" != "-c" ; then
echo shar: Will not clobber existing file \"'README'\"
else
echo shar: Extracting \"'README'\" \(1897 characters\)
# Write the here-document below to README, removing one leading "X" from
# every line that has it.
sed "s/^X//" >'README' <<'END_OF_FILE'
X LEGALITIES
X
COPYRIGHT: This code is copyright 1989 by E. Neely Atkinson for the
University of Texas M. D. Anderson Cancer Center.
X
CONDITIONS FOR INDIVIDUAL USE: Individuals may freely use and modify
this code. The code can be copied and passed along to other
individual users.
X
CONDITIONS FOR REDISTRIBUTION: This code may be systematically
redistributed as written or as modified, and either by itself or as
part of a package, providing that the documentation provided with the
redistribution carries a notice similar to:
X
X"Gendat was written by (adapted from code written by) members of the
Section of Computer Science, Department of Biomathematics, The
University of Texas M.D. Anderson Cancer Center, Houston."
X
The authors would appreciate being notified of any such redistribution
X(documenting use helps us hustle grant bucks). Please send a copy of
the documentation or at least a notice of inclusion of our code to
X
Barry W. Brown, Chief, Section of Computer Science
Department of Biomathematics, Box 237
M. D. Anderson Hospital
X1515 Holcombe Blvd.
Houston, TX 77030
X
DISCLAIMER: Although care was taken in the creation and testing of
this system, any user employs it at his own risk. Neither the authors
nor the University of Texas assumes any legal responsibility for the
correct functioning of the system.
X
X INSTALLATION
X
The file 'gendat' contains the source for this S function. The file
X'gendat.d' is the text of the help file for gendat.
X
X FOR HELP
X
Comments or requests for further information or help on this code should
be directed to (internet)
X
neely@mdaali.cancer.utexas.edu
X
or (bitnet)
X
AN123651 at UTHVM1
X
or (U.S. Mail)
X
XE. Neely Atkinson
Department of Biomathematics, Box 237
University of Texas M. D. Anderson Cancer Center
X1515 Holcombe
Houston, TX 77030
END_OF_FILE
# Compare the size of the extracted README with the byte count recorded in
# the archive; a mismatch only prints a warning.
if test 1897 -ne `wc -c <'README'`; then
echo shar: \"'README'\" unpacked with wrong size!
fi
# end of 'README'
fi
# gendat member (source of the S function): refuse to overwrite an existing
# file unless the first argument is "-c".
if test -f 'gendat' -a "${1}" != "-c" ; then
echo shar: Will not clobber existing file \"'gendat'\"
else
echo shar: Extracting \"'gendat'\" \(3644 characters\)
# Write the here-document below to gendat, removing one leading "X" from
# every line that has it.
sed "s/^X//" >'gendat' <<'END_OF_FILE'
gendat <-
function(obs,ngen=-1,nneigh=-1,prt=FALSE)
{
#
# This S function implements the algorithm of Thompson
# and Taylor (Computational Statistics and Data
# Analysis 4, pg. 93-101, 1986) for the generation
# of pseudo data points drawn from the estimated
# density of an observed data set.
#
# E. Neely Atkinson
# Department of Biomathematics
# University of Texas System Cancer Center
# M.D. Anderson Hospital
# Houston, Tx. 77030.
# (713) 792-2619
# AN123651@UTHVM1.BITNET
#
# James R. Thompson
# Department of Statistics
# Rice University
# Houston, Tx. 77252
# (713) 527-4828.
# thomp@rice.edu
#
# August 22, 1989
#
# ARGUMENTS:
#
# obs:    vector or matrix of observed data (one row per
#         observation); coerced with as.matrix().
# ngen:   number of pseudo data points to generate. If omitted
#         or negative, nrow(obs) points are generated.
# nneigh: number of nearest neighbors used for each pseudo
#         point. If omitted, the default is
#         ceiling(0.10*nrow(obs)) for nrow(obs) <= 50,
#         ceiling(0.075*nrow(obs)) for 50 < nrow(obs) <= 100,
#         ceiling(0.05*nrow(obs)) for 100 < nrow(obs) <= 200,
#         ceiling(0.02*nrow(obs)) for nrow(obs) > 200.
#         Values below 2 are raised to 2; values above
#         nrow(obs) are reduced to nrow(obs).
# prt:    if TRUE, the values of ngen and nneigh are printed
#         and progress/debug output is printed each time a
#         data point is simulated. default is FALSE.
#
# VALUE:
#
# an ngen by ncol(obs) matrix of pseudo data points sampled
# from a nonparametric estimate of the density of obs.
#
  obs <- as.matrix(obs)
  numobs <- nrow(obs)
  if (numobs < 1) stop("obs must contain at least one observation")
#
# Resolve the defaults. A negative ngen is the "use the default"
# sentinel that appears in the signature.
#
  if (missing(ngen) || ngen < 0) ngen <- numobs
  ngen <- as.integer(ngen)
  if (missing(nneigh)) {
    if (numobs <= 50) {
      nneigh <- ceiling(.10 * numobs)
    } else if (numobs <= 100) {
      nneigh <- ceiling(.075 * numobs)
    } else if (numobs <= 200) {
      nneigh <- ceiling(.05 * numobs)
    } else {
      nneigh <- ceiling(.02 * numobs)
    }
  } else if (nneigh > numobs) {
    warning("nneigh exceeds the number of observations; using all of them")
  }
#
# At least 2 neighbors, but never more than there are observations:
# the random coefficients below must match the neighbors one to one.
#
  nneigh <- as.integer(min(max(nneigh, 2), numobs))
  if (prt) {
    print(paste(ngen, " points will be generated."))
    print(paste(nneigh, " nearest neighbors will be used."))
  }
#
# Select the observed points to be sampled.
#
  sam <- sample(seq_len(numobs), ngen, replace = TRUE)
#
# Preallocate the result instead of growing it with rbind().
#
  x <- matrix(0, nrow = ngen, ncol = ncol(obs),
              dimnames = list(NULL, colnames(obs)))
#
# Bounds of the uniform distribution of the random coefficients;
# they depend only on nneigh, so compute them once.
#
  halfwidth <- sqrt(3 * (nneigh - 1))
  ulow <- (1 - halfwidth) / nneigh
  uhigh <- (1 + halfwidth) / nneigh
#
# Generate the pseudo points.
#
  for (i in seq_len(ngen)) {
    if (prt) {
      print(paste("generating point ", i, " of ", ngen))
      cat(' Sampled point number ', i, ' = ', sam[i], '\n')
    }
#
# Squared Euclidean distance from the sampled point to each
# observed data point.
#
    xdist <- rowSums(sweep(obs, 2, obs[sam[i], ])^2)
    if (prt) cat(' xdist: ', xdist, '\n')
#
# Select exactly nneigh nearest neighbors. order() breaks ties by
# position, so duplicated observations cannot change the count;
# sort() keeps the neighbors in their original row order.
#
    nearest <- sort(order(xdist)[seq_len(nneigh)])
    xneigh <- obs[nearest, , drop = FALSE]
#
# Compute the mean of the nearest neighbors.
#
    xmean <- colMeans(xneigh)
#
# Compute the random coefficients.
#
    u <- runif(nneigh, ulow, uhigh)
#
# Remove the means from the nearest neighbors, form a linear
# combination of the centered values using the random
# coefficients, and add the means back.
#
    x[i, ] <- drop(crossprod(u, sweep(xneigh, 2, xmean))) + xmean
  }
  x
}
END_OF_FILE
# Compare the size of the extracted gendat with the byte count recorded in
# the archive; a mismatch only prints a warning.
if test 3644 -ne `wc -c <'gendat'`; then
echo shar: \"'gendat'\" unpacked with wrong size!
fi
# end of 'gendat'
fi
# gendat.d member (help file text for gendat): refuse to overwrite an
# existing file unless the first argument is "-c".
if test -f 'gendat.d' -a "${1}" != "-c" ; then
echo shar: Will not clobber existing file \"'gendat.d'\"
else
echo shar: Extracting \"'gendat.d'\" \(1063 characters\)
# Write the here-document below to gendat.d, removing one leading "X" from
# every line that has it.
sed "s/^X//" >'gendat.d' <<'END_OF_FILE'
X.BG
X.FN gendat
X.TL
gendat: Generates a new data set statistically similar to obs.
X
Reference: Thompson and Taylor (Computational Statistics and Data
Analysis 4,pg. 93-101, 1986)
X
X.CS
gendat(obs, ngen=-1, nneigh=-1, prt=FALSE)
X.AG obs
A vector (for one-dimensional data) or a matrix (one row per observation,
one column per dimension) containing the data from whose estimated density
a new pseudo-sample is to be drawn.
X.AG ngen
Number of points to generate. Defaults to the length or nrow of obs.
X.AG nneigh
Points generated are random linear combinations of the nneigh nearest
neighbors of a random point (row) from obs. The higher nneigh, the
smoother the density estimate of the generated points will be. If nneigh
is omitted, a default that depends on the number of rows of obs is used;
values below 2 are raised to 2.
X.AG prt
Turns on debug printing.
X.RT
A matrix whose column dimension matches that of obs (one column if obs is an
array) and whose row dimension is ngen. Each row contains a
pseudo-observation from the density of obs.
X.EX
data <- rnorm(100)
gener.dat <- gendat(data)
X.KW density estimation, resampling, random number generation
X.WR
END_OF_FILE
# Compare the size of the extracted gendat.d with the byte count recorded in
# the archive; a mismatch only prints a warning.
if test 1063 -ne `wc -c <'gendat.d'`; then
echo shar: \"'gendat.d'\" unpacked with wrong size!
fi
# end of 'gendat.d'
fi
# Every member has been processed: report completion and exit with status 0.
echo shar: End of shell archive.
exit 0