% Encoding: UTF-8
@Article{HuKr12c,
author = {Hunter, David R. and Krivitsky, Pavel N. and Schweinberger, Michael},
title = {Computational Statistical Methods for Social Network Models},
journal = {Journal of Computational and Graphical Statistics},
year = {2012},
volume = {21},
number = {4},
pages = {856--882},
abstract = {We review the broad range of recent statistical work in social network models, with emphasis on computational aspects of these methods. Particular focus is applied to exponential-family random graph models (ERGM) and latent variable models for data on complete networks observed at a single time point, though we also briefly review many methods for incompletely observed networks and networks observed at multiple time points. Although we mention far more modeling techniques than we can possibly cover in depth, we provide numerous citations to current literature. We illustrate several of the methods on a small, well-known network dataset, Sampson's monks, providing code where possible so that these analyses may be duplicated.},
doi = {10.1080/10618600.2012.732921},
keywords = {Degeneracy; ERGM; Latent variables; MCMC MLE; Variational methods},
publisher = {Taylor \& Francis},
}
@InProceedings{KaSl14d,
Title = {Differentially Private Exponential Random Graphs},
Author = {Karwa, Vishesh and Slavkovi{\'c}, Aleksandra B. and Krivitsky, Pavel},
Booktitle = {Privacy in Statistical Databases},
Year = {2014},
Editor = {Domingo-Ferrer, Josep},
Pages = {143--155},
Publisher = {Springer International Publishing},
Series = {Lecture Notes in Computer Science},
Volume = {8744},
Abstract = {We propose methods to release and analyze synthetic graphs in order to protect privacy of individual relationships captured by the social network. Proposed techniques aim at fitting and estimating a wide class of exponential random graph models (ERGMs) in a differentially private manner, and thus offer rigorous privacy guarantees. More specifically, we use the randomized response mechanism to release networks under $\epsilon$-edge differential privacy. To maintain utility for statistical inference, treating the original graph as missing, we propose a way to use likelihood based inference and Markov chain Monte Carlo (MCMC) techniques to fit ERGMs to the produced synthetic networks. We demonstrate the usefulness of the proposed techniques on a real data example.},
Doi = {10.1007/978-3-319-11257-2_12},
Keywords = {Exponential random graphs; edge differential privacy; missing data; synthetic graphs}
}
@Article{Kr12e,
Title = {Exponential-Family Random Graph Models for Valued Networks},
Author = {Krivitsky, Pavel N.},
Journal = {Electronic Journal of Statistics},
Year = {2012},
Pages = {1100--1128},
Volume = {6},
Abstract = {Exponential-family random graph models (ERGMs) provide a principled and flexible way to model and simulate features common in social networks, such as propensities for homophily, mutuality, and friend-of-a-friend triad closure, through choice of model terms (sufficient statistics). However, those ERGMs modeling the more complex features have, to date, been limited to binary data: presence or absence of ties. Thus, analysis of valued networks, such as those where counts, measurements, or ranks are observed, has necessitated dichotomizing them, losing information and introducing biases.
In this work, we generalize ERGMs to valued networks. Focusing on modeling counts, we formulate an ERGM for networks whose ties are counts and discuss issues that arise when moving beyond the binary case. We introduce model terms that generalize and model common social network features for such data and apply these methods to a network dataset whose values are counts of interactions.},
Doi = {10.1214/12-EJS696}
}
@TechReport{Kr12m,
Title = {Modeling Tie Duration in ERGM-Based Dynamic Network Models},
Author = {Krivitsky, Pavel N.},
Institution = {Pennsylvania State University Department of Statistics},
Year = {2012},
Month = apr,
Number = {2012-02},
Abstract = {Krivitsky and Handcock (2010) proposed a Separable Temporal ERGM (STERGM) framework for modeling social networks, which facilitates separable modeling of the tie duration distributions and the structural dynamics of tie formation. In this note, we explore the hazard structures achievable in this framework, with first- and higher-order Markov assumptions, and propose ways to model a variety of duration distributions in this framework.},
Url = {http://stat.psu.edu/Research/technical-reports-1/2012-technical-reports}
}
@TechReport{Kr12ma,
Title = {Modeling of Dynamic Networks based on Egocentric Data with Durational Information},
Author = {Krivitsky, Pavel N.},
Institution = {Pennsylvania State University Department of Statistics},
Year = {2012},
Month = apr,
Number = {2012-01},
Abstract = {Modeling of dynamic networks --- networks that evolve over time --- has manifold applications in many fields. In epidemiology in particular, there is a need for data-driven modeling of human sexual relationship networks for the purpose of modeling and simulation of the spread of sexually transmitted disease. Dynamic network data about such networks are extremely difficult to collect, however, and much more readily available are egocentrically sampled data of a network at a single time point, with some attendant information about the sexual history of respondents.
Krivitsky and Handcock (2010) proposed a Separable Temporal ERGM (STERGM) framework, which facilitates separable modeling of the tie duration distributions and the structural dynamics of tie formation. In this work, we apply this modeling framework to this problem, by studying the long-run properties of STERGM processes, developing methods for fitting STERGMs to egocentrically sampled data, and extending the network size adjustment method of Krivitsky, Handcock, and Morris (2011) to dynamic models.},
Url = {http://stat.psu.edu/Research/technical-reports-1/2012-technical-reports}
}
@PhdThesis{Kr09s,
Title = {Statistical Models for Social Network Data and Processes},
Author = {Krivitsky, Pavel N.},
School = {University of Washington},
Year = {2009},
Address = {Seattle, WA},
Month = aug,
Abstract = {This work deals with three areas of network modeling. First, in the area of latent space modeling of social networks, it develops and extends latent cluster social network models by adding random effects and providing efficient algorithms for fitting these models. Second, it explores properties of ERGM and ERGM-based models under changing network size, and proposes a way of addressing the problems that arise. Third, in the area of dynamic networks, it proposes and develops a model separating tie formation process from tie dissolution process, facilitating flexible and realistic simulation of dynamic networks. Methods for integrating of adjustments for network size changes into the dynamic models are also developed.}
}
@Article{KrHa14s,
Title = {A Separable Model for Dynamic Networks},
Author = {Krivitsky, Pavel N. and Handcock, Mark S.},
Journal = {Journal of the Royal Statistical Society, Series B},
Year = {2014},
Number = {1},
Pages = {29--46},
Volume = {76},
Abstract = {Models of dynamic networks --- networks that evolve over time --- have manifold applications. We develop a discrete-time generative model for social network evolution that inherits the richness and flexibility of the class of exponential-family random graph models. The model facilitates separable modeling of the tie duration distributions and the structural dynamics of tie formation. We develop likelihood-based inference for the model, and provide computational algorithms for maximum likelihood estimation. We illustrate the interpretability of the model in analyzing a longitudinal network of friendship ties within a school.},
Doi = {10.1111/rssb.12014},
Keywords = {Social networks; Longitudinal; Exponential random graph model; Markov chain Monte Carlo; Maximum likelihood estimation}
}
@Article{KrHa08f,
Title = {Fitting Position Latent Cluster Models for Social Networks with \pkg{latentnet}},
Author = {Krivitsky, Pavel N. and Handcock, Mark S.},
Journal = {Journal of Statistical Software},
Year = {2008},
Month = may,
Number = {5},
Pages = {1--23},
Volume = {24},
Abstract = {latentnet is a package to fit and evaluate statistical latent position and cluster models for networks. Hoff, Raftery, and Handcock (2002) suggested an approach to modeling networks based on positing the existence of a latent space of characteristics of the actors. Relationships form as a function of distances between these characteristics as well as functions of observed dyadic level covariates. In latentnet social distances are represented in a Euclidean space. It also includes a variant of the extension of the latent position model to allow for clustering of the positions developed in Handcock, Raftery, and Tantrum (2007). The package implements Bayesian inference for the models based on a Markov chain Monte Carlo algorithm. It can also compute maximum likelihood estimates for the latent position model and a two-stage maximum likelihood method for the latent position cluster model. For latent position cluster models, the package provides a Bayesian way of assessing how many groups there are, and thus whether or not there is any clustering (since if the preferred number of groups is 1, there is little evidence for clustering). It also estimates which cluster each actor belongs to. These estimates are probabilistic, and provide the probability of each actor belonging to each cluster. It computes four types of point estimates for the coefficients and positions: maximum likelihood estimate, posterior mean, posterior mode and the estimator which minimizes Kullback-Leibler divergence from the posterior. You can assess the goodness-of-fit of the model via posterior predictive checks. It has a function to simulate networks from a latent position or latent position cluster model.},
ISSN = {1548-7660},
Url = {http://www.jstatsoft.org/v24/i05}
}
@Article{KrHa11a,
Title = {Adjusting for Network Size and Composition Effects in Exponential-Family Random Graph Models},
Author = {Krivitsky, Pavel N. and Handcock, Mark S. and Morris, Martina},
Journal = {Statistical Methodology},
Year = {2011},
Number = {4},
Volume = {8},
Abstract = {Exponential-family random graph models (ERGMs) provide a principled way to model and simulate features common in human social networks, such as propensities for homophily and friend-of-a-friend triad closure. We show that, without adjustment, ERGMs preserve density as network size increases. Density invariance is often not appropriate for social networks. We suggest a simple modification based on an offset which instead preserves the mean degree and accommodates changes in network composition asymptotically. We demonstrate that this approach allows ERGMs to be applied to the important situation of egocentrically sampled data. We analyze data from the National Health and Social Life Survey (NHSLS).},
Eprint = {http://arxiv.org/abs/1004.5328},
Keywords = {network size; ERGM; random graph; egocentrically-sampled data},
Month = jul,
Pages = {319--339},
Url = {http://arxiv.org/abs/1004.5328}
}
@Article{KrHa09r,
Title = {Representing Degree Distributions, Clustering, and Homophily in Social Networks with Latent Cluster Random Effects Models},
Author = {Krivitsky, Pavel N. and Handcock, Mark S. and Raftery, Adrian E. and Hoff, Peter D.},
Journal = {Social Networks},
Year = {2009},
Month = jul,
Number = {3},
Pages = {204--213},
Volume = {31},
Abstract = {Social network data often involve transitivity, homophily on observed attributes, community structure, and heterogeneity of actor degrees. We propose a latent cluster random effects model to represent all of these features, and we develop Bayesian inference for it. The model is applicable to both binary and non-binary network data. We illustrate the model using two real datasets: liking between monks and coreaderships between Slovenian publications. We also apply it to two simulated network datasets with very different network structure but the same highly skewed degree sequence generated from a preferential attachment process. One has transitivity and community structure while the other does not. Models based solely on degree distributions, such as scale-free, preferential attachment and power-law models, cannot distinguish between these very different situations, but the latent cluster random effects model does.},
Doi = {10.1016/j.socnet.2009.04.001},
ISSN = {0378-8733}
}
@Article{KrKo15qa,
author = {Pavel N. Krivitsky and Eric D. Kolaczyk},
title = {On the Question of Effective Sample Size in Network Modeling: {An} Asymptotic Inquiry},
journal = {Statistical Science},
year = {2015},
volume = {\textnormal{to appear}},
__markedentry = {[pavel:]},
abstract = {The modeling and analysis of networks and network data has seen an explosion of interest in recent years and represents an exciting direction for potential growth in statistics. Despite the already substantial amount of work done in this area to date by researchers from various disciplines, however, there remain many questions of a decidedly foundational nature --- natural analogues of standard questions already posed and addressed in more classical areas of statistics --- that have yet to even be posed, much less addressed. Here we raise and consider one such question in connection with network modeling. Specifically, we ask, ``Given an observed network, what is the sample size?'' Using simple, illustrative examples from the class of exponential random graph models, we show that the answer to this question can very much depend on basic properties of the networks expected under the model, as the number of vertices $\nverts$ in the network grows. In particular, adopting the (asymptotic) scaling of the variance of the maximum likelihood parameter estimates as a notion of effective sample size ($\neff$), we show \newtxt{that when modeling the overall propensity to have ties and the propensity to reciprocate ties,} whether the networks are sparse or not under the model (i.e., having a constant or an increasing number of ties per vertex, respectively) is sufficient to yield an order of magnitude difference in $\neff$, from $O(\nverts)$ to $O(\nverts[2])$. \newtxt{In addition, we report simulation study results that suggest similar properties for models for triadic (friend-of-a-friend) effects.} We then explore some practical implications of this result, using both simulation and data on food-sharing from Lamalera, Indonesia.},
comments = {7 pages},
eprint = {1112.0840},
keywords = {Asymptotic normality; Consistency; Mutuality; Triadic closure; Exponential-family random graph model; Maximum likelihood},
oai2identifier = {1112.0840},
url = {http://arxiv.org/abs/1112.0840},
}
@InProceedings{RaNe07e,
Title = {Estimating the Integrated Likelihood via Posterior Simulation Using the Harmonic Mean Identity},
Author = {Raftery, Adrian E. and Newton, Michael A. and Satagopan, Jaya M. and Krivitsky, Pavel N.},
Booktitle = {Bayesian Statistics 8: Proceedings of the Valencia/ISBA 8th World Meeting on Bayesian Statistics},
Year = {2007},
Pages = {371--416},
Publisher = {Oxford University Press, USA},
Volume = {8},
Abstract = {The integrated likelihood (also called the marginal likelihood or the normalizing constant) is a central quantity in Bayesian model selection and model averaging. It is defined as the integral over the parameter space of the likelihood times the prior density. The Bayes factor for model comparison and Bayesian testing is a ratio of integrated likelihoods, and the model weights in Bayesian model averaging are proportional to the integrated likelihoods. We consider the estimation of the integrated likelihood from posterior simulation output, aiming at a generic method that uses only the likelihoods from the posterior simulation iterations. The key is the harmonic mean identity, which says that the reciprocal of the integrated likelihood is equal to the posterior harmonic mean of the likelihood. The simplest estimator based on the identity is thus the harmonic mean of the likelihoods. While this is an unbiased and simulation-consistent estimator, its reciprocal can have infinite variance and so it is unstable in general. We describe two methods for stabilizing the harmonic mean estimator. In the first one, the parameter space is reduced in such a way that the modified estimator involves a harmonic mean of heavier-tailed densities, thus resulting in a finite variance estimator. The resulting estimator is stable. It is also self-monitoring, since it obeys the central limit theorem, and so confidence intervals are available. We discuss general conditions under which this reduction is applicable. The second method is based on the fact that the posterior distribution of the log-likelihood is approximately a gamma distribution. This leads to an estimator of the maximum achievable likelihood, and also an estimator of the effective number of parameters that is extremely simple to compute from the loglikelihoods, independent of the model parametrization, and always positive. 
This yields estimates of the log integrated likelihood, and posterior simulation-based analogues of the BIC and AIC model selection criteria, called BICM and AICM. We illustrate the proposed methods through several examples.},
ISBN = {0199214654}
}
@Article{CaKr15a,
author = {Nicole Bohme Carnegie and Pavel N. Krivitsky and David R. Hunter and Steven M. Goodreau},
title = {An Approximation Method for Improving Dynamic Network Model Fitting},
journal = {Journal of Computational and Graphical Statistics},
year = {2015},
volume = {24},
number = {2},
pages = {502--519},
abstract = {There has been a great deal of interest recently in the modeling and simulation of dynamic networks, that is, networks that change over time. One promising model is the separable temporal exponential-family random graph model (ERGM) of Krivitsky and Handcock, which treats the formation and dissolution of ties in parallel at each time step as independent ERGMs. However, the computational cost of fitting these models can be substantial, particularly for large, sparse networks. Fitting cross-sectional models for observations of a network at a single point in time, while still a nonnegligible computational burden, is much easier. This article examines model fitting when the available data consist of independent measures of cross-sectional network structure and the duration of relationships under the assumption of stationarity. We introduce a simple approximation to the dynamic parameters for sparse networks with relationships of moderate or long duration and show that the approximation method works best in precisely those cases where parameter estimation is most likely to fail---networks with very little change at each time step. We consider a variety of cases: Bernoulli formation and dissolution of ties, independent-tie formation and Bernoulli dissolution, independent-tie formation and dissolution, and dependent-tie formation models.},
doi = {10.1080/10618600.2014.903087},
}
@Article{KrKo15q,
author = {Krivitsky, Pavel N. and Kolaczyk, Eric D.},
title = {On the Question of Effective Sample Size in Network Modeling: An Asymptotic Inquiry},
journal = {Statistical Science},
year = {2015},
volume = {30},
number = {2},
pages = {184--198},
month = {05},
doi = {10.1214/14-STS502},
fjournal = {Statistical Science},
publisher = {The Institute of Mathematical Statistics},
}
@Article{Kr17u,
author = {Pavel N. Krivitsky},
title = {Using Contrastive Divergence to Seed {Monte} {Carlo} {MLE} for Exponential-Family Random Graph Models},
journal = {Computational Statistics \& Data Analysis},
year = {2017},
volume = {107},
pages = {149--161},
month = mar,
abstract = {Exponential-family models for dependent data have applications in a wide variety of areas, but the dependence often results in an intractable likelihood, requiring either analytic approximation or MCMC-based techniques to fit, the latter requiring an initial parameter configuration to seed their simulations. A poor initial configuration can lead to slow convergence or outright failure. The approximate techniques that could be used to find them tend not to be as general as the simulation-based and require implementation separate from that of the MLE-finding algorithm.
Contrastive divergence is a more recent simulation-based approximation technique that uses a series of abridged MCMC runs instead of running them to stationarity. Combining it with the importance sampling Monte Carlo MLE yields a method for obtaining adequate initial values that is applicable to a wide variety of modeling scenarios. Practical issues such as stopping criteria and selection of tuning parameters are also addressed. A simple generalization of the Monte Carlo MLE partial stepping algorithm to curved exponential families (applicable to MLE-finding as well) is also proposed.
The proposed approach reuses the aspects of an MLE implementation that are model-specific, so little to no additional implementer effort is required to obtain adequate initial parameters. This is demonstrated on a series of network datasets and models drawn from exponential-family random graph model computation literature, also exploring the limitations of the techniques considered.},
doi = {10.1016/j.csda.2016.10.015},
keywords = {Curved exponential family; ERGM; Network data; Partial stepping},
}
@Article{KaKr17s,
author = {Vishesh Karwa and Pavel N. Krivitsky and Aleksandra B. Slavkovi{\'c}},
title = {Sharing Social Network Data: Differentially Private Estimation of Exponential-Family Random Graph Models},
journal = {Journal of the Royal Statistical Society, Series C},
year = {2017},
volume = {66},
number = {3},
pages = {481--500},
__markedentry = {[pavel:]},
abstract = {Motivated by a real-life problem of sharing social network data that contain sensitive personal information, we propose a novel approach to release and analyze synthetic graphs in order to protect privacy of individual relationships captured by the social network while maintaining the validity of statistical results. Two case studies demonstrate the application and usefulness of the proposed techniques in solving the challenging problem of maintaining privacy \emph{and} supporting open access to network data to ensure reproducibility of existing studies and discovering new scientific insights that can be obtained by analyzing such data. We use a simple yet effective randomized response mechanism to generate synthetic networks under $\epsilon$-edge differential privacy. We combine ideas and methods from both the statistics and the computer sciences, by utilizing likelihood based inference for missing data and Markov chain Monte Carlo (MCMC) techniques to fit exponential-family random graph models (ERGMs) to the generated synthetic networks.},
doi = {10.1111/rssc.12185},
}
@Article{KrMo17i,
author = {Pavel N. Krivitsky and Martina Morris},
title = {Inference for Social Network Models from Egocentrically-Sampled Data, with Application to Understanding Persistent Racial Disparities in {HIV} Prevalence in the {US}},
journal = {Annals of Applied Statistics},
year = {2017},
volume = {11},
number = {1},
pages = {427--455},
abstract = {Egocentric network sampling observes the network of interest from the point of view of a set of sampled actors, who provide information about themselves and anonymized information on their network neighbors. In survey research, this is often the most practical, and sometimes the only, way to observe certain classes of networks, with the sexual networks that underlie HIV transmission being the archetypal case. Although methods exist for recovering some descriptive network features, there is no rigorous and practical statistical foundation for estimation and inference for network models from such data. We identify a subclass of exponential-family random graph models (ERGMs) amenable to being estimated from egocentrically sampled network data, and apply pseudo-maximum-likelihood estimation to do so and to rigorously quantify the uncertainty of the estimates. For ERGMs parametrized to be invariant to network size, we describe a computationally tractable approach to this problem. We use this methodology to help understand persistent racial disparities in HIV prevalence in the US. We also discuss some extensions, including how our framework may be applied to triadic effects when data about ties among the respondent’s neighbors are also collected.},
doi = {10.1214/16-AOAS1010},
journaltitle = {Annals of Applied Statistics},
keywords = {social network; ERGM; random graph; egocentrically-sampled data; pseudo maximum likelihood; pseudo likelihood},
}
@Article{KrBu17e,
author = {Krivitsky, Pavel N. and Butts, Carter T.},
title = {Exponential-Family Random Graph Models for Rank-Order Relational Data},
journal = {Sociological Methodology},
year = {2017},
volume = {47},
number = {1},
pages = {68--112},
abstract = {Rank-order relational data, in which each actor ranks the others according to some criterion, often arise from sociometric measurements of judgment (e.g., self-reported interpersonal interaction) or preference (e.g., relative liking). We propose a class of exponential-family models for rank-order relational data and derive a new class of sufficient statistics for such data, which assume no more than within-subject ordinal properties. Application of MCMC MLE to this family allows us to estimate effects for a variety of plausible mechanisms governing rank structure in cross-sectional context, and to model the evolution of such structures over time. We apply this framework to model the evolution of relative liking judgments in an acquaintance process, and to model recall of relative volume of interpersonal interaction among members of a technology education program.},
doi = {10.1177/0081175017692623},
keywords = {ERGM; social networks; ranks; weighted networks; transitivity; mutuality},
}
@TechReport{ScKr17f,
author = {Michael Schweinberger and Pavel N. Krivitsky and Carter T. Butts},
title = {Foundations of Finite-, Super-, and Infinite-Population Random Graph Inference},
year = {2017},
abstract = {An important problem in the statistical analysis of network data is that network data are non-standard data and therefore the meaning of core statistical notions, such as sample and population, is not obvious. All too often, the meaning of such core notions has been left implicit, which has led to considerable confusion. Starting from first principles, we build a statistical framework encompassing a wide range of inference scenarios and distinguish the graph generating process from the observation process. We discuss inference for graphs of fixed size, including finite- and super-population inference, and inference for sequences of graphs of increasing size. We review invariance properties of sequences of graphs of increasing size, including invariance to the labeling of nodes, invariance of expected degrees of nodes, and projectivity, and discuss implications in terms of inference. We conclude with consistency and asymptotic normality results for estimators in finite-, super-, and infinite-population inference scenarios.},
eprint = {1707.04800v1},
eprintclass = {stat.ME},
eprinttype = {arXiv},
keywords = {stat.ME},
url = {https://arxiv.org/abs/1707.04800},
}
@TechReport{ScKr17n,
author = {Michael Schweinberger and Pavel N. Krivitsky and Carter T. Butts},
title = {A Note on the Role of Projectivity in Likelihood-Based Inference for Random Graph Models},
year = {2017},
abstract = {There is widespread confusion about the role of projectivity in likelihood-based inference for random graph models. The confusion is rooted in claims that projectivity, a form of marginalizability, may be necessary for likelihood-based inference and consistency of maximum likelihood estimators. We show that likelihood-based superpopulation inference is not affected by lack of projectivity and that projectivity is not a necessary condition for consistency of maximum likelihood estimators.},
date = {2017-07-01},
eprint = {1707.00211v1},
eprintclass = {math.ST},
eprinttype = {arXiv},
keywords = {math.ST, stat.TH},
url = {https://arxiv.org/abs/1707.00211},
}
@Article{CrBu15c,
author = {Noel Cressie and Sandy Burden and Walter Davis and Pavel N. Krivitsky and Payam Mokhtarian and Thomas Suesse and Andrew Zammit-Mangion},
title = {Capturing Multivariate Spatial Dependence: {Model,} Estimate, and then Predict {(Discussion Paper)}},
journal = {Statistical Science},
year = {2015},
volume = {30},
number = {2},
pages = {170--175},
month = may,
abstract = {Physical processes rarely occur in isolation, rather they influence and interact with one another. Thus, there is great benefit in modeling potential dependence between both spatial locations and different processes. It is the interaction between these two dependencies that is the focus of Genton and Kleiber’s paper under discussion. We see the problem of ensuring that any multivariate spatial covariance matrix is nonnegative definite as important, but we also see it as a means to an end. That “end” is solving the scientific problem of predicting a multivariate field.},
doi = {10.1214/15-STS517},
}
@Comment{jabref-meta: databaseType:bibtex;}