% Conference paper on MSTC in the iEMSs 2008 proceedings (Barcelona, 7--10 July 2008).
% NOTE(review): dates and day are not standard classic-BibTeX fields, and location is
% read by biblatex as the publisher location; all three are kept as found in case a
% custom style consumes them -- confirm against the target style.
@InProceedings{Hoffman_iEMSs-MSTC_20080707,
  author    = {Hoffman, Forrest M. and Hargrove, William W. and Mills, Richard T. and Mahajan, Salil and Erickson, David J. and Oglesby, Robert J.},
  title     = {{Multivariate} {Spatio-Temporal} {Clustering} ({MSTC}) as a Data Mining Tool for Environmental Applications},
  booktitle = {Proceedings of the {iEMSs} {Fourth} {Biennial} {Meeting}: {International} {Congress} on {Environmental} {Modelling} and {Software} {Society} ({iEMSs} 2008)},
  editor    = {S{\`a}nchez-Marr{\`e}, Miquel and B{\'e}jar, Javier and Comas, Joaquim and Rizzoli, Andrea E. and Guariso, Giorgio},
  dates     = {7--10 July 2008},
  location  = {Barcelona, Catalonia, Spain},
  pages     = {1774--1781},
  ISBN      = {978-84-7653-074-0},
  day       = 7,
  month     = jul,
  year      = 2008,
  abstract  = {The authors have applied multivariate cluster analysis to a variety of environmental science domains, including ecological regionalization; environmental monitoring network design; analysis of satellite-, airborne-, and ground-based remote sensing, and climate model-model and model-measurement intercomparison. The clustering methodology employs a $k$-means statistical clustering algorithm that has been implemented in a highly scalable, parallel high performance computing (HPC) application. Because of its efficiency and use of HPC platforms, the clustering code may be applied as a data mining tool to analyze and compare very large data sets of high dimensionality, such as very long or high frequency/resolution time series measurements or model output. The method was originally applied across geographic space and called Multivariate Geographic Clustering (MGC). Now applied across space and through time, the environmental data mining method is called Multivariate Spatio-Temporal Clustering (MSTC). Described here are the clustering algorithm, recent code improvements that significantly reduce the time-to-solution, and a new parallel principal components analysis (PCA) tool that can analyze very large data sets. Finally, a sampling of the authors' applications of MGC and MSTC to problems in the environmental sciences are presented.},
}