% Conference paper by Durden et al., published in the SMC 2020 proceedings volume.
% This book is part of the series:
% Communications in Computer and Information Science, volume 1315
% (the same information is recorded in the series and volume fields of the entry).
% Note: day and abstract are non-standard fields; standard styles ignore them.
@InProceedings{Durden_SMC2020_20201218,
  author       = {Durden, David J. and Metzger, Stefan and Chu, Housen and Collier, Nathan and Davis, Kenneth J. and Desai, Ankur R. and Kumar, Jitendra and Wieder, William R. and Xu, Min and Hoffman, Forrest M.},
  title        = {Automated Integration of Continental-Scale Observations in Near-Real Time for Simulation and Analysis of Biosphere--Atmosphere Interaction},
  editor       = {Nichols, Jeffrey and Verastegui, Becky and Maccabe, Arthur `Barney' and Hernandez, Oscar and Parete-Koon, Suzanne and Ahearn, Theresa},
  booktitle    = {Driving Scientific and Engineering Discoveries Through the Convergence of {HPC}, Big Data and {AI}},
  series       = {Communications in Computer and Information Science},
  volume       = {1315},
  organization = {17th Smoky Mountains Computational Sciences and Engineering Conference, SMC 2020 (August 26--28, 2020)},
  publisher    = {Springer International Publishing},
  address      = {Cham},
  isbn         = {978-3-030-63393-6},
  pages        = {204--225},
  doi          = {10.1007/978-3-030-63393-6\_14},
  day          = 18,
  month        = dec,
  year         = 2020,
  abstract     = {The National Ecological Observatory Network (NEON) is a continental-scale observatory with sites across the US collecting standardized ecological observations that will operate for multiple decades. To maximize the utility of NEON data, we envision edge computing systems that gather, calibrate, aggregate, and ingest measurements in an integrated fashion. Edge systems will employ machine learning methods to cross-calibrate, gap-fill and provision data in near-real time to the NEON Data Portal and to High Performance Computing (HPC) systems, running ensembles of Earth system models (ESMs) that assimilate the data. For the first time gridded EC data products and response functions promise to offset pervasive observational biases through evaluating, benchmarking, optimizing parameters, and training new machine learning parameterizations within ESMs all at the same model-grid scale.
    Leveraging open-source software for EC data analysis, we are already building software infrastructure for integration of near-real time data streams into the International Land Model Benchmarking (ILAMB) package for use by the wider research community. We will present a perspective on the design and integration of end-to-end infrastructure for data acquisition, edge computing, HPC simulation, analysis, and validation, where Artificial Intelligence (AI) approaches are used throughout the distributed workflow to improve accuracy and computational performance.},
}