@InProceedings{Gwo_HPC2000_20000416, author = {Jin-Ping Gwo and Forrest M. Hoffman and William W. Hargrove}, title = {Mechanistic-Based Genetic Algorithm Search on a {B}eowulf Cluster of {L}inux {PC}s}, booktitle = {Proceedings of the High Performance Computing 2000 ({HPC}2000) Conference}, dates = {16--20 April 2000}, location = {Washington, DC}, day = 16, month = apr, year = 2000, abstract = {A simple genetic algorithm (SGA) was implemented on a cluster of Linux PCs to search for the most likely fracture networks in a soil column. The objective is to evaluate the performance of SGAs in a distributed computing environment that is widely and inexpensively available to environmental researchers and engineers. The Beowulf computer was built out of surplus personal computers at Oak Ridge National Laboratory by scientists in the Environmental Sciences Division (http://www.esd.ornl.gov). The communication on the Beowulf is via ordinary Ethernet connection private among the processors, with a peak bandwidth of 10 Mbit/s. The CPUs are mostly Intel 486DX-2/66 and Pentiums, with 16--32 MB of memory. Most of the software on the Beowulf is from the public domain. Using the PVM message passing library and a manager-worker paradigm, we seek to maximize the loads on CPUs of dissimilar speed and memory size. SGA is an inductive search algorithm that bases upon a few simple operators such as reproduction, crossover, and mutation. The underlying mechanisms of flow and transport phenomena in structured soils with discrete fractures are simulated by the computer code FRACTRAN. In a generation of SGA, hundreds of FRACTRAN simulations are required, which consume the majority of the CPU time needed by the SGA search process. For an entire SGA search, tens of millions of such simulations, often referred to as function evaluation in genetic algorithms literature, are performed. The minimal communication between the manager and workers, passing fracture networks represented in bit strings to the workers and bit string fitness back to the manager, suggests that small communication bandwidth is adequate to achieve high performance. The manager-worker paradigm is also highly effective in achieving load balance on heterogeneous, networked computers such as the Beowulf. In addition to reporting the performance of the implementation, we also explore the aspect of SGA related to information constraints. SGA may be trapped in local optima and genetic drifting may ensue. With additional information the SGA may be steered away from local optima and the uncertainty of the identified fracture networks may be reduced. Because multiple runs of the SGA search algorithm are necessary to determine the least uncertain fracture networks, a distributed computing environment proves to be highly effective.} }