@article{RISC121,
  author    = {Bósa, Károly and Schreiner, Wolfgang},
  title     = {Tolerating Stop Failures in {Distributed Maple}},
  language  = {english},
  abstract  = {In previous work we have introduced some fault tolerance mechanisms to the
    parallel computer algebra system Distributed Maple such that
    a session may tolerate the failure of computing
    nodes and of connections between nodes without overall failure.
    In this paper, we extend this
    fault tolerance by some advanced
    mechanisms. The first one is the reconnection of a node after
    a connection failure such that a session does not deadlock.
    The second mechanism is the restarting of a node
    after a failure such that the session does not fail. The
    third mechanism is the change of the root node such that a session
    may tolerate also the failure of the root without overall failure.},
  journal   = {Scalable Computing: Practice and Experience (SCPE)},
  volume    = {6},
  number    = {2},
  pages     = {59--70},
  publisher = {NovaPublishers},
  isbn_issn = {0},
  year      = {2005},
  month     = jul,
  note      = {Special issue on Dapsys 2002},
  refereed  = {yes},
  keywords  = {distributed systems, fault tolerance, computer algebra},
  length    = {15}
}