@inproceedings{10.1109/MASCOTS.2010.24,
  author    = {Lama, Palden and Zhou, Xiaobo},
  title     = {Autonomic Provisioning with Self-Adaptive Neural Fuzzy Control for End-to-End Delay Guarantee},
  booktitle = {2010 {IEEE} International Symposium on Modeling, Analysis and Simulation of Computer and Telecommunication Systems ({MASCOTS})},
  issn      = {1526-7539},
  year      = {2010},
  pages     = {151--160},
  doi       = {10.1109/MASCOTS.2010.24},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
  abstract  = {Autonomic server provisioning for performance assurance is a critical issue in data centers. It is important but challenging to guarantee an important performance metric, percentile-based end-to-end delay of requests flowing through a virtualized multi-tier server cluster. It is mainly due to dynamically varying workload and the lack of an accurate system performance model. In this paper, we propose a novel autonomic server allocation approach based on a model-independent and self-adaptive neural fuzzy control. There are model-independent fuzzy controllers that utilize heuristic knowledge in the form of rule base for performance assurance. Those controllers are designed manually on trial and error basis, often not effective in the face of highly dynamic workloads. We design the neural fuzzy controller as a hybrid of control theoretical and machine learning techniques. It is capable of self-constructing its structure and adapting its parameters through fast online learning. Unlike other supervised machine learning techniques, it does not require off-line training. We further enhance the neural fuzzy controller to compensate for the effect of server switching delays. Extensive simulations demonstrate the effectiveness of our new approach in achieving the percentile-based end-to-end delay guarantees. Compared to a rule-based fuzzy controller enabled server allocation approach, the new approach delivers superior performance in the face of highly dynamic workloads. It is robust to workload variation, change in delay target and server switching delays.},
}

@inproceedings{hedwig2009taming,
  author    = {Hedwig, Markus and Malkowski, Simon and Neumann, Dirk},
  title     = {Taming Energy Costs of Large Enterprise Systems Through Adaptive Provisioning},
  booktitle = {Proceedings of the 30th International Conference on Information Systems ({ICIS} 2009)},
  year      = {2009},
  note      = {Paper 140},
  url       = {http://aisel.aisnet.org/icis2009/140},
  abstract  = {One of the most pressing concerns in modern datacenter management is the rising cost of operation. Therefore, reducing variable expense, such as energy cost, has become a number one priority. However, reducing energy cost in large distributed enterprise system is an open research topic. These systems are commonly subjected to highly volatile workload processes and characterized by complex performance dependencies. This paper explicitly addresses this challenge and presents a novel approach to Taming Energy Costs of Larger Enterprise Systems (Tecless). Our adaptive provisioning methodology combines a low-level technical perspective on distributed systems with a high-level treatment of workload processes. More concretely, Tecless fuses an empirical bottleneck detection model with a statistical workload prediction model. Our methodology forecasts the system load online, which enables on-demand infrastructure adaption while continuously guaranteeing quality of service. In our analysis we show that the prediction of future workload allows adaptive provisioning with a power saving potential of up 25 percent of the total energy cost.},
}
@inproceedings{GERMAINRENAUD:2008:INRIA-00287826:1,
  author    = {Germain Renaud, C{\'e}cile and Perez, Julien and K{\'e}gl, Bal{\'a}zs and Loomis, C.},
  title     = {Grid Differentiated Services: A Reinforcement Learning Approach},
  booktitle = {8th {IEEE} International Symposium on Cluster Computing and the Grid ({CCGrid})},
  year      = {2008},
  address   = {Lyon, France},
  url       = {http://hal.inria.fr/inria-00287826/en/},
  hal_id    = {inria-00287826},
  abstract  = {Large scale production grids are a major case for autonomic computing. Following the classical definition of Kephart, an autonomic computing system should optimize its own behavior in accordance with high level guidance from humans. This central tenet of this paper is that the combination of utility functions and reinforcement learning (RL) can provide a general and efficient method for dynamically allocating grid resources in order to optimize the satisfaction of both endusers and participating institutions. The flexibility of an RLbased system allows to model the state of the grid, the jobs to be scheduled, and the high-level objectives of the various actors on the grid. RL-based scheduling can seamlessly adapt its decisions to changes in the distributions of inter-arrival time, QoS requirements, and resource availability. Moreover, it requires minimal prior knowledge about the target environment, including user requests and infrastructure. Our experimental results, both on a synthetic workload and a real trace, show that RL is not only a realistic alternative to empirical scheduler design, but is able to outperform them.},
}

@incollection{zeppenfeld2011applying,
  author    = {Zeppenfeld, J. and Bouajila, A. and Stechele, W. and Bernauer, A. and Bringmann, O. and Rosenstiel, W. and Herkersdorf, A.},
  title     = {Applying {ASoC} to Multi-Core Applications for Workload Management},
  booktitle = {Organic Computing --- A Paradigm Shift for Complex Systems},
  pages     = {461},
  isbn      = {3034801297},
  year      = {2011},
  publisher = {Birkhauser Verlag AG},
}

@inproceedings{10.1109/DATE.2008.4484668,
  author    = {Tan, Ying and Qiu, Qinru},
  title     = {A Framework of Stochastic Power Management Using Hidden {Markov} Model},
  booktitle = {Design, Automation and Test in Europe Conference and Exhibition ({DATE})},
  year      = {2008},
  pages     = {92--97},
  isbn      = {978-3-9810801-3-1},
  doi       = {10.1109/DATE.2008.4484668},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
  abstract  = {The effectiveness of stochastic power management relies on the accurate system and workload model and effective policy optimization. Workload modeling is a machine learning procedure that finds the intrinsic pattern of the incoming tasks based on the observed workload attributes. Markov Decision Process (MDP) based model has been widely adopted for stochastic power management because it delivers provable optimal policy. Given a sequence of observed workload attributes, the hidden Markov model (HMM) of the workload is trained. If the observed workload attributes and states in the workload model do not have one-to-one correspondence, the MDP becomes a Partially Observable Markov Decision Process (POMDP). This paper presents a framework of modeling and optimization for stochastic power management using HMM and POMDP. The proposed technique discovers the HMM of the workload by maximizing the likelihood of the observed attribute sequence. The POMDP optimization is formulated and solved as a quadraticly constrained linear programming (QCLP). Compared with traditional optimization technique, which is based on value iteration, the QCLP based optimization provides superior policy by enabling stochastic control.},
}