@STRING{LNCS = "Lecture Notes in Computer Science"}
@INPROCEEDINGS{VanVaerenbergh2012Improving,
   affiliation = {CoMo, Vrije Universiteit Brussel, Pleinlaan 2, 1050 Brussels, Belgium},
   author = {Van~Vaerenbergh, Kevin and Rodriguez, Abdel and Gagliolo, Matteo and Vrancx, Peter 
             and Now\'{e}, Ann and Stoev, Julian and Goossens, Stijn and Pinte, Gregory and Symens, Wym}, 
   title = {Improving wet clutch engagement with Reinforcement Learning},
   booktitle = {International Joint Conference on Neural Networks, IJCNN 2012},
   note = {Accepted},
   event = {http://www.ieee-wcci2012.org/}
}

@INPROCEEDINGS{Gagliolo2011Policy,
   affiliation = {CoMo, Vrije Universiteit Brussel, Pleinlaan 2, 1050 Brussels, Belgium},
   author = {Gagliolo, Matteo and Van~Vaerenbergh, Kevin and Rodriguez, Abdel and Now\'{e}, Ann 
             and Goossens, Stijn and Pinte, Gregory and Symens, Wym}, 
   title = {Policy search reinforcement learning for automatic wet clutch engagement},
   abstract = {In most existing motion control algorithms, a reference trajectory
                  is tracked, based on a continuous measurement of the system's
                  response. In many industrial applications, however, it is either
                  not possible or too expensive to install sensors which measure the 
                  system's output over the complete stroke: instead, the motion can only be 
                  detected at certain discrete positions. The control objective in
                  these systems is often not to track a complete trajectory
                  accurately, but rather to achieve a given state at the
                  sensor locations (e.g. to pass by the sensor at a given time, or
                  with a given speed). 
                  Model-based control strategies are not suited for the control of these systems,
                  due to the lack of sensor data. We are currently investigating the potential of
                  a non-model-based learning strategy, Reinforcement Learning (RL), in
                  dealing with this kind of discrete sensor information. Here, 
                  we describe ongoing experiments with a wet clutch, which has to be engaged
                  smoothly yet quickly, without any feedback on piston position.},
   pages = {250-255},
   booktitle = {15th International Conference on System Theory, Control and Computing 
                --- ICSTCC 2011},
   year = {2011},
   editors = {Mihail Voicu},
   publisher = {IEEE},
   issn = {2086-0465},
   isbn = {978-973-621-322-9},
   url = {http://ieeexplore.ieee.org/search/freesrchabstract.jsp?arnumber=6085740},
   pdf = {http://como.vub.ac.be/~mgagliol/Gagliolo11ICSTCC.pdf},
   slides = {http://como.vub.ac.be/~mgagliol/Gagliolo11ICSTCCslides.pdf},
   event = {http://www.ace.tuiasi.ro/icstcc2011}
}

@INPROCEEDINGS{Pinte2011Learning,
   affiliation = {CoMo, Vrije Universiteit Brussel, Pleinlaan 2, 1050 Brussels, Belgium},
   author = {Pinte, Gregory and Stoev, Julian and Symens, Wim and Dutta, Abhishek and Zhong, Yu 
             and Wyns, Bart and De~Keyser, Robin and Depraetere, Bruno and Swevers, Jan 
             and Gagliolo, Matteo and Now\'{e}, Ann}, 
   title = {Learning strategies for wet clutch control},
   abstract = {This paper presents an overview of model-based
               (Iterative Learning Control, Model Predictive Control and 
               Iterative Optimization) and non-model-based (Genetic-based 
               Machine Learning and Reinforcement Learning) learning 
               strategies for the control of wet clutches. Based on theoretical
               considerations and a validation on an experimental test bench
               containing wet clutches, the benefits and drawbacks of the
               different strategies are compared. Although after convergence
               a good engagement quality can be obtained by all strategies,
               only model-based strategies are suited for online applicability.
               The convergence time for non-model-based strategies is too long
               such that they can only be applied during an offline calibration
               phase.},		
   pages = {467-474},
   booktitle = {15th International Conference on System Theory, Control and Computing 
                --- ICSTCC 2011},
   year = {2011},
   editors = {Mihail Voicu},
   publisher = {IEEE},
   issn = {2086-0465},
   isbn = {978-973-621-322-9},
   event = {http://www.ace.tuiasi.ro/icstcc2011}
}

@MISC{Rodriguez2011Improving,
   affiliation = {CoMo, Vrije Universiteit Brussel, Pleinlaan 2, 1050 Brussels, Belgium},
   author = {Rodriguez, Abdel and Gagliolo, Matteo  and Vrancx, Peter and Grau, Ricardo 
             and Now\'{e}, Ann}, 
   title = {Improving the performance of Continuous Action Reinforcement Learning Automata},
   abstract = {Learning automata (LA) are policy iteration reinforcement learners
	    that exhibit nice convergence properties in discrete action settings.
	    Recently, a novel formulation for continuous action reinforcement learning
	    automata  was proposed (CARLA), featuring an interesting exploration strategy,
	    which is well suited for learning from sparse reward signals.
	    In this paper, we propose an improvement of the algorithm,
	    and evaluate its impact on performance comparing with
	    the original version, as well as with another continuous LA.
	    The experimental evaluation
	    is carried out both in simulation and on a real control setup,
	    showing a clear advantage of our method, which
	    also performs well in distributed control settings.},
   note = {\emph{9th European Workshop on Reinforcement Learning, EWRL 2011}},
   year = {2011},
   url = {http://ewrl.files.wordpress.com/2011/08/ewrl2011_submission_24.pdf},
   event = {http://ewrl.wordpress.com/past-ewrl/ewrl9-2011/}
}

@MISC{Gagliolo2011Policy-abs,
   affiliation = {CoMo, Vrije Universiteit Brussel, Pleinlaan 2, 1050 Brussels, Belgium},
   author = {Gagliolo, Matteo and Van~Vaerenbergh, Kevin and Rodriguez, Abdel and Now\'{e}, Ann 
             and Goossens, Stijn and Pinte, Gregory and Symens, Wym}, 
   title = {Policy gradient methods for controlling systems with discrete sensor information},
   abstract = {In most existing motion control algorithms, a reference trajectory
	    is tracked, based on a continuous measurement of the system's
	    response. In many industrial applications, however, it is either
	    not possible or too expensive to install sensors which measure the 
	    system's output over the complete stroke: instead, the motion can only be 
	    detected at certain discrete positions. The control objective in
	    these systems is often not to track a complete trajectory
	    accurately, but rather to achieve a given state at the
	    sensor locations (e.g. to pass by the sensor at a given time, or
	    with a given speed). 
	    Model-based control strategies are not suited for the control of these systems,
	    due to the lack of sensor data. We are currently investigating the potential of
	    a non-model-based learning strategy, Reinforcement Learning (RL), in
	    dealing with this kind of discrete sensor information. Here, 
	    we describe experiments with a simple yet challenging system, 
	    where a single sensor detects the passage of a mass being 
	    pushed by a linear motor.},
   note = {\emph{20th Annual Belgian-Dutch Conference on Machine Learning --- BeNeLearn}, 
           online proceedings, p. 115--116},
   pages = {115-116},
   editors = {Peter van~der~Putten and Cor Veenman and Joaquin Vanschoren and Menno Israel 
              and Hendrik Blockeel},
   year = {2011},
   url = {http://como.vub.ac.be/~mgagliol/Gagliolo11BENELEARN.pdf},
   event = {http://www.benelearn2011.org/}
}

@ARTICLE{Gagliolo2011Algorithm,
  author = {Matteo Gagliolo and J\"{u}rgen Schmidhuber},
  affiliation = {CoMo, Vrije Universiteit Brussel, Pleinlaan 2, 1050 Brussels, Belgium},
  title = {Algorithm Portfolio Selection as a Bandit Problem with Unbounded Losses},
  abstract = {We propose a method that learns to allocate computation time to a given set 
              of algorithms, of unknown performance, with the aim of solving a given 
              sequence of problem instances in a minimum time. Analogous \emph{meta-learning} 
              techniques  are typically based on models of algorithm performance, learned 
              during a separate \emph{offline} training sequence which can be prohibitively 
              expensive. We adopt instead an \emph{online} approach, named GambleTA, in 
              which algorithm performance models are iteratively updated, and used to guide 
              allocation on a sequence of problem instances. GambleTA is a general method 
              for selecting among two or more alternative algorithm portfolios. Each portfolio 
              has its own way of allocating computation time to the available algorithms, 
              possibly based on performance models, in which case its performance is expected 
              to improve over time, as more runtime data becomes available. The resulting 
              \emph{exploration-exploitation} trade-off is represented as a bandit problem.  
              In our previous work, the algorithms corresponded to the arms of the bandit, 
              and allocations evaluated by the different portfolios were mixed, using a solver 
              for the bandit problem with expert advice,  but this required the setting of 
              an arbitrary bound on algorithm runtimes, invalidating the optimal regret of 
              the solver. In this paper, we propose a simpler version of GambleTA, in which 
              the allocators correspond to the arms, such that a single portfolio is selected 
              for each instance. The selection is represented as a bandit problem with partial 
              information, and an \emph{unknown} bound on losses. We devise a solver for this 
              game, proving a bound on its expected regret. We present experiments based on 
              results from several solver competitions, in various domains, comparing GambleTA 
              with another online method.}, 
  journal = {Annals of Mathematics and Artificial Intelligence},
  publisher = {Springer Netherlands},
  issn = {1012-2443},
  pages = {49-86},
  volume = {61},
  number = {2},
  year = {2011},
  url = {http://como.vub.ac.be/~mgagliol/Gagliolo11AMAI.pdf},
  doi = {10.1007/s10472-011-9228-z}
}
@INPROCEEDINGS{Goossens2011Reinforcement,
   affiliation = {CoMo, Vrije Universiteit Brussel, Pleinlaan 2, 1050 Brussels, Belgium},
   author = {Goossens, Stijn and Pinte, Gregory and Symens, Wym and Gagliolo, Matteo 
             and Rodriguez, Abdel and Now\'{e}, Ann}, 
   title = {Reinforcement learning for repetitive systems with discrete sensors},
   booktitle = {30th Benelux Meeting on Systems and Control --- Book of Abstracts},
   pages = {149},
   editors = {Clara Ionescu and Robin De Keyser and Patrick Guillaume and Rik Pintelon 
              and Johan Schoukens and Ilse Smets and Joos Vandewalle and Jan Van Impe 
              and Jan Swevers},
   publisher = {Universiteit Gent},
   address = {Gent, Belgium},
   isbn = {978-90-9026089-1},
   year = {2011},
   url = {http://como.vub.ac.be/~mgagliol/Goossens11BENEMEET.pdf},
   slides = {http://como.vub.ac.be/~mgagliol/Goossens11BENEMEETslides.pdf},
   event = {http://www.beneluxmeeting.org}
}
@INCOLLECTION {Gagliolo2010Survival,
   author = {Matteo Gagliolo and Catherine Legrand},
   affiliation = {IRIDIA, CoDE, Universit\'{e} Libre de Bruxelles, Brussels, Belgium},
   title = {Algorithm Survival Analysis},
   booktitle = {Experimental Methods for the Analysis of Optimization Algorithms},
   editor = {Thomas Bartz-Beielstein and Marco Chiarandini and Lu\'{i}s Paquete 
             and Mike Preuss},
   publisher = {Springer},
   address = {Berlin, Heidelberg},
   isbn = {978-3-642-02538-9},
   pages = {161--184},
   url = {http://como.vub.ac.be/~mgagliol/Gagliolo10Survival.pdf},
   doi = {10.1007/978-3-642-02538-9_7},
   abstract = {Algorithm selection is typically based on models of algorithm 
               performance,learned during a separate \emph{offline} training sequence, 
               which can be prohibitively expensive. In recent work, we adopted 
               an \emph{online} approach, in which models of the runtime distributions of 
               the available algorithms are iteratively updated and used to guide 
               the allocation of computational resources, while solving a sequence 
               of problem instances. The models are estimated using survival analysis 
               techniques, which allow us to reduce computation time, censoring the 
               runtimes of the slower algorithms. Here, we review the statistical 
               aspects of our online selection method, discussing the bias induced 
               in the runtime distributions (RTD) models by the competition of 
               different algorithms on the same problem instances.},
   year = {2010}
}
@INPROCEEDINGS{Gagliolo2010Selection,
  author = {Matteo Gagliolo and J\"{u}rgen Schmidhuber},
  affiliation = {IDSIA Galleria 2 6928 Manno (Lugano) Switzerland},
  title = {Algorithm Selection as a Bandit Problem with Unbounded Losses},
  abstract = {Algorithm selection is typically based on models of algorithm 
              performance learned during a separate \emph{offline} training sequence, 
              which can be prohibitively expensive. In recent work, we adopted 
              an \emph{online} approach, in which a performance model is iteratively 
              updated and used to guide selection on a sequence of problem instances. 
              The resulting \emph{exploration-exploitation} trade-off was represented as a 
              bandit problem with expert advice, using an existing solver for this game, 
              but this required the setting of an arbitrary bound on algorithm runtimes, 
              thus invalidating the optimal regret of the solver. In this paper, we 
              propose a simpler framework for representing algorithm selection as a bandit 
              problem, with partial information, and an \emph{unknown} bound on losses. We adapt 
              an existing solver to this game, proving a bound on its expected regret, which 
              holds also for the resulting algorithm selection technique. We present 
              experiments with a set of SAT solvers on a mixed SAT-UNSAT benchmark.},
  booktitle = {Learning and Intelligent Optimization. 4th International Conference, LION 4, 
               Venice, Italy, January 18-22, 2010. Selected Papers},
  editor = {Christian Blum and Roberto Battiti},
  pages = {82--96},
  publisher = {Springer},
  address = {Berlin, Heidelberg},
  series = {LNCS},
  volume = {6073},
  year = {2010},
  eprint = {0807.1494},
  doi = {10.1007/978-3-642-13800-3_7},
  isbn = {978-3-642-13799-0}, 
  url = {http://como.vub.ac.be/~mgagliol/Gagliolo10LION.pdf},
  slides = {http://como.vub.ac.be/~mgagliol/Gagliolo10LIONslides.pdf},
  techrep = {http://www.idsia.ch/idsiareport/IDSIA-07-08.pdf},
  event = {http://www.intelligent-optimization.org/LION4/}
}
@PHDTHESIS{Gagliolo2010PhD, 
  author = {Matteo Gagliolo}, 
  affiliation = {IDSIA, Galleria 2, 6928 Manno-Lugano Switzerland},
  title = {Online Dynamic Algorithm Portfolios}, 
  abstract = {This thesis presents methods for minimizing the computational effort
                of problem solving. Rather than looking at a
		particular algorithm, we consider the issue of computational complexity at a higher
		level, and propose techniques that, given a set of 
		candidate algorithms, of unknown performance,
		learn to use these algorithms \emph{while} solving a
		sequence of problem instances, with the aim of solving all
		instances in a minimum time. 
		An analogous \emph{meta-level} approach to problem solving has been
		adopted in many different fields, with different aims and
		terminology. A widely accepted term to describe
		it is \emph{algorithm selection}. Algorithm \emph{portfolios} 
		represent a more general framework, in which computation 
		time is allocated to a set of algorithms running on one or more
		processors.

		Automating algorithm selection
		is an old dream of the AI community, which has been
		brought closer to reality in the last decade. 
		Most available selection techniques are based 
		on a model of algorithm performance, assumed to be available, or  
		learned during a separate \emph{offline} training sequence,
		which is often prohibitively expensive. The model is used
		to perform a \emph{static} allocation of resources,
		with no feedback from the actual execution of the
		algorithms. 
		There is a trade-off between the performance
		of model-based selection, and the cost of learning
		the model. In this thesis, we formulate this trade-off
		as a bandit problem. 

		We propose GambleTA, a fully dynamic and
		online algorithm portfolio selection technique, with no 
		separate training phase: all candidate algorithms are run in parallel,
		while a model incrementally learns their runtime distributions. 
		A redundant set of \emph{time allocators} 
		uses the partially trained model to optimize
		machine time shares for the algorithms, 
		in order to minimize runtime. 
		A bandit problem solver picks the allocator to use
		on each instance, gradually increasing
		the impact of the best time allocators
		as the model improves. A similar approach is adopted 
		for learning restart strategies online (GambleR). 
		In both cases, the runtime distributions are modeled  
		using survival analysis techniques; unsuccessful runs
		are correctly considered as censored runtime observations, 
		allowing to save further computation time. 

		The methods proposed are validated with several experiments, mostly
		based on data from solver competitions, displaying a robust performance
		in a variety of settings, and showing that rough performance 
		models already allow to allocate resources efficiently, 
		reducing the risk of wasting computation time.},
  school = {IDSIA/University of Lugano}, 
  address = {Lugano, Switzerland}, 
  year = {2010},
  month = {March},
  day = {24}, 
  url = {http://como.vub.ac.be/~mgagliol/Gagliolo10PhD.pdf},
  slides = {http://como.vub.ac.be/~mgagliol/Gagliolo10PhDslides.pdf}
}

@INPROCEEDINGS{Gagliolo2009Mixed,
  author = {Matteo Gagliolo and Catherine Legrand and Mauro Birattari},
  affiliation = {Universit\'{e}  Libre de Bruxelles IRIDIA, CoDE Brussels Belgium},
  title = {Mixed-Effects Modeling of Optimisation Algorithm Performance.},
  abstract = {The learning curves of optimisation algorithms, plotting the evolution 
              of the objective vs. runtime spent. can be viewed as a sample of 
              \emph{longitudinal} data. In this paper we describe \emph{mixed-effects} 
              modeling, a standard technique in longitudinal data analysis, and give 
              an example of its application to algorithm performance modeling.},
  booktitle = {Engineering Stochastic Local Search Algorithms. Designing,
               Implementing and Analyzing Effective Heuristics, Second
               International Workshop, SLS 2009, Brussels, Belgium, September
               3--4, 2009. Proceedings},
  editor = {Thomas St\"{u}tzle and Mauro Birattari and Holger H. Hoos},
  pages = {150--154},
  publisher = {Springer},
  series = {LNCS},
  volume = {5752},
  year = {2009},
  url = {http://como.vub.ac.be/~mgagliol/Gagliolo09SLS.pdf},
  poster = {http://como.vub.ac.be/~mgagliol/Gagliolo09SLSposter.pdf},
  doi = {10.1007/978-3-642-03751-1_17},
  isbn = {978-3-642-03750-4},
  event = {http://iridia.ulb.ac.be/sls2009/}
}

@INPROCEEDINGS{Gagliolo2008Towards,
  author = {Matteo Gagliolo and J\"{u}rgen Schmidhuber},
  affiliation = {IDSIA Galleria 2 6928 Manno (Lugano) Switzerland},
  title = {Towards Distributed Algorithm Portfolios},
  abstract = {In recent work we have developed an online algorithm selection technique, 
              in which a model of algorithm performance is learned incrementally \emph{while} 
              being used. The resulting \emph{exploration-exploitation} trade-off is solved as a 
              bandit problem. The candidate solvers are run in parallel on a single machine, 
              as an \emph{algorithm portfolio}, and computation time is shared among them according 
              to their expected performances. In this paper, we extend our technique to the 
              more interesting and practical case of multiple CPUs.},
  booktitle = {International Symposium on Distributed Computing and Artificial Intelligence 
               2008 (DCAI 2008)},
  editor = {Juan M. Corchado and Sara Rodr\'{i}guez and James Llinas and Jos\'{e} M. Molina},
  publisher = {Springer},
  series = {Advances in Soft Computing},
  volume = {50},
  pages = {634--643},
  year = {2009},
  doi = {10.1007/978-3-540-85863-8_75},
  isbn = {978-3-540-85862-1},
  url = {http://como.vub.ac.be/~mgagliol/Gagliolo08DCAI.pdf},
  slides = {http://como.vub.ac.be/~mgagliol/Gagliolo08DCAIslides.pdf},
  event = {http://gsii.usal.es/~dcai/}
}

@ARTICLE{Schmidhuber2007Training,
  author = {J\"{u}rgen Schmidhuber and Daan Wierstra and Matteo Gagliolo and
	Faustino Gomez},
  affiliation = {IDSIA, Galleria 2, 6928 Manno-Lugano Switzerland},
  title = {Training recurrent networks by Evolino},
  abstract = {In recent years, gradient-based LSTM recurrent neural networks 
              (RNNs) solved many previously RNN-unlearnable tasks. Sometimes, 
              however, gradient information is of little use for training RNNs, 
              due to numerous local minima. For such cases, we present a novel 
              method: EVOlution of systems with LINear Outputs (Evolino). Evolino 
              evolves weights to the nonlinear, hidden nodes of RNNs while 
              computing optimal linear mappings from hidden state to output, 
              using methods such as pseudo-inverse-based linear regression. If we 
              instead use quadratic programming to maximize the margin, we obtain 
              the first evolutionary recurrent support vector machines. We show 
              that Evolino-based LSTM can solve tasks that Echo State nets 
              (Jaeger, 2004a) cannot and achieves higher accuracy in certain 
              continuous function generation tasks than conventional gradient 
              descent RNNs, including gradient-based LSTM.},
  journal = {Neural computation},
  year = {2007},
  volume = {19},
  pages = {757--779},
  number = {3},
  month = {March},
  doi = {10.1162/neco.2007.19.3.757},
  url = {http://www.idsia.ch/~tino/papers/schmidhuber.neco07.pdf}
}

@ARTICLE{Gagliolo2007Universal,
  author = {Matteo Gagliolo},
  affiliation = {IDSIA, Galleria 2, 6928 Manno-Lugano Switzerland},
  title = {Universal Search},
  journal = {Scholarpedia},
  year = {2007},
  volume = {2},
  pages = {2575},
  number = {11},
  month = {nov},
  doi = {10.4249/scholarpedia.2575},
  url = {http://www.scholarpedia.org/article/Universal_search}
}

@INPROCEEDINGS{Gagliolo2007Learning,
  author = {Matteo Gagliolo and J\"{u}rgen Schmidhuber},
  affiliation = {IDSIA, Galleria 2, 6928 Manno-Lugano Switzerland},
  title = {Learning restart strategies},
  abstract = {Restart strategies are commonly used for minimizing the computational cost of 
              randomized algorithms, but require prior knowledge of the run-time distribution 
              in order to be effective. We propose a portfolio of two strategies, one fixed, 
              with a provable bound on performance, the other based on a model of run-time 
              distribution, updated as the two strategies are run on a sequence of problems. 
              Computational resources are allocated probabilistically to the two strategies, 
              based on their performances, using a well-known K-armed bandit problem solver. 
              We present bounds on the performance of the resulting technique, and experiments 
              with a satisfiability problem solver, showing rapid convergence to a near-optimal 
              execution time.},
  booktitle = {IJCAI 2007 --- Twentieth International Joint Conference 
               on Artificial Intelligence, vol. 1},
  year = {2007},
  editor = {Manuela M. Veloso},
  pages = {792-797},
  month = {January},
  publisher = {AAAI Press},
  url = {http://www.ijcai.org/papers07/Papers/IJCAI07-127.pdf},
  slides = {http://como.vub.ac.be/~mgagliol/Gagliolo07IJCAIslides.pdf},
  event = {http://www.ijcai-07.org/}
}

@ARTICLE{Gagliolo2006Learning,
  author = {Matteo Gagliolo and J\"{u}rgen Schmidhuber},
  affiliation = {IDSIA Galleria 2 6928 Manno (Lugano) Switzerland},
  title = {Learning Dynamic Algorithm Portfolios},
  abstract = {Algorithm selection can be performed using a model of runtime distribution, 
              learned during a preliminary training phase. There is a trade-off between 
              the performance of model-based algorithm selection, and the cost of learning 
              the model. In this paper, we treat this trade-off in the context of bandit 
              problems. We propose a fully dynamic and online algorithm selection technique, 
              with no separate training phase: all candidate algorithms are run in parallel, 
              while a model incrementally learns their runtime distributions. A redundant set 
              of time allocators uses the partially trained model to propose machine time shares 
              for the algorithms. A bandit problem solver mixes the model-based shares with a 
              uniform share, gradually increasing the impact of the best time allocators as the 
              model improves. We present experiments with a set of SAT solvers on a mixed SAT-UNSAT 
              benchmark; and with a set of solvers for the Auction Winner Determination problem.},
  journal = {Annals of Mathematics and Artificial Intelligence},
  year = {2006},
  volume = {47},
  pages = {295--328},
  number = {3},
  month = {August},
  note = {{AI}\&{MATH} 2006 Special Issue},
  doi = {10.1007/s10472-006-9036-z},
  url = {http://como.vub.ac.be/~mgagliol/Gagliolo06AMAI.pdf},
  techrep = {http://www.idsia.ch/idsiareport/IDSIA-02-07.pdf},
  slides = {http://como.vub.ac.be/~mgagliol/Gagliolo07LIONslides.pdf}
}

@MISC{Gagliolo2006Gambling,
  author = {Matteo Gagliolo and J\"{u}rgen Schmidhuber},
  affiliation = {IDSIA, Galleria 2, 6928 Manno-Lugano Switzerland},
  title = {Gambling in a Computationally Expensive Casino: Algorithm Selection
	as a Bandit Problem},
  abstract = {Automating algorithm selection and parameter tuning
                  is an old dream of the AI community, which has been
                  brought closer to reality in the last decade.
                  Most available techniques are either \emph{oblivious},
                  with no knowledge transfer across different problems;
                  or are based on a model of algorithm performance,  
                  learned in a separate \emph{offline} training sequence,
                  which is often prohibitively expensive.
                  We describe recent work in which the problem is treated 
                  in a fully \emph{online} setting. A model of algorithm
                  performance can be learned \emph{and}  used to reduce
                  the cost of learning it. The resulting \emph{exploration-exploitation}
                  trade-off can be treated in the context of Bandit problems.},	
  note = {\emph{Online Trading of Exploration and Exploitation} --- NIPS 2006 Workshop},
  year = {2006},
  url = {http://como.vub.ac.be/~mgagliol/Gagliolo06NIPSEEWS.pdf},
  event = {http://www.homepages.ucl.ac.uk/~ucabzhu/OTEE.htm}
}

@INPROCEEDINGS{Gagliolo2006Impact,
  author = {Matteo Gagliolo and J\"{u}rgen Schmidhuber},
  affiliation = {IDSIA, Galleria 2, 6928 Manno (Lugano) Switzerland Switzerland},
  title = {Impact of censored sampling on the performance of restart strategies},
  abstract = {Algorithm selection, algorithm portfolios, and randomized restarts, 
              can profit from a probabilistic model of algorithm run-time, to be 
              estimated from data gathered by solving a set of experiments. 
              Censored sampling offers a principled way of reducing this initial 
              training time. We study the trade-off between training time and 
              model precision by varying the censoring threshold, and analyzing 
              the consequent impact on the performance of an optimal restart strategy, 
              based on an estimated model of runtime distribution.  We present 
              experiments with a SAT solver on a graph-coloring benchmark. Due to the  
              heavy-tailed runtime distribution, a modest censoring can already reduce 
              training time by a few orders of magnitudes. The nature of the optimization 
              process underlying the restart strategy renders its performance surprisingly 
              robust, also to more aggressive censoring.},
  booktitle = {Principle and Practice of Constraint Programming --- CP 2006, 12th	
               International Conference, CP 2006, Nantes, France, September 25-29, 
               2006, Proceedings},
  year = {2006},
  editor = {Fr\'ed\'eric Benhamou},
  volume = {4204},
  series = {LNCS},
  pages = {167--181},
  month = {September},
  publisher = {Springer},
  doi = {10.1007/11889205_14},
  isbn = {978-3-540-46267-5},
  url = {http://como.vub.ac.be/~mgagliol/Gagliolo06CP.pdf},
  slides = {http://como.vub.ac.be/~mgagliol/Gagliolo06CPslides.pdf},
  event = {http://www.sciences.univ-nantes.fr/cp06/}
}

@MISC{Gagliolo2006Dynamic-abs,
  author = {Matteo Gagliolo},
  affiliation = {IDSIA, Galleria 2, 6928 Manno-Lugano Switzerland},
  title = {Dynamic Meta Learning},
  note = {\emph{50th Anniversary Summit of Artificial Intelligence}, Monte Verit\'a,
	Switzerland},
  month = {July},
  year = {2006},
  url = {http://como.vub.ac.be/~mgagliol/Gagliolo06AI50.pdf},
  poster = {http://como.vub.ac.be/~mgagliol/Gagliolo06AI50poster.pdf},
  event = {http://www.ai50.org/}
}

@INPROCEEDINGS{Schmidhuber2006Evolino,
  author = {J\"{u}rgen Schmidhuber and Matteo Gagliolo and Daan Wierstra and
	Faustino Gomez},
  affiliation = {IDSIA, Galleria 2, 6928 Manno-Lugano Switzerland},
  title = {Evolino for Recurrent Support Vector Machines},
  booktitle = {ESANN '06 --- 14 th European Symposium on Artificial Neural Networks},
  year = {2006},
  editor = {Michel Verleysen},
  pages = {593--598},
  address = {Evere, Belgium},
  month = {April},
  publisher = {d-side},
  isbn = {2-930307-06-4},
  eprint = {cs.NE/0512062},
  techrep = {http://www.idsia.ch/idsiareport/IDSIA-19-05.v2.pdf},
  event = {http://www.dice.ucl.ac.be/esann/index.php?pg=esann06}
}

@MISC{Gagliolo2006Dynamic,
  author = {Matteo Gagliolo and J\"{u}rgen Schmidhuber},
  affiliation = {IDSIA, Galleria 2, 6928 Manno-Lugano Switzerland},
  title = {Dynamic Algorithm Portfolios},
  note = {\emph{AI\&MATH '06 --- Ninth International Symposium 
          on Artificial Intelligence and Mathematics}},
  month = {January},
  year = {2006},
  url = {http://anytime.cs.umass.edu/aimath06/proceedings/P37.pdf},
  slides = {http://como.vub.ac.be/~mgagliol/Gagliolo06AIMATHslides.pdf},
  event = {http://anytime.cs.umass.edu/aimath06/}
}

@MISC{Gagliolo2005Towards,
  author = {Matteo Gagliolo and J\"{u}rgen Schmidhuber},
  affiliation = {IDSIA, Galleria 2, 6928 Manno-Lugano Switzerland},
  title = {Towards Life-Long Meta Learning},
  note = {\emph{Inductive Transfer : 10 Years Later} --- NIPS 2005 workshop},
  month = {December},
  year = {2005},
  url = {http://iitrl.acadiau.ca/itws05/Papers/ITWS09-Gagliolo_REV.pdf},
  event = {http://iitrl.acadiau.ca/itws05/}
}

@INPROCEEDINGS{Gagliolo2005Neural,
  author = {Matteo Gagliolo and J\"{u}rgen Schmidhuber},
  affiliation = {IDSIA, Galleria 2, 6928 Manno-Lugano Switzerland},
  title = {A Neural Network Model for Inter-Problem Adaptive Online Time Allocation},
  abstract = {One aim of Meta-learning techniques is to minimize the time needed for 
              problem solving, and the effort of parameter hand-tuning, by automating 
              algorithm selection. The predictive model of algorithm performance needed 
              for this task often requires long training times. We address the problem 
              in an online fashion, running multiple algorithms in parallel on a sequence 
              of tasks, continually updating their relative priorities according to a 
              neural model that maps their current state to the expected time to the solution. 
              The model itself is updated at the end of each task, based on the actual 
              performance of each algorithm. Censored sampling allows us to train the model 
              effectively, without need of additional exploration after each task's solution. 
              We present a preliminary experiment in which this new inter-problem technique 
              learns to outperform a previously proposed intra-problem heuristic.},
  booktitle = {Artificial Neural Networks: Formal Models and Their Applications
	--- ICANN 2005, 15th International Conference, Warsaw, Poland, September
	11-15, 2005, Proceedings, Part 2},
  year = {2005},
  editor = {W{\l}odzis{\l}aw Duch and Janusz Kacprzyk and Erkki Oja and S{\l}awomir
	Zadro\.{z}ny},
  pages = {7--12},
  month = {September},
  publisher = {Springer},
  series = {LNCS},
  volume = {3697},
  doi = {10.1007/11550907_2},
  isbn = {978-3-540-28755-1}, 
  url = {http://como.vub.ac.be/~mgagliol/Gagliolo05ICANN.pdf},
  slides = {http://como.vub.ac.be/~mgagliol/Gagliolo05ICANNslides.pdf},
  event = {http://www.ibspan.waw.pl/ICANN-2005/}
}

@INPROCEEDINGS{Gagliolo2004Adaptive,
  author = {Matteo Gagliolo and Viktor Zhumatiy and J\"{u}rgen Schmidhuber},
  affiliation = {IDSIA, Galleria 2, 6928 Manno-Lugano Switzerland},
  title = {Adaptive Online Time Allocation to Search Algorithms},
  abstract = {Given is a search problem or a sequence of search problems, as well 
              as a set of potentially useful search algorithms. We propose a general 
              framework for online allocation of computation time to search algorithms 
              based on experience with their performance so far. In an example instantiation, 
              we use simple linear extrapolation of performance for allocating time to 
              various simultaneously running genetic algorithms characterized by different 
              parameter values. Despite the large number of searchers tested in parallel, 
              on various tasks this rather general approach compares favorably to a more 
              specialized state-of-the-art heuristic; in one case it is nearly two orders 
              of magnitude faster.},
  booktitle = {Machine Learning: ECML 2004. Proceedings of the 15th European Conference
	on Machine Learning, Pisa, Italy, September 20-24, 2004},
  year = {2004},
  editor = {J.F. Boulicaut and F.Esposito and F. Giannotti and Dino Pedreschi},
  pages = {134--143},
  month = {September},
  publisher = {Springer},
  series = {LNCS},
  volume = {3201},
  doi = {10.1007/978-3-540-30115-8_15},
  isbn = {978-3-540-23105-9},
  techrep = {http://www.idsia.ch/idsiareport/IDSIA-23-04.ps.gz},
  slides = {http://como.vub.ac.be/~mgagliol/Gagliolo04ECMLslides.pdf},
  event = {http://ecmlpkdd.isti.cnr.it/}
}

@INPROCEEDINGS{Schmidhuber2004Bias-Optimal,
  author = {J\"{u}rgen Schmidhuber and Viktor Zhumatiy and Matteo Gagliolo},
  affiliation = {IDSIA, Galleria 2, 6928 Manno-Lugano Switzerland},
  title = {Bias-Optimal Incremental Learning of Control Sequences for Virtual
	Robots},
  booktitle = {Proc. 8th Conference on Intelligent Autonomous Systems, IAS--8},
  year = {2004},
  editor = {F. Groen et al.},
  pages = {658--665},
  address = {Amsterdam, NL},
  publisher = {IOS Press}
}

@INPROCEEDINGS{Anguita2002MDL,
  author = {Davide Anguita and Matteo Gagliolo},
  affiliation = {University of Genova, Dept. of Biophysical and Electronic Engineering, 
                 Via Opera Pia 11a I-16145 Genova Italy},
  title = {MDL Based Model Selection for Relevance Vector Regression},
  abstract = {Relevance Vector regression is a form of Support Vector regression, recently 
              proposed by M.E.Tipping, which allows a sparse representation of the data. 
              The Bayesian learning algorithm proposed by the author leaves the partially 
              open question of how to automatically choose the optimal model.  In this paper 
              we describe a model selection criterion inspired by the Minimum Description Length 
              (MDL) principle. We show that our proposal is effective in finding the optimal 
              kernel parameter both on an artificial dataset and a real-world application.},
  booktitle = {Artificial Neural Networks --- ICANN 2002},
  year = {2002},
  editor = {J.R. Dorronsoro},
  pages = {468--473},
  month = {August},
  publisher = {Springer},
  series = {LNCS},
  volume = {2415},
  isbn ={978-3-540-44074-1},
  doi = {10.1007/3-540-46084-5_76},
  url = {http://como.vub.ac.be/~mgagliol/Anguita02ICANN.pdf}
}

