@inproceedings{VanVaerenbergh2012Improving,
affiliation = {CoMo, Vrije Universiteit Brussel, Pleinlaan 2, 1050 Brussels, Belgium},
author = {Van~Vaerenbergh, Kevin and Rodriguez, Abdel and Gagliolo, Matteo and Vrancx, Peter
and Now\'{e}, Ann and Stoev, Julian and Goossens, Stijn and Pinte, Gregory and Symens, Wym},
title = {Improving wet clutch engagement with Reinforcement Learning},
booktitle = {International Joint Conference on Neural Networks, IJCNN 2012},
note = {Accepted},
event = {http://www.ieee-wcci2012.org/}
}
@inproceedings{Gagliolo2011Policy,
affiliation = {CoMo, Vrije Universiteit Brussel, Pleinlaan 2, 1050 Brussels, Belgium},
author = {Gagliolo, Matteo and Van~Vaerenbergh, Kevin and Rodriguez, Abdel and Now\'{e}, Ann
and Goossens, Stijn and Pinte, Gregory and Symens, Wym},
title = {Policy search reinforcement learning for automatic wet clutch engagement},
abstract = {In most existing motion control algorithms, a reference trajectory
is tracked, based on a continuous measurement of the system's
response. In many industrial applications, however, it is either
not possible or too expensive to install sensors which measure the
system's output over the complete stroke: instead, the motion can only be
detected at certain discrete positions. The control objective in
these systems is often not to track a complete trajectory
accurately, but rather to achieve a given state at the
sensor locations (e.g. to pass by the sensor at a given time, or
with a given speed).
Model-based control strategies are not suited for the control of these systems,
due to the lack of sensor data. We are currently investigating the potential of
a non-model-based learning strategy, Reinforcement Learning (RL), in
dealing with this kind of discrete sensor information. Here,
we describe ongoing experiments with a wet clutch, which has to be engaged
smoothly yet quickly, without any feedback on piston position.},
pages = {250-255},
booktitle = {15th International Conference on System Theory, Control and Computing
--- ICSTCC 2011},
year = {2011},
editors = {Mihail Voicu},
publisher = {IEEE},
issn = {2086-0465},
isbn = {978-973-621-322-9},
url = {http://ieeexplore.ieee.org/search/freesrchabstract.jsp?arnumber=6085740},
pdf = {http://como.vub.ac.be/~mgagliol/Gagliolo11ICSTCC.pdf},
slides = {http://como.vub.ac.be/~mgagliol/Gagliolo11ICSTCCslides.pdf},
event = {http://www.ace.tuiasi.ro/icstcc2011}
}
@inproceedings{Pinte2011Learning,
affiliation = {CoMo, Vrije Universiteit Brussel, Pleinlaan 2, 1050 Brussels, Belgium},
author = {Pinte, Gregory and Stoev, Julian and Symens, Wim and Dutta, Abhishek and Zhong, Yu
and Wyns, Bart and De~Keyser, Robin and Depraetere, Bruno and Swevers, Jan
and Gagliolo, Matteo and Now\'{e}, Ann},
title = {Learning strategies for wet clutch control},
abstract = {This paper presents an overview of model-based
(Iterative Learning Control, Model Predictive Control and
Iterative Optimization) and non-model-based (Genetic-based
Machine Learning and Reinforcement Learning) learning
strategies for the control of wet clutches. Based on theoretical
considerations and a validation on an experimental test bench
containing wet clutches, the benefits and drawbacks of the
different strategies are compared. Although after convergence
a good engagement quality can be obtained by all strategies,
only model-based strategies are suited for online applicability.
The convergence time for non-model-based strategies is too long
such that they can only be applied during an offline calibration
phase.},
pages = {467-474},
booktitle = {15th International Conference on System Theory, Control and Computing
--- ICSTCC 2011},
year = {2011},
editors = {Mihail Voicu},
publisher = {IEEE},
issn = {2086-0465},
isbn = {978-973-621-322-9},
event = {http://www.ace.tuiasi.ro/icstcc2011}
}
@misc{Rodriguez2011Improving,
affiliation = {CoMo, Vrije Universiteit Brussel, Pleinlaan 2, 1050 Brussels, Belgium},
author = {Rodriguez, Abdel and Gagliolo, Matteo and Vrancx, Peter and Grau, Ricardo
and Now\'{e}, Ann},
title = {Improving the performance of Continuous Action Reinforcement Learning Automata},
abstract = {Learning automata (LA) are policy iteration reinforcement learners
that exhibit nice convergence properties in discrete action settings.
Recently, a novel formulation for continuous action reinforcement learning
automata was proposed (CARLA), featuring an interesting exploration strategy,
which is well suited for learning from sparse reward signals.
In this paper, we propose an improvement of the algorithm,
and evaluate its impact on performance comparing with
the original version, as well as with another continuous LA.
The experimental evaluation
is carried out both in simulation and on a real control setup,
showing a clear advantage of our method, which
also performs well in distributed control settings.},
note = {\emph{9th European Workshop on Reinforcement Learning, EWRL 2011}},
year = {2011},
url = {http://ewrl.files.wordpress.com/2011/08/ewrl2011_submission_24.pdf},
event = {http://ewrl.wordpress.com/past-ewrl/ewrl9-2011/}
}
@misc{Gagliolo2011Policy-abs,
affiliation = {CoMo, Vrije Universiteit Brussel, Pleinlaan 2, 1050 Brussels, Belgium},
author = {Gagliolo, Matteo and Van~Vaerenbergh, Kevin and Rodriguez, Abdel and Now\'{e}, Ann
and Goossens, Stijn and Pinte, Gregory and Symens, Wym},
title = {Policy gradient methods for controlling systems with discrete sensor information},
abstract = {In most existing motion control algorithms, a reference trajectory
is tracked, based on a continuous measurement of the system's
response. In many industrial applications, however, it is either
not possible or too expensive to install sensors which measure the
system's output over the complete stroke: instead, the motion can only be
detected at certain discrete positions. The control objective in
these systems is often not to track a complete trajectory
accurately, but rather to achieve a given state at the
sensor locations (e.g. to pass by the sensor at a given time, or
with a given speed).
Model-based control strategies are not suited for the control of these systems,
due to the lack of sensor data. We are currently investigating the potential of
a non-model-based learning strategy, Reinforcement Learning (RL), in
dealing with this kind of discrete sensor information. Here,
we describe experiments with a simple yet challenging system,
where a single sensor detects the passage of a mass being
pushed by a linear motor.},
note = {\emph{20th Annual Belgian-Dutch Conference on Machine Learning --- BeNeLearn},
online proceedings, p. 115--116},
pages = {115-116},
editors = {Peter van~der~Putten and Cor Veenman and Joaquin Vanschoren and Menno Israel
and Hendrik Blockeel},
year = {2011},
url = {http://como.vub.ac.be/~mgagliol/Gagliolo11BENELEARN.pdf},
event = {http://www.benelearn2011.org/}
}
@article{Gagliolo2011Algorithm,
author = {Matteo Gagliolo and J\"{u}rgen Schmidhuber},
affiliation = {CoMo, Vrije Universiteit Brussel, Pleinlaan 2, 1050 Brussels, Belgium},
title = {Algorithm Portfolio Selection as a Bandit Problem with Unbounded Losses},
abstract = {We propose a method that learns to allocate computation time to a given set
of algorithms, of unknown performance, with the aim of solving a given
sequence of problem instances in a minimum time. Analogous \emph{meta-learning}
techniques are typically based on models of algorithm performance, learned
during a separate \emph{offline} training sequence which can be prohibitively
expensive. We adopt instead an \emph{online} approach, named GambleTA, in
which algorithm performance models are iteratively updated, and used to guide
allocation on a sequence of problem instances. GambleTA is a general method
for selecting among two or more alternative algorithm portfolios. Each portfolio
has its own way of allocating computation time to the available algorithms,
possibly based on performance models, in which case its performance is expected
to improve over time, as more runtime data becomes available. The resulting
\emph{exploration-exploitation} trade-off is represented as a bandit problem.
In our previous work, the algorithms corresponded to the arms of the bandit,
and allocations evaluated by the different portfolios were mixed, using a solver
for the bandit problem with expert advice, but this required the setting of
an arbitrary bound on algorithm runtimes, invalidating the optimal regret of
the solver. In this paper, we propose a simpler version of GambleTA, in which
the allocators correspond to the arms, such that a single portfolio is selected
for each instance. The selection is represented as a bandit problem with partial
information, and an \emph{unknown} bound on losses. We devise a solver for this
game, proving a bound on its expected regret. We present experiments based on
results from several solver competitions, in various domains, comparing GambleTA
with another online method.},
journal = {Annals of Mathematics and Artificial Intelligence},
publisher = {Springer Netherlands},
issn = {1012-2443},
pages = {49-86},
volume = {61},
number = {2},
year = {2011},
url = {http://como.vub.ac.be/~mgagliol/Gagliolo11AMAI.pdf},
doi = {10.1007/s10472-011-9228-z}
}
@inproceedings{Goossens2011Reinforcement,
affiliation = {CoMo, Vrije Universiteit Brussel, Pleinlaan 2, 1050 Brussels, Belgium},
author = {Goossens, Stijn and Pinte, Gregory and Symens, Wym and Gagliolo, Matteo
and Rodriguez, Abdel and Now\'{e}, Ann},
title = {Reinforcement learning for repetitive systems with discrete sensors},
booktitle = {30th Benelux Meeting on Systems and Control --- Book of Abstracts},
pages = {149},
editors = {Clara Ionescu and Robin De Keyser and Patrick Guillaume and Rik Pintelon
and Johan Schoukens and Ilse Smets and Joos Vandewalle and Jan Van Impe
and Jan Swevers},
publisher = {Universiteit Gent},
address = {Gent, Belgium},
isbn = {978-90-9026089-1},
year = {2011},
url = {http://como.vub.ac.be/~mgagliol/Goossens11BENEMEET.pdf},
slides = {http://como.vub.ac.be/~mgagliol/Goossens11BENEMEETslides.pdf},
event = {http://www.beneluxmeeting.org}
}
@incollection{Gagliolo2010Survival,
author = {Matteo Gagliolo and Catherine Legrand},
affiliation = {IRIDIA, CoDE, Universit\'{e} Libre de Bruxelles, Brussels, Belgium},
title = {Algorithm Survival Analysis},
booktitle = {Experimental Methods for the Analysis of Optimization Algorithms},
editor = {Thomas Bartz-Beielstein and Marco Chiarandini and Lu\'{i}s Paquete
and Mike Preuss},
publisher = {Springer},
address = {Berlin, Heidelberg},
isbn = {978-3-642-02538-9},
pages = {161--184},
url = {http://como.vub.ac.be/~mgagliol/Gagliolo10Survival.pdf},
doi = {10.1007/978-3-642-02538-9_7},
abstract = {Algorithm selection is typically based on models of algorithm
performance,learned during a separate \emph{offline} training sequence,
which can be prohibitively expensive. In recent work, we adopted
an \emph{online} approach, in which models of the runtime distributions of
the available algorithms are iteratively updated and used to guide
the allocation of computational resources, while solving a sequence
of problem instances. The models are estimated using survival analysis
techniques, which allow us to reduce computation time, censoring the
runtimes of the slower algorithms. Here, we review the statistical
aspects of our online selection method, discussing the bias induced
in the runtime distributions (RTD) models by the competition of
different algorithms on the same problem instances.},
year = {2010}
}
@inproceedings{Gagliolo2010Selection,
author = {Matteo Gagliolo and J\"{u}rgen Schmidhuber},
affiliation = {IDSIA Galleria 2 6928 Manno (Lugano) Switzerland},
title = {Algorithm Selection as a Bandit Problem with Unbounded Losses},
abstract = {Algorithm selection is typically based on models of algorithm
performance learned during a separate \emph{offline} training sequence,
which can be prohibitively expensive. In recent work, we adopted
an \emph{online} approach, in which a performance model is iteratively
updated and used to guide selection on a sequence of problem instances.
The resulting \emph{exploration-exploitation} trade-off was represented as a
bandit problem with expert advice, using an existing solver for this game,
but this required the setting of an arbitrary bound on algorithm runtimes,
thus invalidating the optimal regret of the solver. In this paper, we
propose a simpler framework for representing algorithm selection as a bandit
problem, with partial information, and an \emph{unknown} bound on losses. We adapt
an existing solver to this game, proving a bound on its expected regret, which
holds also for the resulting algorithm selection technique. We present
experiments with a set of SAT solvers on a mixed SAT-UNSAT benchmark.},
booktitle = {Learning and Intelligent Optimization. 4th International Conference, LION 4,
Venice, Italy, January 18-22, 2010. Selected Papers},
editor = {Christian Blum and Roberto Battiti},
pages = {82--96},
publisher = {Springer},
address = {Berlin, Heidelberg},
series = {LNCS},
volume = {6073},
year = {2010},
eprint = {0807.1494},
doi = {10.1007/978-3-642-13800-3_7},
isbn = {978-3-642-13799-0},
url = {http://como.vub.ac.be/~mgagliol/Gagliolo10LION.pdf},
slides = {http://como.vub.ac.be/~mgagliol/Gagliolo10LIONslides.pdf},
techrep = {http://www.idsia.ch/idsiareport/IDSIA-07-08.pdf},
event = {http://www.intelligent-optimization.org/LION4/}
}
@phdthesis{Gagliolo2010PhD,
author = {Matteo Gagliolo},
affiliation = {IDSIA, Galleria 2, 6928 Manno-Lugano Switzerland},
title = {Online Dynamic Algorithm Portfolios},
abstract = {This thesis presents methods for minimizing the computational effort
of problem solving. Rather than looking at a
particular algorithm, we consider the issue of computational complexity at a higher
level, and propose techniques that, given a set of
candidate algorithms, of unknown performance,
learn to use these algorithms \emph{while} solving a
sequence of problem instances, with the aim of solving all
instances in a minimum time.
An analogous \emph{meta-level} approach to problem solving has been
adopted in many different fields, with different aims and
terminology. A widely accepted term to describe
it is \emph{algorithm selection}. Algorithm \emph{portfolios}
represent a more general framework, in which computation
time is allocated to a set of algorithms running on one or more
processors.
Automating algorithm selection
is an old dream of the AI community, which has been
brought closer to reality in the last decade.
Most available selection techniques are based
on a model of algorithm performance, assumed to be available, or
learned during a separate \emph{offline} training sequence,
which is often prohibitively expensive. The model is used
to perform a \emph{static} allocation of resources,
with no feedback from the actual execution of the
algorithms.
There is a trade-off between the performance
of model-based selection, and the cost of learning
the model. In this thesis, we formulate this trade-off
as a bandit problem.
We propose GambleTA, a fully dynamic and
online algorithm portfolio selection technique, with no
separate training phase: all candidate algorithms are run in parallel,
while a model incrementally learns their runtime distributions.
A redundant set of \emph{time allocators}
uses the partially trained model to optimize
machine time shares for the algorithms,
in order to minimize runtime.
A bandit problem solver picks the allocator to use
on each instance, gradually increasing
the impact of the best time allocators
as the model improves. A similar approach is adopted
for learning restart strategies online (GambleR).
In both cases, the runtime distributions are modeled
using survival analysis techniques; unsuccessful runs
are correctly considered as censored runtime observations,
allowing to save further computation time.
The methods proposed are validated with several experiments, mostly
based on data from solver competitions, displaying a robust performance
in a variety of settings, and showing that rough performance
models already allow to allocate resources efficiently,
reducing the risk of wasting computation time.},
school = {IDSIA/University of Lugano},
address = {Lugano, Switzerland},
year = {2010},
month = {March},
day = {24},
url = {http://como.vub.ac.be/~mgagliol/Gagliolo10PhD.pdf},
slides = {http://como.vub.ac.be/~mgagliol/Gagliolo10PhDslides.pdf}
}
@inproceedings{Gagliolo2009Mixed,
author = {Matteo Gagliolo and Catherine Legrand and Mauro Birattari},
affiliation = {Universit\'{e} Libre de Bruxelles IRIDIA, CoDE Brussels Belgium},
title = {Mixed-Effects Modeling of Optimisation Algorithm Performance.},
abstract = {The learning curves of optimisation algorithms, plotting the evolution
of the objective vs. runtime spent. can be viewed as a sample of
\emph{longitudinal} data. In this paper we describe \emph{mixed-effects}
modeling, a standard technique in longitudinal data analysis, and give
an example of its application to algorithm performance modeling.},
booktitle = {Engineering Stochastic Local Search Algorithms. Designing,
Implementing and Analyzing Effective Heuristics, Second
International Workshop, SLS 2009, Brussels, Belgium, September
3--4, 2009. Proceedings},
editor = {Thomas St\"{u}tzle and Mauro Birattari and Holger H. Hoos},
pages = {150--154},
publisher = {Springer},
series = {LNCS},
volume = {5752},
year = {2009},
url = {http://como.vub.ac.be/~mgagliol/Gagliolo09SLS.pdf},
poster = {http://como.vub.ac.be/~mgagliol/Gagliolo09SLSposter.pdf},
doi = {10.1007/978-3-642-03751-1_17},
isbn = {978-3-642-03750-4},
event = {http://iridia.ulb.ac.be/sls2009/}
}
@inproceedings{Gagliolo2008Towards,
author = {Matteo Gagliolo and J\"{u}rgen Schmidhuber},
affiliation = {IDSIA Galleria 2 6928 Manno (Lugano) Switzerland},
title = {Towards Distributed Algorithm Portfolios},
abstract = {In recent work we have developed an online algorithm selection technique,
in which a model of algorithm performance is learned incrementally \emph{while}
being used. The resulting \emph{exploration-exploitation} trade-off is solved as a
bandit problem. The candidate solvers are run in parallel on a single machine,
as an \emph{algorithm portfolio}, and computation time is shared among them according
to their expected performances. In this paper, we extend our technique to the
more interesting and practical case of multiple CPUs.},
booktitle = {International Symposium on Distributed Computing and Artificial Intelligence
2008 (DCAI 2008)},
editor = {Juan M. Corchado and Sara Rodr\'{i}guez and James Llinas and Jos\'{e} M. Molina},
publisher = {Springer},
series = {Advances in Soft Computing},
volume = {50},
pages = {634--643},
year = {2009},
doi = {10.1007/978-3-540-85863-8_75},
isbn = {978-3-540-85862-1},
url = {http://como.vub.ac.be/~mgagliol/Gagliolo08DCAI.pdf},
slides = {http://como.vub.ac.be/~mgagliol/Gagliolo08DCAIslides.pdf},
event = {http://gsii.usal.es/~dcai/}
}
@article{Schmidhuber2007Training,
author = {J\"{u}rgen Schmidhuber and Daan Wierstra and Matteo Gagliolo and
Faustino Gomez},
affiliation = {IDSIA, Galleria 2, 6928 Manno-Lugano Switzerland},
title = {Training recurrent networks by Evolino},
abstract = {In recent years, gradient-based LSTM recurrent neural networks
(RNNs) solved many previously RNN-unlearnable tasks. Sometimes,
however, gradient information is of little use for training RNNs,
due to numerous local minima. For such cases, we present a novel
method: EVOlution of systems with LINear Outputs (Evolino). Evolino
evolves weights to the nonlinear, hidden nodes of RNNs while
computing optimal linear mappings from hidden state to output,
using methods such as pseudo-inverse-based linear regression. If we
instead use quadratic programming to maximize the margin, we obtain
the first evolutionary recurrent support vector machines. We show
that Evolino-based LSTM can solve tasks that Echo State nets
(Jaeger, 2004a) cannot and achieves higher accuracy in certain
continuous function generation tasks than conventional gradient
descent RNNs, including gradient-based LSTM.},
journal = {Neural computation},
year = {2007},
volume = {19},
pages = {757--779},
number = {3},
month = {March},
doi = {10.1162/neco.2007.19.3.757},
url = {http://www.idsia.ch/~tino/papers/schmidhuber.neco07.pdf}
}
@article{Gagliolo2007Universal,
author = {Matteo Gagliolo},
affiliation = {IDSIA, Galleria 2, 6928 Manno-Lugano Switzerland},
title = {Universal Search},
journal = {Scholarpedia},
year = {2007},
volume = {2},
pages = {2575},
number = {11},
month = {nov},
doi = {10.4249/scholarpedia.2575},
url = {http://www.scholarpedia.org/article/Universal_search}
}
@inproceedings{Gagliolo2007Learning,
author = {Matteo Gagliolo and J\"{u}rgen Schmidhuber},
affiliation = {IDSIA, Galleria 2, 6928 Manno-Lugano Switzerland},
title = {Learning restart strategies},
abstract = {Restart strategies are commonly used for minimizing the computational cost of
randomized algorithms, but require prior knowledge of the run-time distribution
in order to be effective. We propose a portfolio of two strategies, one fixed,
with a provable bound on performance, the other based on a model of run-time
distribution, updated as the two strategies are run on a sequence of problems.
Computational resources are allocated probabilistically to the two strategies,
based on their performances, using a well-known K-armed bandit problem solver.
We present bounds on the performance of the resulting technique, and experiments
with a satisfiability problem solver, showing rapid convergence to a near-optimal
execution time.},
booktitle = {IJCAI 2007 --- Twentieth International Joint Conference
on Artificial Intelligence, vol. 1},
year = {2007},
editor = {Manuela M. Veloso},
pages = {792-797},
month = {January},
publisher = {AAAI Press},
url = {http://www.ijcai.org/papers07/Papers/IJCAI07-127.pdf},
slides = {http://como.vub.ac.be/~mgagliol/Gagliolo07IJCAIslides.pdf},
event = {http://www.ijcai-07.org/}
}
@article{Gagliolo2006Learning,
author = {Matteo Gagliolo and J\"{u}rgen Schmidhuber},
affiliation = {IDSIA Galleria 2 6928 Manno (Lugano) Switzerland},
title = {Learning Dynamic Algorithm Portfolios},
abstract = {Algorithm selection can be performed using a model of runtime distribution,
learned during a preliminary training phase. There is a trade-off between
the performance of model-based algorithm selection, and the cost of learning
the model. In this paper, we treat this trade-off in the context of bandit
problems. We propose a fully dynamic and online algorithm selection technique,
with no separate training phase: all candidate algorithms are run in parallel,
while a model incrementally learns their runtime distributions. A redundant set
of time allocators uses the partially trained model to propose machine time shares
for the algorithms. A bandit problem solver mixes the model-based shares with a
uniform share, gradually increasing the impact of the best time allocators as the
model improves. We present experiments with a set of SAT solvers on a mixed SAT-UNSAT
benchmark; and with a set of solvers for the Auction Winner Determination problem.},
journal = {Annals of Mathematics and Artificial Intelligence},
year = {2006},
volume = {47},
pages = {295--328},
number = {3},
month = {August},
note = {{AI}\&{MATH} 2006 Special Issue},
doi = {10.1007/s10472-006-9036-z},
url = {http://como.vub.ac.be/~mgagliol/Gagliolo06AMAI.pdf},
techrep = {http://www.idsia.ch/idsiareport/IDSIA-02-07.pdf},
slides = {http://como.vub.ac.be/~mgagliol/Gagliolo07LIONslides.pdf}
}
@misc{Gagliolo2006Gambling,
author = {Matteo Gagliolo and J\"{u}rgen Schmidhuber},
affiliation = {IDSIA, Galleria 2, 6928 Manno-Lugano Switzerland},
title = {Gambling in a Computationally Expensive Casino: Algorithm Selection
as a Bandit Problem},
abstract = {Automating algorithm selection and parameter tuning
is an old dream of the AI community, which has been
brought closer to reality in the last decade.
Most available techniques are either \emph{oblivious},
with no knowledge transfer across different problems;
or are based on a model of algorithm performance,
learned in a separate \emph{offline} training sequence,
which is often prohibitively expensive.
We describe recent work in which the problem is treated
in a fully \emph{online} setting. A model of algorithm
performance can be learned \emph{and} used to reduce
the cost of learning it. The resulting \emph{exploration-exploitation}
trade-off can be treated in the context of Bandit problems.},
note = {\emph{Online Trading of Exploration and Exploitation} --- NIPS 2006 Workshop},
year = {2006},
url = {http://como.vub.ac.be/~mgagliol/Gagliolo06NIPSEEWS.pdf},
event = {http://www.homepages.ucl.ac.uk/~ucabzhu/OTEE.htm}
}
@inproceedings{Gagliolo2006Impact,
author = {Matteo Gagliolo and J\"{u}rgen Schmidhuber},
affiliation = {IDSIA, Galleria 2, 6928 Manno (Lugano) Switzerland Switzerland},
title = {Impact of censored sampling on the performance of restart strategies},
abstract = {Algorithm selection, algorithm portfolios, and randomized restarts,
can profit from a probabilistic model of algorithm run-time, to be
estimated from data gathered by solving a set of experiments.
Censored sampling offers a principled way of reducing this initial
training time. We study the trade-off between training time and
model precision by varying the censoring threshold, and analyzing
the consequent impact on the performance of an optimal restart strategy,
based on an estimated model of runtime distribution. We present
experiments with a SAT solver on a graph-coloring benchmark. Due to the
heavy-tailed runtime distribution, a modest censoring can already reduce
training time by a few orders of magnitudes. The nature of the optimization
process underlying the restart strategy renders its performance surprisingly
robust, also to more aggressive censoring.},
booktitle = {Principle and Practice of Constraint Programming --- CP 2006, 12th
International Conference, CP 2006, Nantes, France, September 25-29,
2006, Proceedings},
year = {2006},
editor = {Fr\'ed\'eric Benhamou},
volume = {4204},
series = {LNCS},
pages = {167--181},
month = {September},
publisher = {Springer},
doi = {10.1007/11889205_14},
isbn = {978-3-540-46267-5},
url = {http://como.vub.ac.be/~mgagliol/Gagliolo06CP.pdf},
slides = {http://como.vub.ac.be/~mgagliol/Gagliolo06CPslides.pdf},
event = {http://www.sciences.univ-nantes.fr/cp06/}
}
@misc{Gagliolo2006Dynamic-abs,
author = {Matteo Gagliolo},
affiliation = {IDSIA, Galleria 2, 6928 Manno-Lugano Switzerland},
title = {Dynamic Meta Learning},
note = {\emph{50th Anniversary Summit of Artificial Intelligence}, Monte Verit\'a,
Switzerland},
month = {July},
year = {2006},
url = {http://como.vub.ac.be/~mgagliol/Gagliolo06AI50.pdf},
poster = {http://como.vub.ac.be/~mgagliol/Gagliolo06AI50poster.pdf},
event = {http://www.ai50.org/}
}
@inproceedings{Schmidhuber2006Evolino,
author = {J\"{u}rgen Schmidhuber and Matteo Gagliolo and Daan Wierstra and
Faustino Gomez},
affiliation = {IDSIA, Galleria 2, 6928 Manno-Lugano Switzerland},
title = {Evolino for Recurrent Support Vector Machines},
booktitle = {ESANN '06 --- 14 th European Symposium on Artificial Neural Networks},
year = {2006},
editor = {Michel Verleysen},
pages = {593--598},
address = {Evere, Belgium},
month = {April},
publisher = {d-side},
isbn = {2-930307-06-4},
eprint = {cs.NE/0512062},
techrep = {http://www.idsia.ch/idsiareport/IDSIA-19-05.v2.pdf},
event = {http://www.dice.ucl.ac.be/esann/index.php?pg=esann06}
}
@misc{Gagliolo2006Dynamic,
author = {Matteo Gagliolo and J\"{u}rgen Schmidhuber},
affiliation = {IDSIA, Galleria 2, 6928 Manno-Lugano Switzerland},
title = {Dynamic Algorithm Portfolios},
note = {\emph{AI\&MATH '06 --- Ninth International Symposium
on Artificial Intelligence and Mathematics}},
month = {January},
year = {2006},
url = {http://anytime.cs.umass.edu/aimath06/proceedings/P37.pdf},
slides = {http://como.vub.ac.be/~mgagliol/Gagliolo06AIMATHslides.pdf},
event = {http://anytime.cs.umass.edu/aimath06/}
}
@misc{Gagliolo2005Towards,
author = {Matteo Gagliolo and J\"{u}rgen Schmidhuber},
affiliation = {IDSIA, Galleria 2, 6928 Manno-Lugano Switzerland},
title = {Towards Life-Long Meta Learning},
note = {\emph{Inductive Transfer : 10 Years Later} --- NIPS 2005 workshop},
month = {December},
year = {2005},
url = {http://iitrl.acadiau.ca/itws05/Papers/ITWS09-Gagliolo_REV.pdf},
event = {http://iitrl.acadiau.ca/itws05/}
}
@inproceedings{Gagliolo2005Neural,
author = {Matteo Gagliolo and J\"{u}rgen Schmidhuber},
affiliation = {IDSIA, Galleria 2, 6928 Manno-Lugano Switzerland},
title = {A Neural Network Model for Inter-Problem Adaptive Online Time Allocation},
abstract = {One aim of Meta-learning techniques is to minimize the time needed for
problem solving, and the effort of parameter hand-tuning, by automating
algorithm selection. The predictive model of algorithm performance needed
for this task often requires long training times. We address the problem
in an online fashion, running multiple algorithms in parallel on a sequence
of tasks, continually updating their relative priorities according to a
neural model that maps their current state to the expected time to the solution.
The model itself is updated at the end of each task, based on the actual
performance of each algorithm. Censored sampling allows us to train the model
effectively, without need of additional exploration after each task's solution.
We present a preliminary experiment in which this new inter-problem technique
learns to outperform a previously proposed intra-problem heuristic.},
booktitle = {Artificial Neural Networks: Formal Models and Their Applications
--- ICANN 2005, 15th International Conference, Warsaw, Poland, September
11-15, 2005, Proceedings, Part 2},
year = {2005},
editor = {W{\l}odzis{\l}aw Duch and Janusz Kacprzyk and Erkki Oja and S{\l}awomir
Zadro\.{z}ny},
pages = {7--12},
month = {September},
publisher = {Springer},
series = {LNCS},
volume = {3697},
doi = {10.1007/11550907_2},
isbn = {978-3-540-28755-1},
url = {http://como.vub.ac.be/~mgagliol/Gagliolo05ICANN.pdf},
slides = {http://como.vub.ac.be/~mgagliol/Gagliolo05ICANNslides.pdf},
event = {http://www.ibspan.waw.pl/ICANN-2005/}
}
@inproceedings{Gagliolo2004Adaptive,
author = {Matteo Gagliolo and Viktor Zhumatiy and J\"{u}rgen Schmidhuber},
affiliation = {IDSIA, Galleria 2, 6928 Manno-Lugano Switzerland},
title = {Adaptive Online Time Allocation to Search Algorithms},
abstract = {Given is a search problem or a sequence of search problems, as well
as a set of potentially useful search algorithms. We propose a general
framework for online allocation of computation time to search algorithms
based on experience with their performance so far. In an example instantiation,
we use simple linear extrapolation of performance for allocating time to
various simultaneously running genetic algorithms characterized by different
parameter values. Despite the large number of searchers tested in parallel,
on various tasks this rather general approach compares favorably to a more
specialized state-of-the-art heuristic; in one case it is nearly two orders
of magnitude faster.},
booktitle = {Machine Learning: ECML 2004. Proceedings of the 15th European Conference
on Machine Learning, Pisa, Italy, September 20-24, 2004},
year = {2004},
editor = {J.F. Boulicaut and F.Esposito and F. Giannotti and Dino Pedreschi},
pages = {134--143},
month = {September},
publisher = {Springer},
series = {LNCS},
volume = {3201},
doi = {10.1007/978-3-540-30115-8_15},
isbn = {978-3-540-23105-9},
techrep = {http://www.idsia.ch/idsiareport/IDSIA-23-04.ps.gz},
slides = {http://como.vub.ac.be/~mgagliol/Gagliolo04ECMLslides.pdf},
event = {http://ecmlpkdd.isti.cnr.it/}
}
@inproceedings{Schmidhuber2004Bias-Optimal,
author = {J\"{u}rgen Schmidhuber and Viktor Zhumatiy and Matteo Gagliolo},
affiliation = {IDSIA, Galleria 2, 6928 Manno-Lugano Switzerland},
title = {Bias-Optimal Incremental Learning of Control Sequences for Virtual
Robots},
booktitle = {Proc. 8th Conference on Intelligent Autonomous Systems, IAS--8},
year = {2004},
editor = {F. Groen et al.},
pages = {658--665},
address = {Amsterdam, NL},
publisher = {IOS Press}
}
@inproceedings{Anguita2002MDL,
author = {Davide Anguita and Matteo Gagliolo},
affiliation = {University of Genova, Dept. of Biophysical and Electronic Engineering,
Via Opera Pia 11a I-16145 Genova Italy},
title = {MDL Based Model Selection for Relevance Vector Regression},
abstract = {Relevance Vector regression is a form of Support Vector regression, recently
proposed by M.E.Tipping, which allows a sparse representation of the data.
The Bayesian learning algorithm proposed by the author leaves the partially
open question of how to automatically choose the optimal model. In this paper
we describe a model selection criterion inspired by the Minimum Description Length
(MDL) principle. We show that our proposal is effective in finding the optimal
kernel parameter both on an artificial dataset and a real-world application.},
booktitle = {Artificial Neural Networks --- ICANN 2002},
year = {2002},
editor = {J.R. Dorronsoro},
pages = {468--473},
month = {August},
publisher = {Springer},
series = {LNCS},
volume = {2415},
isbn = {978-3-540-44074-1},
doi = {10.1007/3-540-46084-5_76},
url = {http://como.vub.ac.be/~mgagliol/Anguita02ICANN.pdf}
}
This file was generated by bibtex2html 1.96.