%PDF-1.4 % 4 0 obj << /S /GoTo /D (section.1) >> endobj 7 0 obj (INTRODUCTION) endobj 8 0 obj << /S /GoTo /D (subsection.1.1) >> endobj 11 0 obj (Lvis: Learning from Value Interval Sampling) endobj 12 0 obj << /S /GoTo /D (section.2) >> endobj 15 0 obj (RELATED WORK) endobj 16 0 obj << /S /GoTo /D (section.3) >> endobj 19 0 obj (Robot Models) endobj 20 0 obj << /S /GoTo /D (section.4) >> endobj 23 0 obj (TECHNICAL APPROACH) endobj 24 0 obj << /S /GoTo /D (subsection.4.1) >> endobj 27 0 obj (Modeling) endobj 28 0 obj << /S /GoTo /D (subsection.4.2) >> endobj 31 0 obj (Data Collection via Optimal Control) endobj 32 0 obj << /S /GoTo /D (subsubsection.4.2.1) >> endobj 35 0 obj (Generating Feasible Solutions as Warm-Starts) endobj 36 0 obj << /S /GoTo /D (subsubsection.4.2.2) >> endobj 39 0 obj (Early Termination) endobj 40 0 obj << /S /GoTo /D (subsection.4.3) >> endobj 43 0 obj (Training the Neural Net) endobj 44 0 obj << /S /GoTo /D (subsubsection.4.3.1) >> endobj 47 0 obj (Loss Function) endobj 48 0 obj << /S /GoTo /D (subsubsection.4.3.2) >> endobj 51 0 obj (Optimization) endobj 52 0 obj << /S /GoTo /D (subsection.4.4) >> endobj 55 0 obj (Online Control Using the Learned Cost) endobj 56 0 obj << /S /GoTo /D (subsection.4.5) >> endobj 59 0 obj (Choosing Initial States with DAgger) endobj 60 0 obj << /S /GoTo /D (subsection.4.6) >> endobj 63 0 obj (Policy Net) endobj 64 0 obj << /S /GoTo /D (section.5) >> endobj 67 0 obj (RESULTS) endobj 68 0 obj << /S /GoTo /D (subsection.5.1) >> endobj 71 0 obj (Cart-Pole With Walls) endobj 72 0 obj << /S /GoTo /D (subsubsection.5.1.1) >> endobj 75 0 obj (Evaluation) endobj 76 0 obj << /S /GoTo /D (subsection.5.2) >> endobj 79 0 obj (Planar Humanoid) endobj 80 0 obj << /S /GoTo /D (subsubsection.5.2.1) >> endobj 83 0 obj (Evaluation) endobj 84 0 obj << /S /GoTo /D (subsubsection.5.2.2) >> endobj 87 0 obj (Capturability Analysis) endobj 88 0 obj << /S /GoTo /D (subsubsection.5.2.3) >> endobj 91 0 obj (The Importance of Intervals) endobj 92 0 obj << /S /GoTo /D (subsection.5.3) >> endobj 95 0 obj (Learning in Parameterized Environments) endobj 96 0 obj << /S /GoTo /D (section.6) >> endobj 99 0 obj (FUTURE WORK) endobj 100 0 obj << /S /GoTo /D (section*.1) >> endobj 103 0 obj (References) endobj 104 0 obj << /S /GoTo /D [105 0 R /Fit] >> endobj 116 0 obj << /Length 4894 /Filter /FlateDecode >> stream xڍ[IFWM#O{ьՖ Y$4ڿ嗙l4DVV֞k:O/|vߏW~4qtz~伽g%4\ Oh>ެ3 t ?ܬ4vޚc.`TΜ$7y`f}R{CԞ?Ei7A4^7u_ls `~+?]Zݜų4PRU;KZGzjnc%&{ΏeR71C7MUH_?r0'[&uބ2xIj`i uv ][|4¨q? b*s$xWC H MtoM-BZ?Aެ$u~ʝGI蜛>#lR.;