<?xml version="1.0" encoding="utf-8"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:hal="http://hal.archives-ouvertes.fr/" xmlns:gml="http://www.opengis.net/gml/3.3/" xmlns:gmlce="http://www.opengis.net/gml/3.3/ce" version="1.1" xsi:schemaLocation="http://www.tei-c.org/ns/1.0 http://api.archives-ouvertes.fr/documents/aofr-sword.xsd">
  <teiHeader>
    <fileDesc>
      <titleStmt>
        <title>HAL TEI export of hal-05366923</title>
      </titleStmt>
      <publicationStmt>
        <distributor>CCSD</distributor>
        <availability status="restricted">
          <licence target="https://creativecommons.org/publicdomain/zero/1.0/">CC0 1.0 - Universal</licence>
        </availability>
        <date when="2026-05-23T17:09:34+02:00"/>
      </publicationStmt>
      <sourceDesc>
        <p part="N">HAL API Platform</p>
      </sourceDesc>
    </fileDesc>
  </teiHeader>
  <text>
    <body>
      <listBibl>
        <biblFull>
          <titleStmt>
            <title xml:lang="en">Push, See, Predict: Emergent Perception Through Intrinsically Motivated Play</title>
            <author role="aut">
              <persName>
                <forename type="first">Orestis</forename>
                <surname>Konstantaropoulos</surname>
              </persName>
              <email type="md5">796c8bb0d03583ffb08fb5a4166ba6d2</email>
              <email type="domain">gmail.com</email>
              <idno type="idhal" notation="numeric">1608761</idno>
              <idno type="halauthorid" notation="string">3891026-1608761</idno>
              <affiliation ref="#struct-58785"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">George</forename>
                <surname>Retsinas</surname>
              </persName>
              <email type="md5">0d68d18fd45bd76b8babaa734188d63a</email>
              <email type="domain">central.ntua.gr</email>
              <idno type="idhal" notation="numeric">1245277</idno>
              <idno type="halauthorid" notation="string">2779479-1245277</idno>
              <affiliation ref="#struct-58785"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Mehdi</forename>
                <surname>Khamassi</surname>
              </persName>
              <email type="md5">89090976f2a2899545a7140d7b6699af</email>
              <email type="domain">isir.upmc.fr</email>
              <idno type="idhal" notation="string">mehdi-khamassi</idno>
              <idno type="idhal" notation="numeric">186</idno>
              <idno type="halauthorid" notation="string">22061-186</idno>
              <idno type="ORCID">https://orcid.org/0000-0002-2515-1046</idno>
              <idno type="IDREF">https://www.idref.fr/12845072X</idno>
              <affiliation ref="#struct-541937"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Petros</forename>
                <surname>Maragos</surname>
              </persName>
              <email type="md5">68f161876d4f758897a8ae542f4865ee</email>
              <email type="domain">cs.ntua.gr</email>
              <idno type="idhal" notation="numeric">843146</idno>
              <idno type="halauthorid" notation="string">232569-843146</idno>
              <affiliation ref="#struct-58785"/>
            </author>
            <editor role="depositor">
              <persName>
                <forename>Mehdi</forename>
                <surname>Khamassi</surname>
              </persName>
              <email type="md5">89090976f2a2899545a7140d7b6699af</email>
              <email type="domain">isir.upmc.fr</email>
            </editor>
          </titleStmt>
          <editionStmt>
            <edition n="v1" type="current">
              <date type="whenSubmitted">2025-11-15 11:26:09</date>
              <date type="whenModified">2026-02-07 05:32:56</date>
              <date type="whenReleased">2025-11-18 12:27:26</date>
              <date type="whenProduced">2025-07-19</date>
              <date type="whenEndEmbargoed">2025-11-15</date>
              <ref type="file" target="https://hal.sorbonne-universite.fr/hal-05366923v1/document">
                <date notBefore="2025-11-15"/>
              </ref>
              <ref type="file" subtype="author" n="1" target="https://hal.sorbonne-universite.fr/hal-05366923v1/file/159_Push_See_Predict_Emergent_.pdf" id="file-5366923-4594319">
                <date notBefore="2025-11-15"/>
              </ref>
            </edition>
            <respStmt>
              <resp>contributor</resp>
              <name key="172200">
                <persName>
                  <forename>Mehdi</forename>
                  <surname>Khamassi</surname>
                </persName>
                <email type="md5">89090976f2a2899545a7140d7b6699af</email>
                <email type="domain">isir.upmc.fr</email>
              </name>
            </respStmt>
          </editionStmt>
          <publicationStmt>
            <distributor>CCSD</distributor>
            <idno type="halId">hal-05366923</idno>
            <idno type="halUri">https://hal.sorbonne-universite.fr/hal-05366923</idno>
            <idno type="halBibtex">konstantaropoulos:hal-05366923</idno>
            <idno type="halRefHtml">&lt;i&gt;Greeks in AI&lt;/i&gt;, Jul 2025, Athènes, Greece</idno>
            <idno type="halRef">Greeks in AI, Jul 2025, Athènes, Greece</idno>
            <availability status="restricted">
              <licence target="https://about.hal.science/hal-authorisation-v1/">HAL Authorization<ref corresp="#file-5366923-4594319"/></licence>
            </availability>
          </publicationStmt>
          <seriesStmt>
            <idno type="stamp" n="CNRS">CNRS - Centre national de la recherche scientifique</idno>
            <idno type="stamp" n="ISIR" corresp="SORBONNE-UNIVERSITE">Institut des Systèmes Intelligents et de Robotique</idno>
            <idno type="stamp" n="SORBONNE-UNIVERSITE">Sorbonne Université</idno>
            <idno type="stamp" n="SORBONNE-UNIV" corresp="SORBONNE-UNIVERSITE">Sorbonne Université 01/01/2018</idno>
            <idno type="stamp" n="SU-SCIENCES" corresp="SORBONNE-UNIVERSITE">Faculté des Sciences de Sorbonne Université</idno>
            <idno type="stamp" n="SU-TI">Sorbonne Université - Texte Intégral</idno>
            <idno type="stamp" n="ALLIANCE-SU">Alliance Sorbonne Université</idno>
            <idno type="stamp" n="ISIR_ACIDE" corresp="ISIR">Action, Cognition, Interaction et Décisions Encorporées</idno>
            <idno type="stamp" n="SUPRA_INGENIERIE">Ingénierie</idno>
          </seriesStmt>
          <notesStmt>
            <note type="audience" n="2">International</note>
            <note type="invited" n="0">No</note>
            <note type="popular" n="0">No</note>
            <note type="peer" n="1">Yes</note>
            <note type="proceedings" n="0">No</note>
          </notesStmt>
          <sourceDesc>
            <biblStruct>
              <analytic>
                <title xml:lang="en">Push, See, Predict: Emergent Perception Through Intrinsically Motivated Play</title>
                <author role="aut">
                  <persName>
                    <forename type="first">Orestis</forename>
                    <surname>Konstantaropoulos</surname>
                  </persName>
                  <email type="md5">796c8bb0d03583ffb08fb5a4166ba6d2</email>
                  <email type="domain">gmail.com</email>
                  <idno type="idhal" notation="numeric">1608761</idno>
                  <idno type="halauthorid" notation="string">3891026-1608761</idno>
                  <affiliation ref="#struct-58785"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">George</forename>
                    <surname>Retsinas</surname>
                  </persName>
                  <email type="md5">0d68d18fd45bd76b8babaa734188d63a</email>
                  <email type="domain">central.ntua.gr</email>
                  <idno type="idhal" notation="numeric">1245277</idno>
                  <idno type="halauthorid" notation="string">2779479-1245277</idno>
                  <affiliation ref="#struct-58785"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Mehdi</forename>
                    <surname>Khamassi</surname>
                  </persName>
                  <email type="md5">89090976f2a2899545a7140d7b6699af</email>
                  <email type="domain">isir.upmc.fr</email>
                  <idno type="idhal" notation="string">mehdi-khamassi</idno>
                  <idno type="idhal" notation="numeric">186</idno>
                  <idno type="halauthorid" notation="string">22061-186</idno>
                  <idno type="ORCID">https://orcid.org/0000-0002-2515-1046</idno>
                  <idno type="IDREF">https://www.idref.fr/12845072X</idno>
                  <affiliation ref="#struct-541937"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Petros</forename>
                    <surname>Maragos</surname>
                  </persName>
                  <email type="md5">68f161876d4f758897a8ae542f4865ee</email>
                  <email type="domain">cs.ntua.gr</email>
                  <idno type="idhal" notation="numeric">843146</idno>
                  <idno type="halauthorid" notation="string">232569-843146</idno>
                  <affiliation ref="#struct-58785"/>
                </author>
              </analytic>
              <monogr>
                <meeting>
                  <title>Greeks in AI</title>
                  <date type="start">2025-07-19</date>
                  <date type="end">2025-07-20</date>
                  <settlement>Athènes</settlement>
                  <country key="GR">Greece</country>
                </meeting>
                <imprint/>
              </monogr>
              <ref type="publisher">https://www.greeksin.ai</ref>
            </biblStruct>
          </sourceDesc>
          <profileDesc>
            <langUsage>
              <language ident="en">English</language>
            </langUsage>
            <textClass>
              <keywords scheme="author">
                <term xml:lang="en">World Models</term>
                <term xml:lang="en">Object-Centric Computer Vision</term>
                <term xml:lang="en">Active Perception</term>
                <term xml:lang="en">Robotics and Embodied AI</term>
              </keywords>
              <classCode scheme="halDomain" n="info.info-rb">Computer Science [cs]/Robotics [cs.RO]</classCode>
              <classCode scheme="halDomain" n="info.info-lg">Computer Science [cs]/Machine Learning [cs.LG]</classCode>
              <classCode scheme="halTypology" n="COMM">Conference papers</classCode>
              <classCode scheme="halOldTypology" n="COMM">Conference papers</classCode>
              <classCode scheme="halTreeTypology" n="COMM">Conference papers</classCode>
            </textClass>
            <abstract xml:lang="en">
              <p>Unlike conventional vision systems that rely on passive observation, biological agents learn through physical interaction. Can a robot similarly develop an understanding of its environment purely through interaction, without prior knowledge or external supervision? In this work, we explore how artificial agents can autonomously learn via intrinsic motivation, much like how children engage in curious free play. We propose a novel, fully self-supervised, object-centric learning framework. The system first segments visual input into discrete entities using Slot Attention, trained on data collected from random robotic actions. A graph-based world model is then trained to predict object-centric dynamics but initially struggles to capture object motion due to the limited diversity of the initial interactions. To overcome this, we introduce an intrinsically motivated reward signal based on the world model's prediction error, which drives a policy to collect more informative trajectories. This results in up to three times more object displacement than random actions, significantly enriching the dataset. Fine-tuning both the vision and world model on these data improves prediction and reconstruction performance. We validate our method in a simulated robotic environment with diverse objects, demonstrating that meaningful visual and physical representations can emerge entirely from self-supervised interaction. This highlights the potential of intrinsically motivated, object-centric learning for autonomous world perception and modeling.</p>
            </abstract>
          </profileDesc>
        </biblFull>
      </listBibl>
    </body>
    <back>
      <listOrg type="structures">
        <org type="institution" xml:id="struct-58785" status="VALID">
          <idno type="ROR">https://ror.org/03cx6bg69</idno>
          <orgName>National Technical University of Athens</orgName>
          <orgName type="acronym">NTUA</orgName>
          <desc>
            <address>
              <addrLine>Patission Complex, 42, Patission str, 10682 Athens Zografou Campus - 9, Iroon Polytechniou str - 15780 Zografou, Athens</addrLine>
              <country key="GR"/>
            </address>
            <ref type="url">https://www.ntua.gr/en/</ref>
          </desc>
        </org>
        <org type="laboratory" xml:id="struct-541937" status="VALID">
          <idno type="IdRef">241179122</idno>
          <idno type="RNSR">200918463J</idno>
          <idno type="ROR">https://ror.org/05neq8668</idno>
          <orgName>Institut des Systèmes Intelligents et de Robotique</orgName>
          <orgName type="acronym">ISIR</orgName>
          <date type="start">2018-01-01</date>
          <desc>
            <address>
              <addrLine>Sorbonne-Université, Boite courrier 173 4 Place JUSSIEU 75252 Paris cedex 05</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.isir.upmc.fr</ref>
          </desc>
          <listRelation>
            <relation active="#struct-413221" type="direct"/>
            <relation name="UMR7222" active="#struct-441569" type="direct"/>
          </listRelation>
        </org>
        <org type="regroupinstitution" xml:id="struct-413221" status="VALID">
          <idno type="IdRef">221333754</idno>
          <idno type="ROR">https://ror.org/02en5vm52</idno>
          <orgName>Sorbonne Université</orgName>
          <orgName type="acronym">SU</orgName>
          <date type="start">2018-01-01</date>
          <desc>
            <address>
              <addrLine>21 rue de l’École de médecine - 75006 Paris</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.sorbonne-universite.fr/</ref>
          </desc>
        </org>
        <org type="regroupinstitution" xml:id="struct-441569" status="VALID">
          <idno type="IdRef">02636817X</idno>
          <idno type="ISNI">0000000122597504</idno>
          <idno type="ROR">https://ror.org/02feahw73</idno>
          <orgName>Centre National de la Recherche Scientifique</orgName>
          <orgName type="acronym">CNRS</orgName>
          <date type="start">1939-10-19</date>
          <desc>
            <address>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.cnrs.fr/</ref>
          </desc>
        </org>
      </listOrg>
    </back>
  </text>
</TEI>