<?xml version="1.0" encoding="utf-8"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:hal="http://hal.archives-ouvertes.fr/" xmlns:gml="http://www.opengis.net/gml/3.3/" xmlns:gmlce="http://www.opengis.net/gml/3.3/ce" version="1.1" xsi:schemaLocation="http://www.tei-c.org/ns/1.0 http://api.archives-ouvertes.fr/documents/aofr-sword.xsd">
  <teiHeader>
    <fileDesc>
      <titleStmt>
        <title>HAL TEI export of hal-03364396</title>
      </titleStmt>
      <publicationStmt>
        <distributor>CCSD</distributor>
        <availability status="restricted">
          <licence target="https://creativecommons.org/publicdomain/zero/1.0/">CC0 1.0 - Universal</licence>
        </availability>
        <date when="2026-05-22T23:57:52+02:00"/>
      </publicationStmt>
      <sourceDesc>
        <p part="N">HAL API Platform</p>
      </sourceDesc>
    </fileDesc>
  </teiHeader>
  <text>
    <body>
      <listBibl>
        <biblFull>
          <titleStmt>
            <title xml:lang="en">A White Box Analysis of ColBERT</title>
            <author role="aut">
              <persName>
                <forename type="first">Thibault</forename>
                <surname>Formal</surname>
              </persName>
              <idno type="halauthorid">2114463-0</idno>
              <affiliation ref="#struct-541720"/>
              <affiliation ref="#struct-500187"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Benjamin</forename>
                <surname>Piwowarski</surname>
              </persName>
              <email type="md5">c7b41b216d6ec87414d6178690a3906d</email>
              <email type="domain">piwowarski.fr</email>
              <idno type="idhal" notation="string">benjamin-piwowarski</idno>
              <idno type="idhal" notation="numeric">9362</idno>
              <idno type="halauthorid" notation="string">17258-9362</idno>
              <idno type="ARXIV">https://arxiv.org/a/piwowarski_b_1</idno>
              <idno type="ORCID">https://orcid.org/0000-0001-6792-3262</idno>
              <idno type="IDREF">https://www.idref.fr/226846601</idno>
              <affiliation ref="#struct-541720"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Stéphane</forename>
                <surname>Clinchant</surname>
              </persName>
              <idno type="halauthorid">665739-0</idno>
              <affiliation ref="#struct-500187"/>
            </author>
            <editor role="depositor">
              <persName>
                <forename>Benjamin</forename>
                <surname>Piwowarski</surname>
              </persName>
              <email type="md5">c7b41b216d6ec87414d6178690a3906d</email>
              <email type="domain">piwowarski.fr</email>
            </editor>
          </titleStmt>
          <editionStmt>
            <edition n="v1" type="current">
              <date type="whenSubmitted">2021-10-07 10:35:54</date>
              <date type="whenModified">2024-10-30 13:33:19</date>
              <date type="whenReleased">2021-10-07 12:50:21</date>
              <date type="whenProduced">2021-03-28</date>
              <date type="whenEndEmbargoed">2021-10-07</date>
              <ref type="file" target="https://hal.sorbonne-universite.fr/hal-03364396v1/document">
                <date notBefore="2021-10-07"/>
              </ref>
              <ref type="file" subtype="author" n="1" target="https://hal.sorbonne-universite.fr/hal-03364396v1/file/Formal%20et%20al_2020_A%20White%20Box%20Analysis%20of%20ColBERT.pdf" id="file-3366135-2956876">
                <date notBefore="2021-10-07"/>
              </ref>
              <ref type="externalLink" target="http://arxiv.org/pdf/2012.09650"/>
            </edition>
            <respStmt>
              <resp>contributor</resp>
              <name key="180694">
                <persName>
                  <forename>Benjamin</forename>
                  <surname>Piwowarski</surname>
                </persName>
                <email type="md5">c7b41b216d6ec87414d6178690a3906d</email>
                <email type="domain">piwowarski.fr</email>
              </name>
            </respStmt>
          </editionStmt>
          <publicationStmt>
            <distributor>CCSD</distributor>
            <idno type="halId">hal-03364396</idno>
            <idno type="halUri">https://hal.sorbonne-universite.fr/hal-03364396</idno>
            <idno type="halBibtex">formal:hal-03364396</idno>
            <idno type="halRefHtml">&lt;i&gt;43rd EUROPEAN CONFERENCE ON INFORMATION RETRIEVAL&lt;/i&gt;, Mar 2021, Lucca (online), Italy. pp.257-263, &lt;a target="_blank" href="https://dx.doi.org/10.1007/978-3-030-72240-1_23"&gt;&amp;#x27E8;10.1007/978-3-030-72240-1_23&amp;#x27E9;&lt;/a&gt;</idno>
            <idno type="halRef">43rd EUROPEAN CONFERENCE ON INFORMATION RETRIEVAL, Mar 2021, Lucca (online), Italy. pp.257-263, ⟨10.1007/978-3-030-72240-1_23⟩</idno>
            <availability status="restricted">
              <licence target="https://about.hal.science/hal-authorisation-v1/">HAL Authorization<ref corresp="#file-3366135-2956876"/></licence>
            </availability>
          </publicationStmt>
          <seriesStmt>
            <idno type="stamp" n="CNRS">CNRS - Centre national de la recherche scientifique</idno>
            <idno type="stamp" n="LIP6" corresp="SORBONNE-UNIVERSITE">Laboratoire d'Informatique de Paris 6</idno>
            <idno type="stamp" n="SORBONNE-UNIVERSITE">Sorbonne Université</idno>
            <idno type="stamp" n="SORBONNE-UNIV" corresp="SORBONNE-UNIVERSITE">Sorbonne Université 01/01/2018</idno>
            <idno type="stamp" n="SU-SCIENCES" corresp="SORBONNE-UNIVERSITE">Faculté des Sciences de Sorbonne Université</idno>
            <idno type="stamp" n="TEST-HALCNRS">Collection test HAL CNRS</idno>
            <idno type="stamp" n="SU-TI">Sorbonne Université - Texte Intégral</idno>
            <idno type="stamp" n="ALLIANCE-SU">Alliance Sorbonne Université</idno>
            <idno type="stamp" n="SUPRA_MATHS_INFO">Mathématiques + Informatique</idno>
          </seriesStmt>
          <notesStmt>
            <note type="audience" n="2">International</note>
            <note type="invited" n="0">No</note>
            <note type="popular" n="0">No</note>
            <note type="peer" n="1">Yes</note>
            <note type="proceedings" n="1">Yes</note>
          </notesStmt>
          <sourceDesc>
            <biblStruct>
              <analytic>
                <title xml:lang="en">A White Box Analysis of ColBERT</title>
                <author role="aut">
                  <persName>
                    <forename type="first">Thibault</forename>
                    <surname>Formal</surname>
                  </persName>
                  <idno type="halauthorid">2114463-0</idno>
                  <affiliation ref="#struct-541720"/>
                  <affiliation ref="#struct-500187"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Benjamin</forename>
                    <surname>Piwowarski</surname>
                  </persName>
                  <email type="md5">c7b41b216d6ec87414d6178690a3906d</email>
                  <email type="domain">piwowarski.fr</email>
                  <idno type="idhal" notation="string">benjamin-piwowarski</idno>
                  <idno type="idhal" notation="numeric">9362</idno>
                  <idno type="halauthorid" notation="string">17258-9362</idno>
                  <idno type="ARXIV">https://arxiv.org/a/piwowarski_b_1</idno>
                  <idno type="ORCID">https://orcid.org/0000-0001-6792-3262</idno>
                  <idno type="IDREF">https://www.idref.fr/226846601</idno>
                  <affiliation ref="#struct-541720"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Stéphane</forename>
                    <surname>Clinchant</surname>
                  </persName>
                  <idno type="halauthorid">665739-0</idno>
                  <affiliation ref="#struct-500187"/>
                </author>
              </analytic>
              <monogr>
                <meeting>
                  <title>43rd EUROPEAN CONFERENCE ON INFORMATION RETRIEVAL</title>
                  <date type="start">2021-03-28</date>
                  <date type="end">2021-04-01</date>
                  <settlement>Lucca (online)</settlement>
                  <country key="IT">Italy</country>
                </meeting>
                <imprint>
                  <publisher>Springer International Publishing</publisher>
                  <biblScope unit="serie">Lecture Notes in Computer Science</biblScope>
                  <biblScope unit="volume">12657</biblScope>
                  <biblScope unit="pp">257-263</biblScope>
                  <date type="datePub">2021-03-30</date>
                </imprint>
              </monogr>
              <idno type="doi">10.1007/978-3-030-72240-1_23</idno>
            </biblStruct>
          </sourceDesc>
          <profileDesc>
            <langUsage>
              <language ident="en">English</language>
            </langUsage>
            <textClass>
              <keywords scheme="author">
                <term xml:lang="en">BERT</term>
                <term xml:lang="en">Transformer</term>
                <term xml:lang="en">Term Matching</term>
                <term xml:lang="en">Information Retrieval</term>
              </keywords>
              <classCode scheme="halDomain" n="info.info-ir">Computer Science [cs]/Information Retrieval [cs.IR]</classCode>
              <classCode scheme="halDomain" n="info.info-ai">Computer Science [cs]/Artificial Intelligence [cs.AI]</classCode>
              <classCode scheme="halDomain" n="info.info-lg">Computer Science [cs]/Machine Learning [cs.LG]</classCode>
              <classCode scheme="halDomain" n="info.info-tt">Computer Science [cs]/Document and Text Processing</classCode>
              <classCode scheme="halTypology" n="COMM">Conference papers</classCode>
              <classCode scheme="halOldTypology" n="COMM">Conference papers</classCode>
              <classCode scheme="halTreeTypology" n="COMM">Conference papers</classCode>
            </textClass>
            <abstract xml:lang="en">
              <p>Transformer-based models are nowadays state-of-the-art in adhoc Information Retrieval, but their behavior is far from being understood. Recent work has claimed that BERT does not satisfy the classical IR axioms. However, we propose to dissect the matching process of ColBERT, through the analysis of term importance and exact/soft matching patterns. Even if the traditional axioms are not formally verified, our analysis reveals that ColBERT (i) is able to capture a notion of term importance; (ii) relies on exact matches for important terms.</p>
            </abstract>
          </profileDesc>
        </biblFull>
      </listBibl>
    </body>
    <back>
      <listOrg type="structures">
        <org type="researchteam" xml:id="struct-541720" status="OLD">
          <orgName>Machine Learning and Information Access</orgName>
          <orgName type="acronym">MLIA</orgName>
          <date type="start">2018-01-01</date>
          <date type="end">2021-12-31</date>
          <desc>
            <address>
              <country key="FR"/>
            </address>
          </desc>
          <listRelation>
            <relation active="#struct-541703" type="direct"/>
            <relation active="#struct-413221" type="indirect"/>
            <relation name="UMR7606" active="#struct-441569" type="indirect"/>
          </listRelation>
        </org>
        <org type="institution" xml:id="struct-500187" status="VALID">
          <orgName>Naver Labs Europe [Meylan]</orgName>
          <desc>
            <address>
              <addrLine>6-8 Chemin de Maupertuis 38240 Meylan</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.europe.naverlabs.com/</ref>
          </desc>
        </org>
        <org type="laboratory" xml:id="struct-541703" status="VALID">
          <idno type="IdRef">13558292X</idno>
          <idno type="RNSR">199712651U</idno>
          <idno type="ROR">https://ror.org/05krcen59</idno>
          <orgName>LIP6</orgName>
          <date type="start">2018-01-01</date>
          <desc>
            <address>
              <addrLine>4 Place JUSSIEU 75252 PARIS CEDEX 05</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.lip6.fr/</ref>
          </desc>
          <listRelation>
            <relation active="#struct-413221" type="direct"/>
            <relation name="UMR7606" active="#struct-441569" type="direct"/>
          </listRelation>
        </org>
        <org type="regroupinstitution" xml:id="struct-413221" status="VALID">
          <idno type="IdRef">221333754</idno>
          <idno type="ROR">https://ror.org/02en5vm52</idno>
          <orgName>Sorbonne Université</orgName>
          <orgName type="acronym">SU</orgName>
          <date type="start">2018-01-01</date>
          <desc>
            <address>
              <addrLine>21 rue de l’École de médecine - 75006 Paris</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.sorbonne-universite.fr/</ref>
          </desc>
        </org>
        <org type="regroupinstitution" xml:id="struct-441569" status="VALID">
          <idno type="IdRef">02636817X</idno>
          <idno type="ISNI">0000000122597504</idno>
          <idno type="ROR">https://ror.org/02feahw73</idno>
          <orgName>Centre National de la Recherche Scientifique</orgName>
          <orgName type="acronym">CNRS</orgName>
          <date type="start">1939-10-19</date>
          <desc>
            <address>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.cnrs.fr/</ref>
          </desc>
        </org>
      </listOrg>
    </back>
  </text>
</TEI>