<?xml version="1.0" encoding="utf-8"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:hal="http://hal.archives-ouvertes.fr/" xmlns:gml="http://www.opengis.net/gml/3.3/" xmlns:gmlce="http://www.opengis.net/gml/3.3/ce" version="1.1" xsi:schemaLocation="http://www.tei-c.org/ns/1.0 http://api.archives-ouvertes.fr/documents/aofr-sword.xsd">
  <teiHeader>
    <fileDesc>
      <titleStmt>
        <title>HAL TEI export of hal-02444359</title>
      </titleStmt>
      <publicationStmt>
        <distributor>CCSD</distributor>
        <availability status="restricted">
          <licence target="https://creativecommons.org/publicdomain/zero/1.0/">CC0 1.0 - Universal</licence>
        </availability>
        <date when="2026-05-22T01:22:16+02:00"/>
      </publicationStmt>
      <sourceDesc>
        <p part="N">HAL API Platform</p>
      </sourceDesc>
    </fileDesc>
  </teiHeader>
  <text>
    <body>
      <listBibl>
        <biblFull>
          <titleStmt>
            <title xml:lang="en">Efficient similarity-based alignment of temporally-situated graph nodes with Apache Spark</title>
            <author role="aut">
              <persName>
                <forename type="first">Hubert</forename>
                <surname>Naacke</surname>
              </persName>
              <email type="md5">fb2112a843a757bff27e0398b762ba0b</email>
              <email type="domain">lip6.fr</email>
              <idno type="idhal" notation="string">hubert-naacke</idno>
              <idno type="idhal" notation="numeric">9627</idno>
              <idno type="halauthorid" notation="string">9851-9627</idno>
              <idno type="ORCID">https://orcid.org/0000-0003-0559-9908</idno>
              <idno type="IDREF">https://www.idref.fr/06104203X</idno>
              <affiliation ref="#struct-541715"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Ke</forename>
                <surname>Li</surname>
              </persName>
              <email type="md5">b1bc7c227245651012e15f00d7e4f56d</email>
              <email type="domain">lip6.fr</email>
              <idno type="idhal" notation="numeric">1063176</idno>
              <idno type="halauthorid" notation="string">208584-1063176</idno>
              <affiliation ref="#struct-541715"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Bernd</forename>
                <surname>Amann</surname>
              </persName>
              <email type="md5">5ffc73d3de10460ec80fc861c8f18e8f</email>
              <email type="domain">lip6.fr</email>
              <idno type="idhal" notation="string">bernd-amann</idno>
              <idno type="idhal" notation="numeric">3057</idno>
              <idno type="halauthorid" notation="string">13288-3057</idno>
              <idno type="ORCID">https://orcid.org/0000-0002-6822-4049</idno>
              <idno type="GOOGLE SCHOLAR">https://scholar.google.com/citations?user=jKHvF80AAAAJ&amp;hl=en</idno>
              <idno type="IDREF">https://www.idref.fr/060259418</idno>
              <affiliation ref="#struct-541715"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Olivier</forename>
                <surname>Curé</surname>
              </persName>
              <email type="md5">8f5647ae62c7899105f9757af96f0258</email>
              <email type="domain">univ-mlv.fr</email>
              <idno type="idhal" notation="string">olivier-cure</idno>
              <idno type="idhal" notation="numeric">18350</idno>
              <idno type="halauthorid" notation="string">7229-18350</idno>
              <idno type="IDREF">https://www.idref.fr/153626011</idno>
              <affiliation ref="#struct-1001627"/>
            </author>
            <editor role="depositor">
              <persName>
                <forename>Hubert</forename>
                <surname>Naacke</surname>
              </persName>
              <email type="md5">fb2112a843a757bff27e0398b762ba0b</email>
              <email type="domain">lip6.fr</email>
            </editor>
            <funder ref="#projanr-44702"/>
          </titleStmt>
          <editionStmt>
            <edition n="v1" type="current">
              <date type="whenSubmitted">2020-01-17 17:50:06</date>
              <date type="whenModified">2025-10-18 03:29:45</date>
              <date type="whenReleased">2020-01-17 17:50:06</date>
              <date type="whenProduced">2019-12-09</date>
            </edition>
            <respStmt>
              <resp>contributor</resp>
              <name key="450218">
                <persName>
                  <forename>Hubert</forename>
                  <surname>Naacke</surname>
                </persName>
                <email type="md5">fb2112a843a757bff27e0398b762ba0b</email>
                <email type="domain">lip6.fr</email>
              </name>
            </respStmt>
          </editionStmt>
          <publicationStmt>
            <distributor>CCSD</distributor>
            <idno type="halId">hal-02444359</idno>
            <idno type="halUri">https://hal.sorbonne-universite.fr/hal-02444359</idno>
            <idno type="halBibtex">naacke:hal-02444359</idno>
            <idno type="halRefHtml">&lt;i&gt;IEEE International Conference on Big Data, High Performance Big Graph Data Management, Analysis, and Mining&lt;/i&gt;, Dec 2019, Los Angeles, CA, United States. pp.4793-4798, &lt;a target="_blank" href="https://dx.doi.org/10.1109/BigData47090.2019.9005483"&gt;&amp;#x27E8;10.1109/BigData47090.2019.9005483&amp;#x27E9;&lt;/a&gt;</idno>
            <idno type="halRef">IEEE International Conference on Big Data, High Performance Big Graph Data Management, Analysis, and Mining, Dec 2019, Los Angeles, CA, United States. pp.4793-4798, &amp;#x27E8;10.1109/BigData47090.2019.9005483&amp;#x27E9;</idno>
            <availability status="restricted"/>
          </publicationStmt>
          <seriesStmt>
            <idno type="stamp" n="ENPC" corresp="PARISTECH">École nationale des ponts et chaussées</idno>
            <idno type="stamp" n="CNRS">CNRS - Centre national de la recherche scientifique</idno>
            <idno type="stamp" n="PARISTECH">ParisTech</idno>
            <idno type="stamp" n="ENPC-LIGM" corresp="ENPC">Laboratoire d'informatique Gaspard-Monge</idno>
            <idno type="stamp" n="LIGM" corresp="ENPC">Laboratoire d'informatique Gaspard-Monge</idno>
            <idno type="stamp" n="LIGM_MOA" corresp="LIGM">Models and Algorithms</idno>
            <idno type="stamp" n="LIP6" corresp="SORBONNE-UNIVERSITE">Laboratoire d'Informatique de Paris 6</idno>
            <idno type="stamp" n="ESIEE-PARIS">ESIEE Paris</idno>
            <idno type="stamp" n="SORBONNE-UNIVERSITE">Sorbonne Université</idno>
            <idno type="stamp" n="SORBONNE-UNIV" corresp="SORBONNE-UNIVERSITE">Sorbonne Université 01/01/2018</idno>
            <idno type="stamp" n="SU-SCIENCES" corresp="SORBONNE-UNIVERSITE">Faculté des Sciences de Sorbonne Université</idno>
            <idno type="stamp" n="TEST-HALCNRS">Collection test HAL CNRS</idno>
            <idno type="stamp" n="ANR">ANR</idno>
            <idno type="stamp" n="ALLIANCE-SU">Alliance Sorbonne Université</idno>
            <idno type="stamp" n="UNIV-EIFFEL">Université Gustave Eiffel</idno>
            <idno type="stamp" n="U-EIFFEL">Université Gustave Eiffel</idno>
            <idno type="stamp" n="TEST3-HALCNRS">TEST3-HALCNRS</idno>
            <idno type="stamp" n="LIGM_BAAM" corresp="LIGM">Base de données, Automate, Analyse d'algorithmes et Modèles</idno>
            <idno type="stamp" n="SUPRA_MATHS_INFO">Mathématiques + Informatique</idno>
            <idno type="stamp" n="IP-PARIS-INFORMATIQUE-DONNEES-ET-IA">IP Paris Département d'Informatique, de Données et d'IA</idno>
          </seriesStmt>
          <notesStmt>
            <note type="audience" n="2">International</note>
            <note type="invited" n="0">No</note>
            <note type="popular" n="0">No</note>
            <note type="peer" n="1">Yes</note>
            <note type="proceedings" n="1">Yes</note>
          </notesStmt>
          <sourceDesc>
            <biblStruct>
              <analytic>
                <title xml:lang="en">Efficient similarity-based alignment of temporally-situated graph nodes with Apache Spark</title>
                <author role="aut">
                  <persName>
                    <forename type="first">Hubert</forename>
                    <surname>Naacke</surname>
                  </persName>
                  <email type="md5">fb2112a843a757bff27e0398b762ba0b</email>
                  <email type="domain">lip6.fr</email>
                  <idno type="idhal" notation="string">hubert-naacke</idno>
                  <idno type="idhal" notation="numeric">9627</idno>
                  <idno type="halauthorid" notation="string">9851-9627</idno>
                  <idno type="ORCID">https://orcid.org/0000-0003-0559-9908</idno>
                  <idno type="IDREF">https://www.idref.fr/06104203X</idno>
                  <affiliation ref="#struct-541715"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Ke</forename>
                    <surname>Li</surname>
                  </persName>
                  <email type="md5">b1bc7c227245651012e15f00d7e4f56d</email>
                  <email type="domain">lip6.fr</email>
                  <idno type="idhal" notation="numeric">1063176</idno>
                  <idno type="halauthorid" notation="string">208584-1063176</idno>
                  <affiliation ref="#struct-541715"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Bernd</forename>
                    <surname>Amann</surname>
                  </persName>
                  <email type="md5">5ffc73d3de10460ec80fc861c8f18e8f</email>
                  <email type="domain">lip6.fr</email>
                  <idno type="idhal" notation="string">bernd-amann</idno>
                  <idno type="idhal" notation="numeric">3057</idno>
                  <idno type="halauthorid" notation="string">13288-3057</idno>
                  <idno type="ORCID">https://orcid.org/0000-0002-6822-4049</idno>
                  <idno type="GOOGLE SCHOLAR">https://scholar.google.com/citations?user=jKHvF80AAAAJ&amp;hl=en</idno>
                  <idno type="IDREF">https://www.idref.fr/060259418</idno>
                  <affiliation ref="#struct-541715"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Olivier</forename>
                    <surname>Curé</surname>
                  </persName>
                  <email type="md5">8f5647ae62c7899105f9757af96f0258</email>
                  <email type="domain">univ-mlv.fr</email>
                  <idno type="idhal" notation="string">olivier-cure</idno>
                  <idno type="idhal" notation="numeric">18350</idno>
                  <idno type="halauthorid" notation="string">7229-18350</idno>
                  <idno type="IDREF">https://www.idref.fr/153626011</idno>
                  <affiliation ref="#struct-1001627"/>
                </author>
              </analytic>
              <monogr>
                <idno type="isbn">978-1-7281-0858-2</idno>
                <meeting>
                  <title>IEEE International Conference on Big Data, High Performance Big Graph Data Management, Analysis, and Mining</title>
                  <date type="start">2019-12-09</date>
                  <date type="end">2019-12-12</date>
                  <settlement>Los Angeles, CA</settlement>
                  <country key="US">United States</country>
                </meeting>
                <imprint>
                  <publisher>IEEE</publisher>
                  <biblScope unit="pp">4793-4798</biblScope>
                </imprint>
              </monogr>
              <idno type="doi">10.1109/BigData47090.2019.9005483</idno>
            </biblStruct>
          </sourceDesc>
          <profileDesc>
            <langUsage>
              <language ident="en">English</language>
            </langUsage>
            <textClass>
              <keywords scheme="author">
                <term xml:lang="en">Big Data</term>
                <term xml:lang="en">Cluster computing</term>
                <term xml:lang="en">Document handling</term>
                <term xml:lang="en">Graph theory</term>
                <term xml:lang="en">Parallel processing</term>
              </keywords>
              <classCode scheme="https://dl.acm.org/ccs" n="ACM2012.H.0.1"/>
              <classCode scheme="https://dl.acm.org/ccs" n="ACM2012.K.1.0"/>
              <classCode scheme="halDomain" n="info.info-db">Computer Science [cs]/Databases [cs.DB]</classCode>
              <classCode scheme="halDomain" n="info.info-dc">Computer Science [cs]/Distributed, Parallel, and Cluster Computing [cs.DC]</classCode>
              <classCode scheme="halDomain" n="info.info-ds">Computer Science [cs]/Data Structures and Algorithms [cs.DS]</classCode>
              <classCode scheme="halTypology" n="COMM">Conference papers</classCode>
              <classCode scheme="halOldTypology" n="COMM">Conference papers</classCode>
              <classCode scheme="halTreeTypology" n="COMM">Conference papers</classCode>
            </textClass>
            <abstract xml:lang="en">
              <p>Topic evolution networks are widely used to represent the evolution of research topics in scientific document archives. These networks might contain thousands of topics and alignment edges which are computed by comparing millions of topic pairs with some similarity function. In this work, we are addressing the problem of computing a very large number of cosine-based topic alignments on top of Apache Spark. We present the native map-reduce implementation proposed by Spark and a more efficient implementation which is tuned for alignment computation. Both implementations are evaluated on three real-world datasets.</p>
            </abstract>
          </profileDesc>
        </biblFull>
      </listBibl>
    </body>
    <back>
      <listOrg type="structures">
        <org type="researchteam" xml:id="struct-541715" status="VALID">
          <orgName>Bases de Données</orgName>
          <orgName type="acronym">BD</orgName>
          <date type="start">2008-01-01</date>
          <desc>
            <address>
              <country key="FR"/>
            </address>
          </desc>
          <listRelation>
            <relation active="#struct-541703" type="direct"/>
            <relation active="#struct-413221" type="indirect"/>
            <relation name="UMR7606" active="#struct-441569" type="indirect"/>
          </listRelation>
        </org>
        <org type="laboratory" xml:id="struct-1001627" status="OLD">
          <idno type="IdRef">142877891</idno>
          <idno type="RNSR">200212717U</idno>
          <orgName>Laboratoire d'Informatique Gaspard-Monge</orgName>
          <orgName type="acronym">LIGM</orgName>
          <date type="start">2020-01-01</date>
          <date type="end">2024-12-31</date>
          <desc>
            <address>
              <addrLine>Université Gustave Eiffel, Cité Descartes, Bâtiment Copernic, 5 bd Descartes, 77454 Marne-la-Vallée Cedex 2</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://ligm.u-pem.fr</ref>
          </desc>
          <listRelation>
            <relation active="#struct-301545" type="direct"/>
            <relation name="UMR8049" active="#struct-441569" type="direct"/>
            <relation active="#struct-580722" type="direct"/>
          </listRelation>
        </org>
        <org type="laboratory" xml:id="struct-541703" status="VALID">
          <idno type="IdRef">13558292X</idno>
          <idno type="RNSR">199712651U</idno>
          <idno type="ROR">https://ror.org/05krcen59</idno>
          <orgName>LIP6</orgName>
          <date type="start">2018-01-01</date>
          <desc>
            <address>
              <addrLine>4 Place JUSSIEU 75252 PARIS CEDEX 05</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.lip6.fr/</ref>
          </desc>
          <listRelation>
            <relation active="#struct-413221" type="direct"/>
            <relation name="UMR7606" active="#struct-441569" type="direct"/>
          </listRelation>
        </org>
        <org type="regroupinstitution" xml:id="struct-413221" status="VALID">
          <idno type="IdRef">221333754</idno>
          <idno type="ROR">https://ror.org/02en5vm52</idno>
          <orgName>Sorbonne Université</orgName>
          <orgName type="acronym">SU</orgName>
          <date type="start">2018-01-01</date>
          <desc>
            <address>
              <addrLine>21 rue de l’École de médecine - 75006 Paris</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.sorbonne-universite.fr/</ref>
          </desc>
        </org>
        <org type="regroupinstitution" xml:id="struct-441569" status="VALID">
          <idno type="IdRef">02636817X</idno>
          <idno type="ISNI">0000000122597504</idno>
          <idno type="ROR">https://ror.org/02feahw73</idno>
          <orgName>Centre National de la Recherche Scientifique</orgName>
          <orgName type="acronym">CNRS</orgName>
          <date type="start">1939-10-19</date>
          <desc>
            <address>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.cnrs.fr/</ref>
          </desc>
        </org>
        <org type="institution" xml:id="struct-301545" status="OLD">
          <idno type="ROR">https://ror.org/02nwvxz07</idno>
          <orgName>École nationale des ponts et chaussées</orgName>
          <orgName type="acronym">ENPC</orgName>
          <date type="start">1747-02-14</date>
          <date type="end">2024-12-31</date>
          <desc>
            <address>
              <addrLine>École nationale des ponts et chaussées, 6-8 avenue Blaise-Pascal, Cité Descartes, Champs-sur-Marne, 77455 Marne-la-Vallée cedex 2</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://ecoledesponts.fr/</ref>
          </desc>
        </org>
        <org type="institution" xml:id="struct-580722" status="VALID">
          <idno type="ROR">https://ror.org/03x42jk29</idno>
          <orgName>Université Gustave Eiffel</orgName>
          <date type="start">2020-01-01</date>
          <desc>
            <address>
              <addrLine>Cité Descartes, 5 Boulevard Descartes • Champs-sur-Marne, 77454 Marne-la-Vallée Cedex 2</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.univ-gustave-eiffel.fr/</ref>
          </desc>
        </org>
      </listOrg>
      <listOrg type="projects">
        <org type="anrProject" xml:id="projanr-44702" status="VALID">
          <idno type="anr">ANR-16-CE38-0002</idno>
          <orgName>EPIQUE</orgName>
          <desc>Reconstruire l'évolution des sciences à grande échelle - vers une épistémologie quantitative</desc>
          <date type="start">2016</date>
        </org>
      </listOrg>
    </back>
  </text>
</TEI>