noise-tail/index.html at main · moverseai/noise-tail · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="description"
        content="Noise-in, Bias-out: Balanced and Real-time MoCap Solving.">
  <meta name="keywords" content="Moverse, AI, Motion Capture, MoCap, SMPL, SMPL-X, Body Fitting, Body Estimation, Pose Estimation, Multiview System">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Noise-in, Bias-out: Balanced and Real-time MoCap Solving</title>

  <!-- Global site tag (gtag.js) - Google Analytics
  <script async src="https://www.googletagmanager.com/gtag/js?id=G-XXXXXXXXXXX"></script>
  <script>
    window.dataLayer = window.dataLayer || [];

    function gtag() {
      dataLayer.push(arguments);
    }

    gtag('js', new Date());

    gtag('config', 'G-XXXXXXXXXXXXX');
  </script> -->

  <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro" rel="stylesheet">

  <link rel="stylesheet" href="./static/css/bulma.min.css">
  <link rel="stylesheet" href="./static/css/bulma-carousel.min.css">
  <link rel="stylesheet" href="./static/css/bulma-slider.min.css">
  <link rel="stylesheet" href="./static/css/fontawesome.all.min.css">
  <link rel="stylesheet"
        href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
  <link rel="stylesheet" href="./static/css/index.css">
  <link rel="icon" href="./static/images/favicon.png">

  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
  <script defer src="./static/js/fontawesome.all.min.js"></script>
  <script src="./static/js/bulma-carousel.min.js"></script>
  <script src="./static/js/bulma-slider.min.js"></script>
  <script src="./static/js/index.js"></script>

  <link rel="stylesheet" href="./static/css/dics.css">
  <script src="./static/js/dics.js"></script>
  <link rel="stylesheet" href="./static/css/BeerSlider.css"/>
  <script src="./static/js/BeerSlider.js"></script>
</head>
<body>

<nav class="navbar" role="navigation" aria-label="main navigation">
  <div class="navbar-brand">
    <a role="button" class="navbar-burger" aria-label="menu" aria-expanded="false">
      <span aria-hidden="true"></span>
      <span aria-hidden="true"></span>
      <span aria-hidden="true"></span>
    </a>
  </div>
  <div class="navbar-menu">
    <div class="navbar-start" style="flex-grow: 1; justify-content: center;">
      <a class="navbar-item" href="https://www.github.com/moverseai">
      <span class="icon">
          <i class="fas fa-home"></i>
      </span>
      </a>

      <div class="navbar-item has-dropdown is-hoverable">
        <a class="navbar-link">
          More Research
        </a>
        <div class="navbar-dropdown">
          <a class="navbar-item" href="https://moverseai.github.io/noise-tail">
            Long-tail
          </a>
          <a class="navbar-item" href="https://moverseai.github.io/bundle">
            BundleMoCap
          </a>
          <!-- <a class="navbar-item" href="https://moverseai.github.io/Placeholder3">
            Moverse Placeholder #3
          </a>
          <a class="navbar-item" href="https://moverseai.github.io/Placeholder4">
            Moverse Placeholder #4
          </a> -->
        </div>
      </div>
    </div>

  </div>
</nav>

<section class="hero">
  <div class="hero-body">
    <div class="container is-max-desktop">
      <div class="columns is-centered">
        <div class="column has-text-centered">
          <!-- Logo shown above the title; alt text gives it an accessible name. -->
          <img width="8%" src="./static/images/mov_icon.png" class="center" alt="Moverse icon">
          <br><br>
          <h1 class="title is-1 publication-title">Noise-in, Bias-out: Balanced and Real-time MoCap Solving</h1>
          <div class="is-size-5 publication-authors">
            <span class="author-block">
                <a href="https://tzole1155.github.io/">Georgios Albanis</a><sup>1, 2</sup>,</span>
            <span class="author-block">
                <a href="https://zokin.github.io">Nikolaos Zioulis</a><sup>1</sup>,</span>
            <span class="author-block">
                <a href="https://spthermo.github.io/">Spyridon Thermos</a><sup>1</sup>,</span>
            <span class="author-block">
                <a href="https://tofis.github.io/">Chatzitofis Anargyros</a><sup>1</sup>,</span>
            <span class="author-block">
                <a href="http://kostasks.users.uth.gr/index.html">Kostas Kolomvatsos</a><sup>2</sup></span>
          </div>

          <div class="is-size-5 publication-authors">
            <span class="author-block"><sup>1</sup><a href="https://www.moverse.ai">Moverse</a>,</span>
            <span class="author-block"><sup>2</sup><a href="https://iprism.eu/">University of Thessaly</a></span>
          </div>

          <div class="column has-text-centered">
            <div class="publication-links">
              <!-- PDF Link. -->
              <span class="link-block">
                <a href="https://openaccess.thecvf.com/content/ICCV2023W/CV4Metaverse/papers/Albanis_Noise-in_Bias-out_Balanced_and_Real-Time_MoCap_Solving_ICCVW_2023_paper.pdf"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                      <i class="fas fa-file-pdf"></i>
                  </span>
                  <span>Paper</span>
                </a>
              </span>
              <!-- <span class="link-block">
                <a href="https://openaccess.thecvf.com/content/ICCV2023W/WORKSHOP/supplemental/XXXX_supplemental.pdf"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                      <i class="fas fa-file-pdf"></i>
                  </span>
                  <span>Supplementary</span>
                </a>
              </span>   -->
              <span class="link-block">
                <a href="https://arxiv.org/pdf/2309.14330.pdf"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                      <i class="ai ai-arxiv"></i>
                  </span>
                  <span>arXiv</span>
                </a>
              </span>
              <!-- Video Link. -->
              <span class="link-block">
                <a href="https://www.youtube.com/watch?v=62ncJL4D0Cw"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                      <i class="fab fa-youtube"></i>
                  </span>
                  <span>Video</span>
                </a>
              </span>
              <!-- Code Link. -->
              <!-- <span class="link-block">
                <a href="https://github.com/moverseai/noise-tail"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                      <i class="fab fa-github"></i>
                  </span>
                  <span>Code</span>
                  </a>
              </span> -->
              <!-- Dataset Link. -->
              <!-- <span class="link-block">
                <a href="https://github.com/moverseai/noise-tail"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                      <i class="far fa-images"></i>
                  </span>
                  <span>Data</span>
                  </a> -->
            </div>
            <br>
          </div>
        </div>
      </div>
    </div>
  </div>
</section>

<!-- Teaser. -->
<section class="section">
  <div class="container is-max-desktop">
    <div class="columns is-centered">
      <div class="column is-centered has-text-centered">
        <div class="container has-text-centered is-max-desktop">
          <!-- Full-width teaser figure; alt text added so the image is announced by assistive technology. -->
          <img width="100%" src="./static/images/teaser.png" class="center"
               alt="Teaser figure for Noise-in, Bias-out: Balanced and Real-time MoCap Solving">
        </div>
      </div>
    </div>
  </div>
</section>
<!--/ Teaser. -->

<!-- Abstract. -->
<section class="section">
  <div class="container is-max-desktop">
    <div class="columns is-centered has-text-centered">
      <div class="column is-four-fifths">
        <h2 class="title is-3">Abstract</h2>
        <div class="content has-text-justified">
          <p>
            Real-time optical Motion Capture (MoCap) systems have not benefited from the advances in modern data-driven modeling.
          </p>
          <p>
            In this work we apply machine learning to solve noisy unstructured marker estimates in real-time and deliver robust marker-based MoCap even when using sparse affordable sensors.
            To achieve this we focus on a number of challenges related to model training, namely the sourcing of training data and their long-tailed distribution.
            Leveraging representation learning we design a technique for imbalanced regression that requires no additional data or labels and improves the performance of our model in rare and challenging poses.
            By relying on a unified representation, we show that training such a model is not bound to high-end MoCap training data acquisition, and instead, can exploit the advances in marker-less MoCap to acquire the necessary data.
          </p>
          <p>
            Finally, we take a step towards richer and affordable MoCap by adapting a body model-based inverse kinematics solution to account for measurement and inference uncertainty, further improving performance and robustness.
          </p>
        </div>
      </div>
    </div>
  <!-- Closes div.container, which was previously left open until </section>. -->
  </div>
</section>
<!--/ Abstract-->

<!-- Paper video. -->
<!-- NOTE(review): unlike the sibling sections, this <section> carries no "section" class; confirm that is intentional. -->
<section>
  <div class="container is-max-desktop">
    <div class="columns is-centered has-text-centered">
      <div class="column is-four-fifths">
        <h2 class="title is-3">Video</h2>
        <!-- Embedded YouTube player for the same video id (62ncJL4D0Cw) as the "Video" button in the hero links. -->
        <div class="publication-video">
          <iframe width="560" height="315" src="https://www.youtube.com/embed/62ncJL4D0Cw?si=LF_LtEGGARIcrO8q" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" allowfullscreen></iframe>
        </div>
      </div>
    </div>
  </div>
</section>
<!--/ Paper video. -->

<!-- Interpolation. -->
<section class="section">
  <div class="container is-max-desktop">
    <div class="columns is-centered">
      <div class="column has-text-centered">
        <div class="column is-full-width">
          <h3 class="title is-3">Balancing with Controlled Synthesis</h3>
          <div class="content has-text-justified">
            <p>
              The generative and disentangling nature of modern synthesis models shape manifolds
              that map inputs to the underlying factors of data variation, effectively mapping similar
              poses to nearby latent codes which can be traversed across the latent space dimensions.
              Using two anchor poses we use SLERP to select a code in-between them and generate plausible poses.
            </p>
          </div>
          <div class="columns is-vcentered interpolation-panel">
            <div class="column is-3 has-text-centered">
              <img src="./static/images/interpolation/interpolate_start.png"
                  class="interpolation-image"
                  alt="Interpolate start reference image."/>
              <p>Start Frame</p>
            </div>
            <div class="column interpolation-video-column">
              <div id="interpolation-image-wrapper">
                Loading...
              </div>
              <!-- Range input (steps 0-5) with no visible <label>; aria-label supplies its accessible name. -->
              <input class="slider is-fullwidth is-large is-info"
                    id="interpolation-slider"
                    step="1" min="0" max="5" value="0" type="range"
                    aria-label="Interpolation step between the start and end frames">
            </div>
            <div class="column is-3 has-text-centered">
              <img src="./static/images/interpolation/interpolate_end.png"
                  class="interpolation-image"
                  alt="Interpolation end reference image."/>
              <p class="is-bold">End Frame</p>
            </div>
          </div>
        </div>
      </div>
      <br/>
    </div>
  </div>
</section>
<!--/ Interpolation. -->

<!-- Relevance. -->
<section class="section">
  <div class="container is-max-desktop">
    <div class="columns is-centered">
      <div class="column is-centered has-text-centered">
        <h3 class="title is-3">Balancing by Assigning Relevance</h3>
        <div class="content has-text-justified">
          <p>
            Tail poses are hard to reconstruct accurately. We exploit this bias in
            pose reconstructability and transform it to a confidence value through
            a relevance function. From the investigated functions we opt for the
            exponential one - normalized by a scaling factor σ - which assigns higher penalties to the worst reconstructed
            poses. We use turbo colorization to color-code the assigned penalty (weight)
            as you can see below. The depicted samples belong to the AMASS dataset [1].
          </p>
        </div>
        <div class="container has-text-centered is-max-desktop">
            <!-- Figure referenced by the paragraph above; alt text summarises what that paragraph says it shows. -->
            <img width="100%" src="./static/images/relevance/all.png" class="center"
                 alt="AMASS pose samples color-coded with the turbo colormap according to their assigned penalty weight">
        </div>
      </div>
    </div>
  </div>
</section>
<!--/ Relevance. -->

<!-- NN. -->
<section class="section">
  <div class="container is-max-desktop">
    <div class="columns is-centered">
      <div class="column is-centered has-text-centered">
        <h3 class="title is-3">Combining Oversampling and Relevance</h3>
        <div class="content has-text-justified">
          <p>
            We train our real-time MoCap model by balancing the biased data distribution with
            a combination of oversampling through synthesis and re-weighting relevance based on sample rarity.
            Our convolutional model is able to estimate 3D landmarks through learning to solve a 2D task, exploiting
            the maturity of structured heatmap representations. The estimated markers and joints are used as input to
            our robust solver.
          </p>
        </div>
        <div class="container has-text-centered is-max-desktop">
            <!-- Model figure; alt text added so the image is not skipped silently by assistive technology. -->
            <!-- NOTE(review): alt wording is inferred from the file name and the paragraph above; refine against the actual figure. -->
            <img width="100%" src="./static/images/model.png" class="center"
                 alt="Overview figure of the proposed MoCap model">
        </div>
      </div>
    </div>
  </div>
</section>
<!--/ NN. -->

<!-- Slider Animation. -->
<section class="section">
  <div class="container is-centered is-max-desktop">
    <div class="column is-centered has-text-centered">
      <h3 class="title is-3">Noise-aware Solving</h3>
      <div class="content has-text-justified">
        <p>
          Instead of labeling the 3D landmark positions as in SOMA [2], our solver fits a parametric model (we use SMPL [3])
          to the estimated (through regression) values to obtain the articulated skeleton and the mesh surface of the input sample.
          Our solver assumes noisy input from off-the-shelf sensors and is trained with an adaptive noise-aware fitting objective.
          Compared to existing solutions like MoSh [4], our method is more robust to in-the-wild captures
          by affordable sensors.
        </p>
      </div>
      <!--
        Two before/after comparison sliders. The inline script at the end of <body>
        initialises every element with the "beer-slider" class, so each slider only
        needs a document-unique id.
        NOTE(review): both sliders previously shared id="beer-slider"; confirm that no
        external stylesheet or script targets that old id.
      -->
      <table style="margin-left: auto;margin-right: auto;">
        <tr>
          <th style="text-align: center; font-size:100%">
              Regression vs. Labeling
          </th>
        </tr>
        <tr>
          <td>
            <div id="beer-slider-labeling" class="beer-slider beer-ready" data-beer-label="Ours" data-beer-start="40">
              <img src="./static/images/solving/ours.png" style="width: 300px"
                   alt="Fit to our regressed markers (purple) compared with the ground truth mesh (gray)">
              <div class="beer-reveal" data-beer-label="SOMA">
                <img src="./static/images/solving/soma.png" style="width: 300px"
                     alt="Fit to SOMA labeled markers (orange) compared with the ground truth mesh (gray)">
              </div>
            </div>
          </td>
        </tr>
        <tr>
          <th style="text-align: center; font-size:100%">
              Noise-aware vs. Plain Fitting
          </th>
        </tr>
        <tr>
          <td>
            <div id="beer-slider-fitting" class="beer-slider beer-ready" data-beer-label="Ours" data-beer-start="40">
              <img src="./static/images/solving/uncertainty.png"
                   alt="Our noise-aware fit, colorized by Euclidean distance from the ground truth mesh">
              <div class="beer-reveal" data-beer-label="MoSh">
                <img src="./static/images/solving/mosh.png"
                     alt="MoSh fit, colorized by Euclidean distance from the ground truth mesh">
              </div>
            </div>
          </td>
        </tr>
      </table>
      <div class="content has-text-justified">
        <p>
          Left: Fits to our regressed (purple) vs SOMA labeled (orange) markers, compared with ground truth meshes (gray). Right: Our noise-aware
          approach vs. MoSh Euclidean distance from the ground truth mesh (colorized with 'jet' colormap).
        </p>
      </div>
    </div>
  </div>
</section>
<!--/ Slider Animation. -->

<!-- References. -->
<section class="section">
  <div class="container is-max-desktop content">
    <h2 class="title is-3">References</h2>
    <div class="content has-text-justified">
      <p>
        [1] <a href="https://amass.is.tue.mpg.de/">AMASS</a>: Archive of motion capture as surface shapes.
      </p>
      <p>
        [2] <a href="https://soma.is.tue.mpg.de/">SOMA</a>: Solving optical marker-based mocap automatically.
      </p>
      <p>
        [3] <a href="https://smpl.is.tue.mpg.de/">SMPL</a>: A skinned multiperson linear model.
      </p>
      <p>
        [4] <a href="https://files.is.tue.mpg.de/black/papers/MoSh.pdf">MoSh</a>: Motion and shape capture from sparse markers.
      </p>
    </div>
  </div>
</section>
<!--/ References. -->

<section class="section" id="BibTeX">
  <div class="container is-max-desktop content">
    <h2 class="title">BibTeX</h2>
    <pre><code>@inproceedings{albanis2023noise,
  author    = {Albanis, Georgios and Zioulis, Nikolaos and Thermos, Spyridon and Chatzitofis, Anargyros and Kolomvatsos, Kostas},
  title     = {Noise-in, Bias-out: Balanced and Real-time MoCap Solving},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
  url       = {https://moverseai.github.io/noise-tail/},
  month     = {October},
  year      = {2023}
}</code></pre>
  </div>
</section>


<footer class="footer">
  <div class="container">
    <div class="content has-text-centered">
      <!-- Icon-only link: aria-label gives it an accessible name, since the icon carries no text. -->
      <a class="icon-link"
         href="./static/pdf/paper.pdf"
         aria-label="Paper (PDF)">
        <i class="fas fa-file-pdf"></i>
      </a>
      <!-- <a class="icon-link" href="https://github.com/moverseai" class="external-link" disabled>
        <i class="fab fa-github"></i>
      </a> -->
    </div>
    <div class="columns is-centered">
        <div class="content">
          <p>
            <!-- rel="noopener" keeps the page opened in the new tab from reaching back via window.opener. -->
            The website template is borrowed from <a href="https://nerfies.github.io" target="_blank" rel="noopener">nerfies</a>.
          </p>
        </div>
    </div>
  </div>
</footer>

<script>
  // Register BeerSlider as a jQuery plugin: builds one BeerSlider per matched
  // element and returns the jQuery set so calls can be chained.
  $.fn.BeerSlider = function (options) {
    var settings = options || {};
    return this.each(function (_, node) {
      new BeerSlider(node, settings);
    });
  };

  // Initialise every slider on the page, taking its start value from the
  // element's data-beer-start attribute.
  $('.beer-slider').each(function () {
    var $slider = $(this);
    $slider.BeerSlider({ start: $slider.data('beer-start') });
  });
</script>

</body>
</html>